{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7704762173712499, "eval_steps": 500, "global_step": 327400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.1640625, "learning_rate": 0.00019999999998312345, "loss": 2.2748, "step": 5 }, { "epoch": 0.0, "grad_norm": 1.0078125, "learning_rate": 0.00019999999993208572, "loss": 2.1334, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.1875, "learning_rate": 0.00019999999984688613, "loss": 2.2703, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.00019999999972752477, "loss": 2.4581, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.1328125, "learning_rate": 0.00019999999957400158, "loss": 2.2741, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.984375, "learning_rate": 0.00019999999938631656, "loss": 2.1037, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.0078125, "learning_rate": 0.00019999999916446974, "loss": 2.2129, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.203125, "learning_rate": 0.00019999999890846112, "loss": 2.3794, "step": 40 }, { "epoch": 0.0, "grad_norm": 1.0390625, "learning_rate": 0.00019999999861829067, "loss": 2.1742, "step": 45 }, { "epoch": 0.0, "grad_norm": 1.0703125, "learning_rate": 0.00019999999829395842, "loss": 2.3187, "step": 50 }, { "epoch": 0.0, "grad_norm": 1.015625, "learning_rate": 0.00019999999793546435, "loss": 2.4028, "step": 55 }, { "epoch": 0.0, "grad_norm": 1.1015625, "learning_rate": 0.0001999999975428085, "loss": 2.3705, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.28125, "learning_rate": 0.0001999999971159908, "loss": 2.1704, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.21875, "learning_rate": 0.00019999999665501128, "loss": 2.2024, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.015625, "learning_rate": 0.00019999999615986995, "loss": 2.4423, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.125, "learning_rate": 0.0001999999956305668, "loss": 2.1463, "step": 80 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.00019999999506710187, "loss": 2.205, "step": 85 }, { "epoch": 0.0, "grad_norm": 1.21875, "learning_rate": 0.00019999999446947513, "loss": 2.4225, "step": 90 }, { "epoch": 0.0, "grad_norm": 1.09375, "learning_rate": 0.00019999999383768654, "loss": 2.2757, "step": 95 }, { "epoch": 0.0, "grad_norm": 1.1171875, "learning_rate": 0.00019999999317173617, "loss": 2.2886, "step": 100 }, { "epoch": 0.0, "grad_norm": 1.2578125, "learning_rate": 0.00019999999247162398, "loss": 2.2384, "step": 105 }, { "epoch": 0.0, "grad_norm": 1.2890625, "learning_rate": 0.00019999999173734998, "loss": 2.2602, "step": 110 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999999096891418, "loss": 2.264, "step": 115 }, { "epoch": 0.0, "grad_norm": 1.09375, "learning_rate": 0.00019999999016631655, "loss": 2.2352, "step": 120 }, { "epoch": 0.0, "grad_norm": 1.2109375, "learning_rate": 0.0001999999893295571, "loss": 2.2008, "step": 125 }, { "epoch": 0.0, "grad_norm": 0.9296875, "learning_rate": 0.00019999998845863587, "loss": 2.094, "step": 130 }, { "epoch": 0.0, "grad_norm": 1.0859375, "learning_rate": 0.00019999998755355282, "loss": 2.1801, "step": 135 }, { "epoch": 0.0, "grad_norm": 1.2109375, "learning_rate": 0.00019999998661430793, "loss": 2.2018, "step": 140 }, { "epoch": 0.0, "grad_norm": 1.0234375, "learning_rate": 0.00019999998564090127, "loss": 2.2511, "step": 145 }, { "epoch": 0.0, "grad_norm": 1.140625, "learning_rate": 0.0001999999846333328, "loss": 2.2874, "step": 150 }, { "epoch": 0.0, "grad_norm": 1.0703125, "learning_rate": 0.0001999999835916025, "loss": 2.2077, "step": 155 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.0001999999825157104, "loss": 2.3787, "step": 160 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.00019999998140565652, "loss": 2.3112, "step": 165 }, { "epoch": 0.0, "grad_norm": 1.28125, "learning_rate": 0.00019999998026144078, "loss": 2.5031, "step": 170 }, { "epoch": 0.0, "grad_norm": 1.0390625, "learning_rate": 0.00019999997908306326, "loss": 2.1468, "step": 175 }, { "epoch": 0.0, "grad_norm": 1.140625, "learning_rate": 0.00019999997787052394, "loss": 2.1772, "step": 180 }, { "epoch": 0.0, "grad_norm": 1.21875, "learning_rate": 0.00019999997662382282, "loss": 2.2432, "step": 185 }, { "epoch": 0.0, "grad_norm": 1.1171875, "learning_rate": 0.0001999999753429599, "loss": 2.2298, "step": 190 }, { "epoch": 0.0, "grad_norm": 1.09375, "learning_rate": 0.00019999997402793515, "loss": 2.2316, "step": 195 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.0001999999726787486, "loss": 2.0951, "step": 200 }, { "epoch": 0.0, "grad_norm": 1.0234375, "learning_rate": 0.00019999997129540025, "loss": 2.1101, "step": 205 }, { "epoch": 0.0, "grad_norm": 1.125, "learning_rate": 0.0001999999698778901, "loss": 2.2748, "step": 210 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.00019999996842621814, "loss": 2.3192, "step": 215 }, { "epoch": 0.0, "grad_norm": 1.6484375, "learning_rate": 0.0001999999669403844, "loss": 2.2306, "step": 220 }, { "epoch": 0.0, "grad_norm": 1.3203125, "learning_rate": 0.0001999999654203888, "loss": 2.3753, "step": 225 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999996386623145, "loss": 2.3969, "step": 230 }, { "epoch": 0.0, "grad_norm": 1.0078125, "learning_rate": 0.0001999999622779123, "loss": 2.1176, "step": 235 }, { "epoch": 0.0, "grad_norm": 1.1171875, "learning_rate": 0.0001999999606554313, "loss": 2.3346, "step": 240 }, { "epoch": 0.0, "grad_norm": 1.15625, "learning_rate": 0.00019999995899878855, "loss": 2.1043, "step": 245 }, { "epoch": 0.0, "grad_norm": 1.078125, "learning_rate": 0.00019999995730798396, "loss": 2.3451, "step": 250 }, { "epoch": 0.0, "grad_norm": 1.21875, "learning_rate": 0.0001999999555830176, "loss": 2.2321, "step": 255 }, { "epoch": 0.0, "grad_norm": 1.1484375, "learning_rate": 0.00019999995382388944, "loss": 2.3257, "step": 260 }, { "epoch": 0.0, "grad_norm": 1.125, "learning_rate": 0.0001999999520305995, "loss": 2.3639, "step": 265 }, { "epoch": 0.0, "grad_norm": 1.3046875, "learning_rate": 0.0001999999502031477, "loss": 2.1833, "step": 270 }, { "epoch": 0.0, "grad_norm": 1.25, "learning_rate": 0.00019999994834153417, "loss": 2.1208, "step": 275 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.00019999994644575884, "loss": 2.2137, "step": 280 }, { "epoch": 0.0, "grad_norm": 1.2109375, "learning_rate": 0.0001999999445158217, "loss": 2.2559, "step": 285 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.00019999994255172275, "loss": 2.1526, "step": 290 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.000199999940553462, "loss": 1.9841, "step": 295 }, { "epoch": 0.0, "grad_norm": 1.171875, "learning_rate": 0.00019999993852103947, "loss": 2.0477, "step": 300 }, { "epoch": 0.0, "grad_norm": 1.140625, "learning_rate": 0.00019999993645445513, "loss": 2.2832, "step": 305 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.00019999993435370903, "loss": 2.3638, "step": 310 }, { "epoch": 0.0, "grad_norm": 1.1484375, "learning_rate": 0.00019999993221880114, "loss": 2.077, "step": 315 }, { "epoch": 0.0, "grad_norm": 1.2578125, "learning_rate": 0.00019999993004973145, "loss": 2.1916, "step": 320 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999992784649996, "loss": 2.191, "step": 325 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.0001999999256091067, "loss": 2.2502, "step": 330 }, { "epoch": 0.0, "grad_norm": 1.4765625, "learning_rate": 0.00019999992333755162, "loss": 2.2943, "step": 335 }, { "epoch": 0.0, "grad_norm": 1.1015625, "learning_rate": 0.00019999992103183475, "loss": 2.2807, "step": 340 }, { "epoch": 0.0, "grad_norm": 1.171875, "learning_rate": 0.00019999991869195613, "loss": 2.1877, "step": 345 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.00019999991631791572, "loss": 2.2113, "step": 350 }, { "epoch": 0.0, "grad_norm": 1.0859375, "learning_rate": 0.0001999999139097135, "loss": 2.1985, "step": 355 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.0001999999114673495, "loss": 2.3213, "step": 360 }, { "epoch": 0.0, "grad_norm": 1.125, "learning_rate": 0.00019999990899082374, "loss": 2.1945, "step": 365 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.0001999999064801362, "loss": 2.0816, "step": 370 }, { "epoch": 0.0, "grad_norm": 1.046875, "learning_rate": 0.00019999990393528686, "loss": 2.4369, "step": 375 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.00019999990135627574, "loss": 2.1927, "step": 380 }, { "epoch": 0.0, "grad_norm": 1.1484375, "learning_rate": 0.00019999989874310283, "loss": 2.2797, "step": 385 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.00019999989609576817, "loss": 2.0401, "step": 390 }, { "epoch": 0.0, "grad_norm": 1.171875, "learning_rate": 0.0001999998934142717, "loss": 2.2723, "step": 395 }, { "epoch": 0.0, "grad_norm": 1.4140625, "learning_rate": 0.00019999989069861346, "loss": 2.2797, "step": 400 }, { "epoch": 0.0, "grad_norm": 1.359375, "learning_rate": 0.00019999988794879345, "loss": 2.3519, "step": 405 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.00019999988516481167, "loss": 2.1594, "step": 410 }, { "epoch": 0.0, "grad_norm": 1.078125, "learning_rate": 0.0001999998823466681, "loss": 2.2655, "step": 415 }, { "epoch": 0.0, "grad_norm": 1.03125, "learning_rate": 0.00019999987949436278, "loss": 2.1902, "step": 420 }, { "epoch": 0.0, "grad_norm": 1.1796875, "learning_rate": 0.00019999987660789567, "loss": 2.3331, "step": 425 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.00019999987368726681, "loss": 2.1481, "step": 430 }, { "epoch": 0.0, "grad_norm": 1.609375, "learning_rate": 0.00019999987073247616, "loss": 2.0343, "step": 435 }, { "epoch": 0.0, "grad_norm": 1.203125, "learning_rate": 0.00019999986774352376, "loss": 2.3246, "step": 440 }, { "epoch": 0.0, "grad_norm": 1.15625, "learning_rate": 0.00019999986472040956, "loss": 2.1942, "step": 445 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.0001999998616631336, "loss": 2.442, "step": 450 }, { "epoch": 0.0, "grad_norm": 1.3203125, "learning_rate": 0.00019999985857169588, "loss": 2.2454, "step": 455 }, { "epoch": 0.0, "grad_norm": 1.1171875, "learning_rate": 0.0001999998554460964, "loss": 2.098, "step": 460 }, { "epoch": 0.0, "grad_norm": 1.2734375, "learning_rate": 0.00019999985228633514, "loss": 2.298, "step": 465 }, { "epoch": 0.0, "grad_norm": 1.1171875, "learning_rate": 0.00019999984909241215, "loss": 2.2791, "step": 470 }, { "epoch": 0.0, "grad_norm": 1.2578125, "learning_rate": 0.00019999984586432736, "loss": 2.1733, "step": 475 }, { "epoch": 0.0, "grad_norm": 1.046875, "learning_rate": 0.00019999984260208082, "loss": 2.261, "step": 480 }, { "epoch": 0.0, "grad_norm": 1.3828125, "learning_rate": 0.00019999983930567254, "loss": 2.0042, "step": 485 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.00019999983597510248, "loss": 2.3261, "step": 490 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.00019999983261037067, "loss": 2.2346, "step": 495 }, { "epoch": 0.0, "grad_norm": 1.546875, "learning_rate": 0.00019999982921147712, "loss": 2.256, "step": 500 }, { "epoch": 0.0, "grad_norm": 1.109375, "learning_rate": 0.0001999998257784218, "loss": 2.1707, "step": 505 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999982231120472, "loss": 2.2094, "step": 510 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.0001999998188098259, "loss": 2.2973, "step": 515 }, { "epoch": 0.0, "grad_norm": 1.5390625, "learning_rate": 0.0001999998152742853, "loss": 2.3125, "step": 520 }, { "epoch": 0.0, "grad_norm": 0.94921875, "learning_rate": 0.00019999981170458296, "loss": 2.1885, "step": 525 }, { "epoch": 0.0, "grad_norm": 1.0703125, "learning_rate": 0.00019999980810071888, "loss": 2.2394, "step": 530 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.00019999980446269307, "loss": 2.3164, "step": 535 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.0001999998007905055, "loss": 2.2626, "step": 540 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.00019999979708415616, "loss": 2.1435, "step": 545 }, { "epoch": 0.0, "grad_norm": 1.4375, "learning_rate": 0.00019999979334364508, "loss": 2.4347, "step": 550 }, { "epoch": 0.0, "grad_norm": 1.453125, "learning_rate": 0.0001999997895689723, "loss": 2.2525, "step": 555 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999978576013775, "loss": 2.1709, "step": 560 }, { "epoch": 0.0, "grad_norm": 1.1796875, "learning_rate": 0.00019999978191714146, "loss": 2.2178, "step": 565 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.00019999977803998343, "loss": 2.1765, "step": 570 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999977412866367, "loss": 2.3027, "step": 575 }, { "epoch": 0.0, "grad_norm": 1.359375, "learning_rate": 0.00019999977018318217, "loss": 2.1775, "step": 580 }, { "epoch": 0.0, "grad_norm": 1.1875, "learning_rate": 0.00019999976620353892, "loss": 2.1166, "step": 585 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.00019999976218973396, "loss": 2.2959, "step": 590 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.00019999975814176724, "loss": 2.1888, "step": 595 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.00019999975405963883, "loss": 2.259, "step": 600 }, { "epoch": 0.0, "grad_norm": 1.078125, "learning_rate": 0.00019999974994334865, "loss": 2.2073, "step": 605 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.00019999974579289675, "loss": 2.403, "step": 610 }, { "epoch": 0.0, "grad_norm": 1.265625, "learning_rate": 0.00019999974160828313, "loss": 2.4529, "step": 615 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.0001999997373895078, "loss": 2.2087, "step": 620 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999973313657073, "loss": 2.1733, "step": 625 }, { "epoch": 0.0, "grad_norm": 1.4375, "learning_rate": 0.00019999972884947193, "loss": 2.2162, "step": 630 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.00019999972452821143, "loss": 2.191, "step": 635 }, { "epoch": 0.0, "grad_norm": 1.1640625, "learning_rate": 0.00019999972017278918, "loss": 2.3274, "step": 640 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999971578320524, "loss": 2.2213, "step": 645 }, { "epoch": 0.0, "grad_norm": 1.2109375, "learning_rate": 0.00019999971135945956, "loss": 2.1613, "step": 650 }, { "epoch": 0.0, "grad_norm": 1.25, "learning_rate": 0.00019999970690155218, "loss": 2.2629, "step": 655 }, { "epoch": 0.0, "grad_norm": 1.265625, "learning_rate": 0.00019999970240948308, "loss": 2.2418, "step": 660 }, { "epoch": 0.0, "grad_norm": 1.40625, "learning_rate": 0.00019999969788325227, "loss": 2.175, "step": 665 }, { "epoch": 0.0, "grad_norm": 1.2578125, "learning_rate": 0.00019999969332285978, "loss": 2.3216, "step": 670 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999968872830555, "loss": 2.0236, "step": 675 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.0001999996840995896, "loss": 2.2018, "step": 680 }, { "epoch": 0.0, "grad_norm": 1.2890625, "learning_rate": 0.00019999967943671196, "loss": 2.2677, "step": 685 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.00019999967473967263, "loss": 2.2805, "step": 690 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.0001999996700084716, "loss": 2.1601, "step": 695 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.00019999966524310883, "loss": 2.1938, "step": 700 }, { "epoch": 0.0, "grad_norm": 1.3828125, "learning_rate": 0.0001999996604435844, "loss": 2.2306, "step": 705 }, { "epoch": 0.0, "grad_norm": 1.3046875, "learning_rate": 0.00019999965560989823, "loss": 2.3305, "step": 710 }, { "epoch": 0.0, "grad_norm": 1.2890625, "learning_rate": 0.0001999996507420504, "loss": 2.1069, "step": 715 }, { "epoch": 0.0, "grad_norm": 1.28125, "learning_rate": 0.00019999964584004086, "loss": 2.2555, "step": 720 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.00019999964090386966, "loss": 2.0928, "step": 725 }, { "epoch": 0.0, "grad_norm": 1.0234375, "learning_rate": 0.00019999963593353673, "loss": 2.29, "step": 730 }, { "epoch": 0.0, "grad_norm": 1.359375, "learning_rate": 0.00019999963092904213, "loss": 2.2303, "step": 735 }, { "epoch": 0.0, "grad_norm": 1.3046875, "learning_rate": 0.00019999962589038585, "loss": 2.351, "step": 740 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.00019999962081756784, "loss": 2.3407, "step": 745 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.0001999996157105882, "loss": 2.3023, "step": 750 }, { "epoch": 0.0, "grad_norm": 1.4375, "learning_rate": 0.00019999961056944683, "loss": 2.2317, "step": 755 }, { "epoch": 0.0, "grad_norm": 1.5390625, "learning_rate": 0.00019999960539414383, "loss": 2.2095, "step": 760 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.0001999996001846791, "loss": 2.2116, "step": 765 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.00019999959494105274, "loss": 2.0847, "step": 770 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.00019999958966326466, "loss": 2.182, "step": 775 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.00019999958435131494, "loss": 2.4307, "step": 780 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999957900520353, "loss": 2.1324, "step": 785 }, { "epoch": 0.0, "grad_norm": 1.34375, "learning_rate": 0.00019999957362493045, "loss": 2.1563, "step": 790 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.0001999995682104957, "loss": 2.1682, "step": 795 }, { "epoch": 0.0, "grad_norm": 1.34375, "learning_rate": 0.0001999995627618993, "loss": 2.0408, "step": 800 }, { "epoch": 0.0, "grad_norm": 1.6171875, "learning_rate": 0.00019999955727914123, "loss": 2.1327, "step": 805 }, { "epoch": 0.0, "grad_norm": 1.21875, "learning_rate": 0.00019999955176222152, "loss": 2.2939, "step": 810 }, { "epoch": 0.0, "grad_norm": 1.53125, "learning_rate": 0.00019999954621114011, "loss": 2.0967, "step": 815 }, { "epoch": 0.0, "grad_norm": 1.671875, "learning_rate": 0.00019999954062589707, "loss": 2.3232, "step": 820 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999953500649236, "loss": 2.3499, "step": 825 }, { "epoch": 0.0, "grad_norm": 1.6875, "learning_rate": 0.000199999529352926, "loss": 2.3494, "step": 830 }, { "epoch": 0.0, "grad_norm": 1.46875, "learning_rate": 0.00019999952366519798, "loss": 2.2626, "step": 835 }, { "epoch": 0.0, "grad_norm": 1.2890625, "learning_rate": 0.0001999995179433083, "loss": 2.1258, "step": 840 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.000199999512187257, "loss": 2.28, "step": 845 }, { "epoch": 0.0, "grad_norm": 1.40625, "learning_rate": 0.00019999950639704403, "loss": 2.2541, "step": 850 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.00019999950057266944, "loss": 2.2327, "step": 855 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999949471413318, "loss": 2.2855, "step": 860 }, { "epoch": 0.0, "grad_norm": 1.28125, "learning_rate": 0.00019999948882143528, "loss": 2.1128, "step": 865 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.00019999948289457575, "loss": 2.2122, "step": 870 }, { "epoch": 0.0, "grad_norm": 1.9375, "learning_rate": 0.00019999947693355457, "loss": 2.2181, "step": 875 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.00019999947093837176, "loss": 2.3615, "step": 880 }, { "epoch": 0.0, "grad_norm": 1.3046875, "learning_rate": 0.00019999946490902734, "loss": 2.1773, "step": 885 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.00019999945884552128, "loss": 2.2166, "step": 890 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999945274785358, "loss": 2.2889, "step": 895 }, { "epoch": 0.0, "grad_norm": 1.3203125, "learning_rate": 0.00019999944661602427, "loss": 2.1197, "step": 900 }, { "epoch": 0.0, "grad_norm": 1.1796875, "learning_rate": 0.00019999944045003332, "loss": 2.2592, "step": 905 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999943424988076, "loss": 2.2333, "step": 910 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999942801556656, "loss": 2.2804, "step": 915 }, { "epoch": 0.0, "grad_norm": 1.453125, "learning_rate": 0.00019999942174709078, "loss": 2.1851, "step": 920 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999941544445333, "loss": 2.3352, "step": 925 }, { "epoch": 0.0, "grad_norm": 1.453125, "learning_rate": 0.0001999994091076543, "loss": 2.3637, "step": 930 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999940273669365, "loss": 2.5094, "step": 935 }, { "epoch": 0.0, "grad_norm": 1.4609375, "learning_rate": 0.0001999993963315714, "loss": 2.1136, "step": 940 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.00019999938989228753, "loss": 2.2631, "step": 945 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999938341884205, "loss": 2.224, "step": 950 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.000199999376911235, "loss": 2.0599, "step": 955 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.0001999993703694663, "loss": 2.3976, "step": 960 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.00019999936379353603, "loss": 2.3665, "step": 965 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999935718344417, "loss": 2.2484, "step": 970 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.0001999993505391907, "loss": 2.3746, "step": 975 }, { "epoch": 0.0, "grad_norm": 1.3984375, "learning_rate": 0.00019999934386077562, "loss": 2.1719, "step": 980 }, { "epoch": 0.0, "grad_norm": 1.3515625, "learning_rate": 0.000199999337148199, "loss": 2.1159, "step": 985 }, { "epoch": 0.0, "grad_norm": 1.359375, "learning_rate": 0.00019999933040146077, "loss": 2.2036, "step": 990 }, { "epoch": 0.0, "grad_norm": 1.5078125, "learning_rate": 0.00019999932362056095, "loss": 2.1803, "step": 995 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.00019999931680549955, "loss": 2.2359, "step": 1000 }, { "epoch": 0.0, "grad_norm": 1.6328125, "learning_rate": 0.00019999930995627657, "loss": 2.4384, "step": 1005 }, { "epoch": 0.0, "grad_norm": 1.234375, "learning_rate": 0.000199999303072892, "loss": 2.3037, "step": 1010 }, { "epoch": 0.0, "grad_norm": 1.703125, "learning_rate": 0.0001999992961553459, "loss": 2.1458, "step": 1015 }, { "epoch": 0.0, "grad_norm": 1.546875, "learning_rate": 0.00019999928920363816, "loss": 2.1513, "step": 1020 }, { "epoch": 0.0, "grad_norm": 1.40625, "learning_rate": 0.0001999992822177689, "loss": 2.254, "step": 1025 }, { "epoch": 0.0, "grad_norm": 3.140625, "learning_rate": 0.00019999927519773807, "loss": 2.4114, "step": 1030 }, { "epoch": 0.0, "grad_norm": 1.625, "learning_rate": 0.00019999926814354564, "loss": 2.0214, "step": 1035 }, { "epoch": 0.0, "grad_norm": 1.5625, "learning_rate": 0.00019999926105519167, "loss": 2.198, "step": 1040 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999925393267612, "loss": 2.2064, "step": 1045 }, { "epoch": 0.0, "grad_norm": 1.7421875, "learning_rate": 0.00019999924677599904, "loss": 2.342, "step": 1050 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999923958516038, "loss": 2.3052, "step": 1055 }, { "epoch": 0.0, "grad_norm": 1.4609375, "learning_rate": 0.0001999992323601602, "loss": 2.3525, "step": 1060 }, { "epoch": 0.0, "grad_norm": 1.359375, "learning_rate": 0.0001999992251009984, "loss": 2.4058, "step": 1065 }, { "epoch": 0.0, "grad_norm": 1.4921875, "learning_rate": 0.00019999921780767513, "loss": 2.2206, "step": 1070 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.00019999921048019026, "loss": 2.134, "step": 1075 }, { "epoch": 0.0, "grad_norm": 1.3828125, "learning_rate": 0.00019999920311854387, "loss": 2.2574, "step": 1080 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.00019999919572273595, "loss": 2.3631, "step": 1085 }, { "epoch": 0.0, "grad_norm": 2.328125, "learning_rate": 0.00019999918829276644, "loss": 2.3736, "step": 1090 }, { "epoch": 0.0, "grad_norm": 1.46875, "learning_rate": 0.00019999918082863545, "loss": 2.2216, "step": 1095 }, { "epoch": 0.0, "grad_norm": 1.21875, "learning_rate": 0.0001999991733303429, "loss": 2.2636, "step": 1100 }, { "epoch": 0.0, "grad_norm": 1.265625, "learning_rate": 0.00019999916579788884, "loss": 2.1205, "step": 1105 }, { "epoch": 0.0, "grad_norm": 1.3984375, "learning_rate": 0.0001999991582312732, "loss": 2.2285, "step": 1110 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.00019999915063049608, "loss": 2.2566, "step": 1115 }, { "epoch": 0.0, "grad_norm": 1.6484375, "learning_rate": 0.00019999914299555745, "loss": 2.1312, "step": 1120 }, { "epoch": 0.0, "grad_norm": 1.4453125, "learning_rate": 0.00019999913532645726, "loss": 2.2534, "step": 1125 }, { "epoch": 0.0, "grad_norm": 1.53125, "learning_rate": 0.00019999912762319556, "loss": 2.348, "step": 1130 }, { "epoch": 0.0, "grad_norm": 1.3515625, "learning_rate": 0.00019999911988577236, "loss": 2.1846, "step": 1135 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999911211418764, "loss": 2.2049, "step": 1140 }, { "epoch": 0.0, "grad_norm": 3.0, "learning_rate": 0.0001999991043084414, "loss": 2.2365, "step": 1145 }, { "epoch": 0.0, "grad_norm": 1.3125, "learning_rate": 0.00019999909646853365, "loss": 2.1308, "step": 1150 }, { "epoch": 0.0, "grad_norm": 1.3046875, "learning_rate": 0.00019999908859446444, "loss": 2.3056, "step": 1155 }, { "epoch": 0.0, "grad_norm": 1.65625, "learning_rate": 0.00019999908068623368, "loss": 2.2543, "step": 1160 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.0001999990727438414, "loss": 2.4335, "step": 1165 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.00019999906476728766, "loss": 2.2608, "step": 1170 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999905675657244, "loss": 2.2045, "step": 1175 }, { "epoch": 0.0, "grad_norm": 1.5859375, "learning_rate": 0.0001999990487116957, "loss": 2.1604, "step": 1180 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.0001999990406326575, "loss": 2.2555, "step": 1185 }, { "epoch": 0.0, "grad_norm": 1.765625, "learning_rate": 0.0001999990325194578, "loss": 2.1972, "step": 1190 }, { "epoch": 0.0, "grad_norm": 1.2734375, "learning_rate": 0.00019999902437209662, "loss": 2.1046, "step": 1195 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.00019999901619057394, "loss": 2.0816, "step": 1200 }, { "epoch": 0.0, "grad_norm": 1.3828125, "learning_rate": 0.00019999900797488979, "loss": 2.2993, "step": 1205 }, { "epoch": 0.0, "grad_norm": 1.6484375, "learning_rate": 0.00019999899972504415, "loss": 1.9363, "step": 1210 }, { "epoch": 0.0, "grad_norm": 1.28125, "learning_rate": 0.00019999899144103708, "loss": 2.4032, "step": 1215 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999898312286852, "loss": 2.3658, "step": 1220 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999897477053852, "loss": 2.344, "step": 1225 }, { "epoch": 0.0, "grad_norm": 1.34375, "learning_rate": 0.000199998966384047, "loss": 2.3437, "step": 1230 }, { "epoch": 0.0, "grad_norm": 1.5703125, "learning_rate": 0.00019999895796339408, "loss": 2.2266, "step": 1235 }, { "epoch": 0.0, "grad_norm": 2.0625, "learning_rate": 0.00019999894950857965, "loss": 2.0957, "step": 1240 }, { "epoch": 0.0, "grad_norm": 1.3984375, "learning_rate": 0.0001999989410196038, "loss": 2.2574, "step": 1245 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.0001999989324964665, "loss": 2.3439, "step": 1250 }, { "epoch": 0.0, "grad_norm": 1.3984375, "learning_rate": 0.0001999989239391677, "loss": 2.0943, "step": 1255 }, { "epoch": 0.0, "grad_norm": 1.5078125, "learning_rate": 0.0001999989153477075, "loss": 2.1393, "step": 1260 }, { "epoch": 0.0, "grad_norm": 1.3203125, "learning_rate": 0.00019999890672208585, "loss": 2.4636, "step": 1265 }, { "epoch": 0.0, "grad_norm": 1.6171875, "learning_rate": 0.00019999889806230275, "loss": 2.313, "step": 1270 }, { "epoch": 0.0, "grad_norm": 1.546875, "learning_rate": 0.00019999888936835823, "loss": 2.1632, "step": 1275 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999888064025227, "loss": 2.4098, "step": 1280 }, { "epoch": 0.0, "grad_norm": 1.453125, "learning_rate": 0.00019999887187798488, "loss": 2.2953, "step": 1285 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.00019999886308155604, "loss": 2.28, "step": 1290 }, { "epoch": 0.0, "grad_norm": 1.1875, "learning_rate": 0.0001999988542509658, "loss": 2.2119, "step": 1295 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.00019999884538621414, "loss": 2.2715, "step": 1300 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.00019999883648730103, "loss": 2.1164, "step": 1305 }, { "epoch": 0.0, "grad_norm": 1.5546875, "learning_rate": 0.0001999988275542265, "loss": 2.1642, "step": 1310 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.00019999881858699062, "loss": 2.246, "step": 1315 }, { "epoch": 0.0, "grad_norm": 1.515625, "learning_rate": 0.00019999880958559329, "loss": 2.129, "step": 1320 }, { "epoch": 0.0, "grad_norm": 1.2734375, "learning_rate": 0.00019999880055003455, "loss": 2.2931, "step": 1325 }, { "epoch": 0.0, "grad_norm": 1.609375, "learning_rate": 0.0001999987914803144, "loss": 2.3641, "step": 1330 }, { "epoch": 0.0, "grad_norm": 1.3828125, "learning_rate": 0.00019999878237643288, "loss": 2.2184, "step": 1335 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.00019999877323838993, "loss": 2.2087, "step": 1340 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.0001999987640661856, "loss": 2.1239, "step": 1345 }, { "epoch": 0.0, "grad_norm": 1.4921875, "learning_rate": 0.00019999875485981987, "loss": 2.2889, "step": 1350 }, { "epoch": 0.0, "grad_norm": 1.2734375, "learning_rate": 0.00019999874561929274, "loss": 2.2789, "step": 1355 }, { "epoch": 0.0, "grad_norm": 1.28125, "learning_rate": 0.00019999873634460427, "loss": 2.251, "step": 1360 }, { "epoch": 0.0, "grad_norm": 1.34375, "learning_rate": 0.00019999872703575439, "loss": 2.2973, "step": 1365 }, { "epoch": 0.0, "grad_norm": 1.375, "learning_rate": 0.00019999871769274313, "loss": 2.2469, "step": 1370 }, { "epoch": 0.0, "grad_norm": 1.5546875, "learning_rate": 0.0001999987083155705, "loss": 2.0209, "step": 1375 }, { "epoch": 0.0, "grad_norm": 1.4921875, "learning_rate": 0.0001999986989042365, "loss": 2.3687, "step": 1380 }, { "epoch": 0.0, "grad_norm": 1.4609375, "learning_rate": 0.0001999986894587411, "loss": 2.1836, "step": 1385 }, { "epoch": 0.0, "grad_norm": 1.2734375, "learning_rate": 0.00019999867997908436, "loss": 2.1712, "step": 1390 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.0001999986704652663, "loss": 2.2251, "step": 1395 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.0001999986609172868, "loss": 2.3569, "step": 1400 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.00019999865133514598, "loss": 2.2389, "step": 1405 }, { "epoch": 0.0, "grad_norm": 1.40625, "learning_rate": 0.0001999986417188438, "loss": 2.266, "step": 1410 }, { "epoch": 0.0, "grad_norm": 1.15625, "learning_rate": 0.0001999986320683803, "loss": 2.227, "step": 1415 }, { "epoch": 0.0, "grad_norm": 1.2109375, "learning_rate": 0.0001999986223837554, "loss": 2.1799, "step": 1420 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.0001999986126649692, "loss": 2.3677, "step": 1425 }, { "epoch": 0.0, "grad_norm": 1.4140625, "learning_rate": 0.00019999860291202166, "loss": 2.1736, "step": 1430 }, { "epoch": 0.0, "grad_norm": 1.7265625, "learning_rate": 0.00019999859312491277, "loss": 2.3106, "step": 1435 }, { "epoch": 0.0, "grad_norm": 1.65625, "learning_rate": 0.00019999858330364256, "loss": 2.3685, "step": 1440 }, { "epoch": 0.0, "grad_norm": 1.4140625, "learning_rate": 0.00019999857344821101, "loss": 2.1046, "step": 1445 }, { "epoch": 0.0, "grad_norm": 1.640625, "learning_rate": 0.00019999856355861813, "loss": 2.2262, "step": 1450 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.00019999855363486392, "loss": 2.4277, "step": 1455 }, { "epoch": 0.0, "grad_norm": 1.5625, "learning_rate": 0.0001999985436769484, "loss": 1.9944, "step": 1460 }, { "epoch": 0.0, "grad_norm": 1.5859375, "learning_rate": 0.00019999853368487158, "loss": 2.3459, "step": 1465 }, { "epoch": 0.0, "grad_norm": 1.46875, "learning_rate": 0.00019999852365863344, "loss": 2.2262, "step": 1470 }, { "epoch": 0.0, "grad_norm": 1.6328125, "learning_rate": 0.000199998513598234, "loss": 2.1097, "step": 1475 }, { "epoch": 0.0, "grad_norm": 1.3203125, "learning_rate": 0.00019999850350367322, "loss": 2.0718, "step": 1480 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999849337495117, "loss": 2.347, "step": 1485 }, { "epoch": 0.0, "grad_norm": 1.3984375, "learning_rate": 0.00019999848321206778, "loss": 2.3348, "step": 1490 }, { "epoch": 0.0, "grad_norm": 1.5859375, "learning_rate": 0.00019999847301502315, "loss": 2.3611, "step": 1495 }, { "epoch": 0.0, "grad_norm": 1.6171875, "learning_rate": 0.0001999984627838172, "loss": 2.3309, "step": 1500 }, { "epoch": 0.0, "grad_norm": 1.6640625, "learning_rate": 0.00019999845251844996, "loss": 2.1946, "step": 1505 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.00019999844221892143, "loss": 2.1322, "step": 1510 }, { "epoch": 0.0, "grad_norm": 1.6796875, "learning_rate": 0.00019999843188523163, "loss": 2.1799, "step": 1515 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.00019999842151738052, "loss": 2.1758, "step": 1520 }, { "epoch": 0.0, "grad_norm": 1.3203125, "learning_rate": 0.00019999841111536818, "loss": 2.3735, "step": 1525 }, { "epoch": 0.0, "grad_norm": 1.2578125, "learning_rate": 0.00019999840067919453, "loss": 2.264, "step": 1530 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999839020885964, "loss": 2.2419, "step": 1535 }, { "epoch": 0.0, "grad_norm": 1.796875, "learning_rate": 0.0001999983797043635, "loss": 2.1499, "step": 1540 }, { "epoch": 0.0, "grad_norm": 1.2734375, "learning_rate": 0.00019999836916570607, "loss": 2.4034, "step": 1545 }, { "epoch": 0.0, "grad_norm": 1.4609375, "learning_rate": 0.00019999835859288738, "loss": 2.3069, "step": 1550 }, { "epoch": 0.0, "grad_norm": 1.6953125, "learning_rate": 0.00019999834798590746, "loss": 2.279, "step": 1555 }, { "epoch": 0.0, "grad_norm": 1.5859375, "learning_rate": 0.0001999983373447663, "loss": 2.1695, "step": 1560 }, { "epoch": 0.0, "grad_norm": 1.4921875, "learning_rate": 0.00019999832666946386, "loss": 2.2196, "step": 1565 }, { "epoch": 0.0, "grad_norm": 1.5078125, "learning_rate": 0.0001999983159600002, "loss": 2.2451, "step": 1570 }, { "epoch": 0.0, "grad_norm": 1.6171875, "learning_rate": 0.0001999983052163753, "loss": 2.4099, "step": 1575 }, { "epoch": 0.0, "grad_norm": 1.3515625, "learning_rate": 0.00019999829443858916, "loss": 2.1981, "step": 1580 }, { "epoch": 0.0, "grad_norm": 1.609375, "learning_rate": 0.0001999982836266418, "loss": 2.2667, "step": 1585 }, { "epoch": 0.0, "grad_norm": 1.5078125, "learning_rate": 0.0001999982727805332, "loss": 2.2546, "step": 1590 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.0001999982619002634, "loss": 2.1575, "step": 1595 }, { "epoch": 0.0, "grad_norm": 1.78125, "learning_rate": 0.00019999825098583233, "loss": 2.2531, "step": 1600 }, { "epoch": 0.0, "grad_norm": 1.703125, "learning_rate": 0.0001999982400372401, "loss": 2.3461, "step": 1605 }, { "epoch": 0.0, "grad_norm": 1.6015625, "learning_rate": 0.00019999822905448662, "loss": 2.2799, "step": 1610 }, { "epoch": 0.0, "grad_norm": 1.5546875, "learning_rate": 0.00019999821803757192, "loss": 2.3824, "step": 1615 }, { "epoch": 0.0, "grad_norm": 1.4296875, "learning_rate": 0.00019999820698649606, "loss": 2.2943, "step": 1620 }, { "epoch": 0.0, "grad_norm": 1.5546875, "learning_rate": 0.00019999819590125898, "loss": 2.3458, "step": 1625 }, { "epoch": 0.0, "grad_norm": 1.265625, "learning_rate": 0.0001999981847818607, "loss": 2.3139, "step": 1630 }, { "epoch": 0.0, "grad_norm": 1.5390625, "learning_rate": 0.0001999981736283012, "loss": 2.161, "step": 1635 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.00019999816244058055, "loss": 2.4334, "step": 1640 }, { "epoch": 0.0, "grad_norm": 1.328125, "learning_rate": 0.0001999981512186987, "loss": 2.1683, "step": 1645 }, { "epoch": 0.0, "grad_norm": 1.40625, "learning_rate": 0.00019999813996265567, "loss": 2.2577, "step": 1650 }, { "epoch": 0.0, "grad_norm": 1.5703125, "learning_rate": 0.00019999812867245144, "loss": 2.187, "step": 1655 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999811734808606, "loss": 2.1915, "step": 1660 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999810598955953, "loss": 2.2403, "step": 1665 }, { "epoch": 0.0, "grad_norm": 1.4453125, "learning_rate": 0.0001999980945968718, "loss": 2.3249, "step": 1670 }, { "epoch": 0.0, "grad_norm": 1.5078125, "learning_rate": 0.0001999980831700229, "loss": 2.2495, "step": 1675 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.00019999807170901286, "loss": 2.2013, "step": 1680 }, { "epoch": 0.0, "grad_norm": 1.734375, "learning_rate": 0.00019999806021384168, "loss": 2.3149, "step": 1685 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.00019999804868450934, "loss": 2.2121, "step": 1690 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.00019999803712101585, "loss": 2.227, "step": 1695 }, { "epoch": 0.0, "grad_norm": 1.4453125, "learning_rate": 0.0001999980255233612, "loss": 2.329, "step": 1700 }, { "epoch": 0.0, "grad_norm": 1.9375, "learning_rate": 0.00019999801389154542, "loss": 2.4041, "step": 1705 }, { "epoch": 0.0, "grad_norm": 1.2265625, "learning_rate": 0.0001999980022255685, "loss": 2.3898, "step": 1710 }, { "epoch": 0.0, "grad_norm": 2.078125, "learning_rate": 0.00019999799052543047, "loss": 2.2985, "step": 1715 }, { "epoch": 0.0, "grad_norm": 1.234375, "learning_rate": 0.00019999797879113128, "loss": 2.1848, "step": 1720 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.000199997967022671, "loss": 2.2209, "step": 1725 }, { "epoch": 0.0, "grad_norm": 1.3671875, "learning_rate": 0.0001999979552200496, "loss": 2.1941, "step": 1730 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.00019999794338326706, "loss": 2.1926, "step": 1735 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999793151232343, "loss": 2.3224, "step": 1740 }, { "epoch": 0.0, "grad_norm": 1.671875, "learning_rate": 0.00019999791960721868, "loss": 2.223, "step": 1745 }, { "epoch": 0.0, "grad_norm": 1.234375, "learning_rate": 0.00019999790766795283, "loss": 2.1808, "step": 1750 }, { "epoch": 0.0, "grad_norm": 1.4140625, "learning_rate": 0.00019999789569452586, "loss": 2.1489, "step": 1755 }, { "epoch": 0.0, "grad_norm": 1.65625, "learning_rate": 0.00019999788368693785, "loss": 2.3028, "step": 1760 }, { "epoch": 0.0, "grad_norm": 1.40625, "learning_rate": 0.00019999787164518871, "loss": 2.3827, "step": 1765 }, { "epoch": 0.0, "grad_norm": 1.734375, "learning_rate": 0.00019999785956927848, "loss": 2.2855, "step": 1770 }, { "epoch": 0.0, "grad_norm": 1.3046875, "learning_rate": 0.00019999784745920718, "loss": 2.1894, "step": 1775 }, { "epoch": 0.0, "grad_norm": 1.7734375, "learning_rate": 0.0001999978353149748, "loss": 2.2698, "step": 1780 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.00019999782313658132, "loss": 2.3544, "step": 1785 }, { "epoch": 0.0, "grad_norm": 1.3359375, "learning_rate": 0.0001999978109240268, "loss": 2.2418, "step": 1790 }, { "epoch": 0.0, "grad_norm": 1.859375, "learning_rate": 0.00019999779867731123, "loss": 2.1831, "step": 1795 }, { "epoch": 0.0, "grad_norm": 1.7421875, "learning_rate": 0.0001999977863964346, "loss": 2.2457, "step": 1800 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999777408139687, "loss": 2.3132, "step": 1805 }, { "epoch": 0.0, "grad_norm": 1.5625, "learning_rate": 0.00019999776173219811, "loss": 2.3542, "step": 1810 }, { "epoch": 0.0, "grad_norm": 1.5, "learning_rate": 0.00019999774934883831, "loss": 2.2028, "step": 1815 }, { "epoch": 0.0, "grad_norm": 1.53125, "learning_rate": 0.00019999773693131744, "loss": 2.2546, "step": 1820 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.00019999772447963556, "loss": 2.1192, "step": 1825 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.00019999771199379263, "loss": 2.3402, "step": 1830 }, { "epoch": 0.0, "grad_norm": 2.546875, "learning_rate": 0.00019999769947378867, "loss": 2.2695, "step": 1835 }, { "epoch": 0.0, "grad_norm": 1.2421875, "learning_rate": 0.00019999768691962366, "loss": 2.0718, "step": 1840 }, { "epoch": 0.0, "grad_norm": 1.6796875, "learning_rate": 0.00019999767433129763, "loss": 2.218, "step": 1845 }, { "epoch": 0.0, "grad_norm": 1.4375, "learning_rate": 0.00019999766170881062, "loss": 2.2075, "step": 1850 }, { "epoch": 0.0, "grad_norm": 1.5625, "learning_rate": 0.00019999764905216256, "loss": 2.2823, "step": 1855 }, { "epoch": 0.0, "grad_norm": 1.4453125, "learning_rate": 0.00019999763636135352, "loss": 2.1773, "step": 1860 }, { "epoch": 0.0, "grad_norm": 1.34375, "learning_rate": 0.00019999762363638344, "loss": 2.0164, "step": 1865 }, { "epoch": 0.0, "grad_norm": 1.71875, "learning_rate": 0.00019999761087725236, "loss": 2.2334, "step": 1870 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.0001999975980839603, "loss": 2.2705, "step": 1875 }, { "epoch": 0.0, "grad_norm": 1.765625, "learning_rate": 0.00019999758525650725, "loss": 2.2924, "step": 1880 }, { "epoch": 0.0, "grad_norm": 1.390625, "learning_rate": 0.00019999757239489322, "loss": 2.2156, "step": 1885 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.00019999755949911817, "loss": 2.3795, "step": 1890 }, { "epoch": 0.0, "grad_norm": 1.5546875, "learning_rate": 0.00019999754656918218, "loss": 2.1866, "step": 1895 }, { "epoch": 0.0, "grad_norm": 1.875, "learning_rate": 0.00019999753360508518, "loss": 2.1879, "step": 1900 }, { "epoch": 0.0, "grad_norm": 1.453125, "learning_rate": 0.00019999752060682722, "loss": 2.283, "step": 1905 }, { "epoch": 0.0, "grad_norm": 1.4140625, "learning_rate": 0.00019999750757440827, "loss": 2.1592, "step": 1910 }, { "epoch": 0.0, "grad_norm": 1.484375, "learning_rate": 0.00019999749450782842, "loss": 2.2714, "step": 1915 }, { "epoch": 0.0, "grad_norm": 1.5703125, "learning_rate": 0.00019999748140708755, "loss": 2.2114, "step": 1920 }, { "epoch": 0.0, "grad_norm": 1.5234375, "learning_rate": 0.00019999746827218577, "loss": 2.1543, "step": 1925 }, { "epoch": 0.0, "grad_norm": 1.546875, "learning_rate": 0.000199997455103123, "loss": 2.0782, "step": 1930 }, { "epoch": 0.0, "grad_norm": 1.5703125, "learning_rate": 0.00019999744189989934, "loss": 2.0649, "step": 1935 }, { "epoch": 0.0, "grad_norm": 1.5703125, "learning_rate": 0.0001999974286625147, "loss": 2.1522, "step": 1940 }, { "epoch": 0.0, "grad_norm": 1.609375, "learning_rate": 0.00019999741539096912, "loss": 2.2912, "step": 1945 }, { "epoch": 0.0, "grad_norm": 1.2578125, "learning_rate": 0.00019999740208526264, "loss": 2.3928, "step": 1950 }, { "epoch": 0.0, "grad_norm": 1.8046875, "learning_rate": 0.00019999738874539518, "loss": 2.1745, "step": 1955 }, { "epoch": 0.0, "grad_norm": 1.5390625, "learning_rate": 0.00019999737537136687, "loss": 2.3722, "step": 1960 }, { "epoch": 0.0, "grad_norm": 1.484375, "learning_rate": 0.0001999973619631776, "loss": 2.151, "step": 1965 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.00019999734852082742, "loss": 2.2544, "step": 1970 }, { "epoch": 0.0, "grad_norm": 1.640625, "learning_rate": 0.00019999733504431636, "loss": 2.1575, "step": 1975 }, { "epoch": 0.0, "grad_norm": 1.4140625, "learning_rate": 0.00019999732153364437, "loss": 2.047, "step": 1980 }, { "epoch": 0.0, "grad_norm": 1.3828125, "learning_rate": 0.0001999973079888115, "loss": 2.2041, "step": 1985 }, { "epoch": 0.0, "grad_norm": 1.46875, "learning_rate": 0.00019999729440981772, "loss": 2.2398, "step": 1990 }, { "epoch": 0.0, "grad_norm": 1.53125, "learning_rate": 0.00019999728079666304, "loss": 2.4203, "step": 1995 }, { "epoch": 0.0, "grad_norm": 1.421875, "learning_rate": 0.0001999972671493475, "loss": 2.219, "step": 2000 }, { "epoch": 0.0, "grad_norm": 1.5546875, "learning_rate": 0.00019999725346787108, "loss": 2.3168, "step": 2005 }, { "epoch": 0.0, "grad_norm": 1.6171875, "learning_rate": 0.00019999723975223376, "loss": 2.4317, "step": 2010 }, { "epoch": 0.0, "grad_norm": 1.5703125, "learning_rate": 0.0001999972260024356, "loss": 2.2609, "step": 2015 }, { "epoch": 0.0, "grad_norm": 1.5859375, "learning_rate": 0.00019999721221847656, "loss": 2.4252, "step": 2020 }, { "epoch": 0.0, "grad_norm": 1.1640625, "learning_rate": 0.00019999719840035665, "loss": 2.2118, "step": 2025 }, { "epoch": 0.0, "grad_norm": 1.4609375, "learning_rate": 0.00019999718454807587, "loss": 2.4189, "step": 2030 }, { "epoch": 0.0, "grad_norm": 1.6328125, "learning_rate": 0.00019999717066163428, "loss": 2.3118, "step": 2035 }, { "epoch": 0.0, "grad_norm": 1.3984375, "learning_rate": 0.00019999715674103182, "loss": 2.3022, "step": 2040 }, { "epoch": 0.0, "grad_norm": 1.8125, "learning_rate": 0.0001999971427862685, "loss": 2.3169, "step": 2045 }, { "epoch": 0.0, "grad_norm": 1.828125, "learning_rate": 0.0001999971287973444, "loss": 2.208, "step": 2050 }, { "epoch": 0.0, "grad_norm": 1.59375, "learning_rate": 0.0001999971147742594, "loss": 2.3251, "step": 2055 }, { "epoch": 0.0, "grad_norm": 1.484375, "learning_rate": 0.0001999971007170136, "loss": 2.2817, "step": 2060 }, { "epoch": 0.0, "grad_norm": 1.1953125, "learning_rate": 0.00019999708662560699, "loss": 2.1426, "step": 2065 }, { "epoch": 0.0, "grad_norm": 1.578125, "learning_rate": 0.00019999707250003954, "loss": 2.2881, "step": 2070 }, { "epoch": 0.0, "grad_norm": 1.4765625, "learning_rate": 0.00019999705834031128, "loss": 2.0854, "step": 2075 }, { "epoch": 0.0, "grad_norm": 1.78125, "learning_rate": 0.0001999970441464222, "loss": 2.1186, "step": 2080 }, { "epoch": 0.0, "grad_norm": 1.7734375, "learning_rate": 0.00019999702991837235, "loss": 2.3487, "step": 2085 }, { "epoch": 0.0, "grad_norm": 1.296875, "learning_rate": 0.00019999701565616169, "loss": 2.3834, "step": 2090 }, { "epoch": 0.0, "grad_norm": 1.65625, "learning_rate": 0.00019999700135979023, "loss": 2.2016, "step": 2095 }, { "epoch": 0.0, "grad_norm": 1.4921875, "learning_rate": 0.00019999698702925797, "loss": 2.2889, "step": 2100 }, { "epoch": 0.0, "grad_norm": 1.34375, "learning_rate": 0.00019999697266456497, "loss": 1.939, "step": 2105 }, { "epoch": 0.0, "grad_norm": 1.5625, "learning_rate": 0.0001999969582657111, "loss": 2.1245, "step": 2110 }, { "epoch": 0.0, "grad_norm": 1.6484375, "learning_rate": 0.00019999694383269657, "loss": 2.1575, "step": 2115 }, { "epoch": 0.0, "grad_norm": 1.515625, "learning_rate": 0.0001999969293655212, "loss": 2.2441, "step": 2120 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.00019999691486418506, "loss": 2.1848, "step": 2125 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019999690032868818, "loss": 2.2448, "step": 2130 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999688575903055, "loss": 2.2855, "step": 2135 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999687115521218, "loss": 2.121, "step": 2140 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999685651723303, "loss": 2.2494, "step": 2145 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.00019999684184509318, "loss": 2.3208, "step": 2150 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019999682713879255, "loss": 2.4287, "step": 2155 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019999681239833122, "loss": 2.1965, "step": 2160 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.0001999967976237092, "loss": 2.2471, "step": 2165 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.0001999967828149264, "loss": 2.2651, "step": 2170 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.0001999967679719829, "loss": 2.1516, "step": 2175 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.0001999967530948787, "loss": 2.1554, "step": 2180 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019999673818361376, "loss": 2.1197, "step": 2185 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999967232381882, "loss": 2.2437, "step": 2190 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.0001999967082586019, "loss": 2.3926, "step": 2195 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.0001999966932448549, "loss": 2.3573, "step": 2200 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.0001999966781969472, "loss": 2.3922, "step": 2205 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.00019999666311487884, "loss": 2.2439, "step": 2210 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019999664799864983, "loss": 2.168, "step": 2215 }, { "epoch": 0.01, "grad_norm": 1.296875, "learning_rate": 0.00019999663284826012, "loss": 2.3617, "step": 2220 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.0001999966176637098, "loss": 2.3287, "step": 2225 }, { "epoch": 0.01, "grad_norm": 1.390625, "learning_rate": 0.00019999660244499875, "loss": 2.323, "step": 2230 }, { "epoch": 0.01, "grad_norm": 1.8046875, "learning_rate": 0.00019999658719212712, "loss": 2.2023, "step": 2235 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999965719050948, "loss": 2.0859, "step": 2240 }, { "epoch": 0.01, "grad_norm": 1.40625, "learning_rate": 0.00019999655658390185, "loss": 2.1685, "step": 2245 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.00019999654122854823, "loss": 2.2488, "step": 2250 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019999652583903402, "loss": 2.2588, "step": 2255 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019999651041535915, "loss": 2.3852, "step": 2260 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019999649495752367, "loss": 2.1697, "step": 2265 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999647946552758, "loss": 2.3294, "step": 2270 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.0001999964639393709, "loss": 2.3827, "step": 2275 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.00019999644837905356, "loss": 2.2589, "step": 2280 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019999643278457567, "loss": 2.3008, "step": 2285 }, { "epoch": 0.01, "grad_norm": 1.375, "learning_rate": 0.00019999641715593715, "loss": 2.1601, "step": 2290 }, { "epoch": 0.01, "grad_norm": 1.3828125, "learning_rate": 0.00019999640149313805, "loss": 2.2241, "step": 2295 }, { "epoch": 0.01, "grad_norm": 1.4375, "learning_rate": 0.0001999963857961784, "loss": 2.2039, "step": 2300 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999637006505813, "loss": 2.4399, "step": 2305 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.0001999963542997773, "loss": 2.3412, "step": 2310 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999963385003359, "loss": 2.298, "step": 2315 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019999632266673394, "loss": 2.4054, "step": 2320 }, { "epoch": 0.01, "grad_norm": 1.3828125, "learning_rate": 0.0001999963067989714, "loss": 2.2334, "step": 2325 }, { "epoch": 0.01, "grad_norm": 1.4296875, "learning_rate": 0.00019999629089704834, "loss": 2.3069, "step": 2330 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019999627496096472, "loss": 2.2733, "step": 2335 }, { "epoch": 0.01, "grad_norm": 1.4375, "learning_rate": 0.00019999625899072055, "loss": 2.2726, "step": 2340 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019999624298631588, "loss": 2.1901, "step": 2345 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.00019999622694775066, "loss": 2.3077, "step": 2350 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.00019999621087502488, "loss": 2.2192, "step": 2355 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019999619476813863, "loss": 2.1973, "step": 2360 }, { "epoch": 0.01, "grad_norm": 1.4375, "learning_rate": 0.00019999617862709185, "loss": 2.1121, "step": 2365 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019999616245188455, "loss": 2.3515, "step": 2370 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019999614624251677, "loss": 2.0421, "step": 2375 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.0001999961299989885, "loss": 2.3201, "step": 2380 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019999611372129968, "loss": 2.2753, "step": 2385 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999609740945043, "loss": 2.2337, "step": 2390 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.0001999960810634407, "loss": 2.2772, "step": 2395 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999606468327046, "loss": 2.3721, "step": 2400 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019999604826893976, "loss": 2.2378, "step": 2405 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999603182044862, "loss": 2.1609, "step": 2410 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.000199996015337797, "loss": 2.1508, "step": 2415 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019999599882098492, "loss": 2.276, "step": 2420 }, { "epoch": 0.01, "grad_norm": 1.4140625, "learning_rate": 0.0001999959822700124, "loss": 2.335, "step": 2425 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019999596568487946, "loss": 2.0218, "step": 2430 }, { "epoch": 0.01, "grad_norm": 1.921875, "learning_rate": 0.0001999959490655861, "loss": 2.2316, "step": 2435 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999593241213227, "loss": 2.3244, "step": 2440 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.000199995915724518, "loss": 2.1824, "step": 2445 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019999589900274335, "loss": 2.3239, "step": 2450 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999958822468083, "loss": 2.2445, "step": 2455 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019999586545671282, "loss": 2.1247, "step": 2460 }, { "epoch": 0.01, "grad_norm": 1.265625, "learning_rate": 0.00019999584863245694, "loss": 2.1141, "step": 2465 }, { "epoch": 0.01, "grad_norm": 1.34375, "learning_rate": 0.00019999583177404063, "loss": 2.271, "step": 2470 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019999581488146397, "loss": 2.3432, "step": 2475 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019999579795472694, "loss": 2.3315, "step": 2480 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.0001999957809938295, "loss": 2.1187, "step": 2485 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.0001999957639987717, "loss": 2.2414, "step": 2490 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999574696955352, "loss": 2.2493, "step": 2495 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019999572990617498, "loss": 2.4517, "step": 2500 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999571280863613, "loss": 2.4121, "step": 2505 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999569567693689, "loss": 2.3235, "step": 2510 }, { "epoch": 0.01, "grad_norm": 1.4296875, "learning_rate": 0.0001999956785110773, "loss": 2.1808, "step": 2515 }, { "epoch": 0.01, "grad_norm": 1.3671875, "learning_rate": 0.00019999566131105741, "loss": 2.1333, "step": 2520 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019999564407687713, "loss": 2.2409, "step": 2525 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.0001999956268085366, "loss": 2.3754, "step": 2530 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019999560950603572, "loss": 2.1429, "step": 2535 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.0001999955921693745, "loss": 2.1869, "step": 2540 }, { "epoch": 0.01, "grad_norm": 1.1640625, "learning_rate": 0.000199995574798553, "loss": 2.0164, "step": 2545 }, { "epoch": 0.01, "grad_norm": 2.625, "learning_rate": 0.0001999955573935712, "loss": 2.3629, "step": 2550 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999553995442905, "loss": 2.2269, "step": 2555 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019999552248112665, "loss": 2.1042, "step": 2560 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019999550497366397, "loss": 2.0816, "step": 2565 }, { "epoch": 0.01, "grad_norm": 1.4375, "learning_rate": 0.00019999548743204103, "loss": 2.1961, "step": 2570 }, { "epoch": 0.01, "grad_norm": 1.3125, "learning_rate": 0.00019999546985625778, "loss": 2.292, "step": 2575 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999545224631428, "loss": 2.1847, "step": 2580 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019999543460221054, "loss": 2.3252, "step": 2585 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019999541692394652, "loss": 2.1593, "step": 2590 }, { "epoch": 0.01, "grad_norm": 1.84375, "learning_rate": 0.00019999539921152225, "loss": 2.3469, "step": 2595 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019999538146493775, "loss": 2.238, "step": 2600 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019999536368419302, "loss": 2.3857, "step": 2605 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.00019999534586928806, "loss": 2.1818, "step": 2610 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999532802022287, "loss": 2.2973, "step": 2615 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019999531013699745, "loss": 2.4203, "step": 2620 }, { "epoch": 0.01, "grad_norm": 1.8984375, "learning_rate": 0.00019999529221961183, "loss": 2.2807, "step": 2625 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.000199995274268066, "loss": 2.2686, "step": 2630 }, { "epoch": 0.01, "grad_norm": 1.4375, "learning_rate": 0.00019999525628235998, "loss": 2.2314, "step": 2635 }, { "epoch": 0.01, "grad_norm": 1.3515625, "learning_rate": 0.00019999523826249376, "loss": 2.3282, "step": 2640 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999522020846736, "loss": 2.271, "step": 2645 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019999520212028078, "loss": 2.249, "step": 2650 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019999518399793403, "loss": 2.1987, "step": 2655 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.0001999951658414271, "loss": 2.3118, "step": 2660 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.00019999514765075998, "loss": 2.268, "step": 2665 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019999512942593274, "loss": 2.3102, "step": 2670 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019999511116694537, "loss": 2.1956, "step": 2675 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019999509287379783, "loss": 2.3278, "step": 2680 }, { "epoch": 0.01, "grad_norm": 2.03125, "learning_rate": 0.00019999507454649014, "loss": 2.3751, "step": 2685 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019999505618502233, "loss": 2.2283, "step": 2690 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.0001999950377893944, "loss": 2.1523, "step": 2695 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019999501935960635, "loss": 2.0374, "step": 2700 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019999500089565818, "loss": 2.3199, "step": 2705 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.0001999949823975499, "loss": 2.2467, "step": 2710 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.00019999496386528156, "loss": 2.1088, "step": 2715 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019999494529885308, "loss": 2.2994, "step": 2720 }, { "epoch": 0.01, "grad_norm": 1.390625, "learning_rate": 0.00019999492669826454, "loss": 2.0626, "step": 2725 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.0001999949080635159, "loss": 2.3821, "step": 2730 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.0001999948893946072, "loss": 2.3635, "step": 2735 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019999487069153845, "loss": 2.1792, "step": 2740 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999948519543096, "loss": 2.2423, "step": 2745 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.0001999948331829207, "loss": 2.0631, "step": 2750 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019999481437737177, "loss": 2.0453, "step": 2755 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.0001999947955376628, "loss": 2.224, "step": 2760 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.0001999947766637938, "loss": 2.2311, "step": 2765 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999475775576473, "loss": 2.3752, "step": 2770 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019999473881357566, "loss": 2.1967, "step": 2775 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.0001999947198372266, "loss": 2.1029, "step": 2780 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.0001999947008267175, "loss": 2.1293, "step": 2785 }, { "epoch": 0.01, "grad_norm": 1.9453125, "learning_rate": 0.0001999946817820484, "loss": 2.1697, "step": 2790 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.0001999946627032193, "loss": 2.205, "step": 2795 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019999464359023023, "loss": 2.2087, "step": 2800 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999462444308117, "loss": 2.1019, "step": 2805 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.00019999460526177215, "loss": 2.3176, "step": 2810 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.0001999945860463031, "loss": 2.2262, "step": 2815 }, { "epoch": 0.01, "grad_norm": 1.3984375, "learning_rate": 0.00019999456679667415, "loss": 2.1374, "step": 2820 }, { "epoch": 0.01, "grad_norm": 1.890625, "learning_rate": 0.0001999945475128852, "loss": 2.306, "step": 2825 }, { "epoch": 0.01, "grad_norm": 1.3046875, "learning_rate": 0.00019999452819493633, "loss": 2.4378, "step": 2830 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.0001999945088428275, "loss": 2.2878, "step": 2835 }, { "epoch": 0.01, "grad_norm": 1.375, "learning_rate": 0.00019999448945655872, "loss": 2.1443, "step": 2840 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019999447003613002, "loss": 2.0483, "step": 2845 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.0001999944505815414, "loss": 2.3037, "step": 2850 }, { "epoch": 0.01, "grad_norm": 1.875, "learning_rate": 0.00019999443109279286, "loss": 2.4593, "step": 2855 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.0001999944115698844, "loss": 2.3778, "step": 2860 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019999439201281606, "loss": 2.2992, "step": 2865 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019999437242158781, "loss": 2.43, "step": 2870 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019999435279619964, "loss": 2.4416, "step": 2875 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019999433313665162, "loss": 2.1653, "step": 2880 }, { "epoch": 0.01, "grad_norm": 1.296875, "learning_rate": 0.00019999431344294372, "loss": 2.2632, "step": 2885 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.0001999942937150759, "loss": 2.2279, "step": 2890 }, { "epoch": 0.01, "grad_norm": 1.3671875, "learning_rate": 0.00019999427395304829, "loss": 2.3438, "step": 2895 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019999425415686078, "loss": 2.2648, "step": 2900 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019999423432651343, "loss": 2.1803, "step": 2905 }, { "epoch": 0.01, "grad_norm": 1.34375, "learning_rate": 0.00019999421446200623, "loss": 2.2063, "step": 2910 }, { "epoch": 0.01, "grad_norm": 1.328125, "learning_rate": 0.00019999419456333918, "loss": 2.3316, "step": 2915 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019999417463051233, "loss": 2.3044, "step": 2920 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019999415466352564, "loss": 2.2678, "step": 2925 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019999413466237914, "loss": 2.2732, "step": 2930 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019999411462707283, "loss": 2.1711, "step": 2935 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019999409455760675, "loss": 2.3944, "step": 2940 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019999407445398082, "loss": 2.2067, "step": 2945 }, { "epoch": 0.01, "grad_norm": 1.9921875, "learning_rate": 0.00019999405431619512, "loss": 2.2924, "step": 2950 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019999403414424963, "loss": 2.3625, "step": 2955 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019999401393814436, "loss": 2.1247, "step": 2960 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019999399369787933, "loss": 2.1442, "step": 2965 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.00019999397342345456, "loss": 2.0917, "step": 2970 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019999395311487002, "loss": 2.0516, "step": 2975 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019999393277212571, "loss": 2.0766, "step": 2980 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999391239522167, "loss": 2.1684, "step": 2985 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999389198415793, "loss": 2.2293, "step": 2990 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.0001999938715389344, "loss": 2.1904, "step": 2995 }, { "epoch": 0.01, "grad_norm": 1.9609375, "learning_rate": 0.0001999938510595512, "loss": 2.3104, "step": 3000 }, { "epoch": 0.01, "grad_norm": 1.9140625, "learning_rate": 0.00019999383054600828, "loss": 2.2513, "step": 3005 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019999380999830563, "loss": 2.2227, "step": 3010 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.0001999937894164433, "loss": 2.1551, "step": 3015 }, { "epoch": 0.01, "grad_norm": 2.1875, "learning_rate": 0.00019999376880042128, "loss": 2.2178, "step": 3020 }, { "epoch": 0.01, "grad_norm": 1.21875, "learning_rate": 0.00019999374815023958, "loss": 2.2376, "step": 3025 }, { "epoch": 0.01, "grad_norm": 1.2578125, "learning_rate": 0.00019999372746589818, "loss": 2.055, "step": 3030 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999370674739713, "loss": 2.3191, "step": 3035 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.0001999936859947364, "loss": 2.3929, "step": 3040 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019999366520791602, "loss": 2.3797, "step": 3045 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.000199993644386936, "loss": 2.2043, "step": 3050 }, { "epoch": 0.01, "grad_norm": 1.1796875, "learning_rate": 0.00019999362353179633, "loss": 2.1695, "step": 3055 }, { "epoch": 0.01, "grad_norm": 1.9921875, "learning_rate": 0.00019999360264249703, "loss": 2.2617, "step": 3060 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.0001999935817190381, "loss": 2.199, "step": 3065 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019999356076141954, "loss": 2.1358, "step": 3070 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019999353976964137, "loss": 2.2701, "step": 3075 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.0001999935187437036, "loss": 2.3099, "step": 3080 }, { "epoch": 0.01, "grad_norm": 1.359375, "learning_rate": 0.00019999349768360622, "loss": 2.334, "step": 3085 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019999347658934924, "loss": 2.3674, "step": 3090 }, { "epoch": 0.01, "grad_norm": 1.1875, "learning_rate": 0.0001999934554609327, "loss": 2.2194, "step": 3095 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019999343429835656, "loss": 2.3246, "step": 3100 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019999341310162087, "loss": 2.2699, "step": 3105 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.0001999933918707256, "loss": 2.2727, "step": 3110 }, { "epoch": 0.01, "grad_norm": 1.3984375, "learning_rate": 0.00019999337060567078, "loss": 2.2747, "step": 3115 }, { "epoch": 0.01, "grad_norm": 1.9140625, "learning_rate": 0.0001999933493064564, "loss": 2.4446, "step": 3120 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.0001999933279730825, "loss": 2.0075, "step": 3125 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999330660554904, "loss": 2.271, "step": 3130 }, { "epoch": 0.01, "grad_norm": 1.3203125, "learning_rate": 0.0001999932852038561, "loss": 2.1105, "step": 3135 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.0001999932637680036, "loss": 2.4415, "step": 3140 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.0001999932422979916, "loss": 2.1787, "step": 3145 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019999322079382008, "loss": 2.2789, "step": 3150 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.0001999931992554891, "loss": 2.305, "step": 3155 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019999317768299858, "loss": 2.1671, "step": 3160 }, { "epoch": 0.01, "grad_norm": 1.40625, "learning_rate": 0.0001999931560763486, "loss": 2.2321, "step": 3165 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019999313443553915, "loss": 2.0649, "step": 3170 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019999311276057023, "loss": 2.2361, "step": 3175 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.00019999309105144184, "loss": 2.1873, "step": 3180 }, { "epoch": 0.01, "grad_norm": 1.859375, "learning_rate": 0.000199993069308154, "loss": 2.4244, "step": 3185 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.0001999930475307067, "loss": 2.2698, "step": 3190 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999302571909998, "loss": 2.1437, "step": 3195 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019999300387333384, "loss": 2.0375, "step": 3200 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019999298199340827, "loss": 2.1633, "step": 3205 }, { "epoch": 0.01, "grad_norm": 1.4375, "learning_rate": 0.00019999296007932327, "loss": 2.0023, "step": 3210 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999293813107886, "loss": 2.1797, "step": 3215 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019999291614867506, "loss": 2.2495, "step": 3220 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999289413211188, "loss": 2.1427, "step": 3225 }, { "epoch": 0.01, "grad_norm": 1.96875, "learning_rate": 0.00019999287208138927, "loss": 2.2247, "step": 3230 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.0001999928499965073, "loss": 2.2923, "step": 3235 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.000199992827877466, "loss": 2.2487, "step": 3240 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.00019999280572426532, "loss": 2.4089, "step": 3245 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019999278353690524, "loss": 2.2905, "step": 3250 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999276131538583, "loss": 2.3136, "step": 3255 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.0001999927390597071, "loss": 2.3385, "step": 3260 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019999271676986904, "loss": 2.3635, "step": 3265 }, { "epoch": 0.01, "grad_norm": 3.890625, "learning_rate": 0.00019999269444587162, "loss": 2.2026, "step": 3270 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.0001999926720877149, "loss": 2.1615, "step": 3275 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.0001999926496953989, "loss": 2.2169, "step": 3280 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019999262726892356, "loss": 2.3627, "step": 3285 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999260480828892, "loss": 2.1791, "step": 3290 }, { "epoch": 0.01, "grad_norm": 1.2890625, "learning_rate": 0.00019999258231349502, "loss": 2.0717, "step": 3295 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019999255978454185, "loss": 2.1774, "step": 3300 }, { "epoch": 0.01, "grad_norm": 1.4140625, "learning_rate": 0.00019999253722142936, "loss": 2.4004, "step": 3305 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019999251462415765, "loss": 2.1034, "step": 3310 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019999249199272666, "loss": 2.4688, "step": 3315 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019999246932713642, "loss": 2.3101, "step": 3320 }, { "epoch": 0.01, "grad_norm": 1.3671875, "learning_rate": 0.00019999244662738695, "loss": 2.1189, "step": 3325 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019999242389347823, "loss": 2.189, "step": 3330 }, { "epoch": 0.01, "grad_norm": 2.359375, "learning_rate": 0.0001999924011254103, "loss": 2.2639, "step": 3335 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019999237832318316, "loss": 2.2859, "step": 3340 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.0001999923554867968, "loss": 2.3575, "step": 3345 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.00019999233261625124, "loss": 2.3306, "step": 3350 }, { "epoch": 0.01, "grad_norm": 1.84375, "learning_rate": 0.00019999230971154647, "loss": 2.0953, "step": 3355 }, { "epoch": 0.01, "grad_norm": 1.8359375, "learning_rate": 0.00019999228677268252, "loss": 2.1624, "step": 3360 }, { "epoch": 0.01, "grad_norm": 2.140625, "learning_rate": 0.0001999922637996594, "loss": 2.3595, "step": 3365 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019999224079247712, "loss": 2.3501, "step": 3370 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019999221775113566, "loss": 2.2752, "step": 3375 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019999219467563506, "loss": 2.2942, "step": 3380 }, { "epoch": 0.01, "grad_norm": 1.3515625, "learning_rate": 0.00019999217156597529, "loss": 2.1549, "step": 3385 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.0001999921484221564, "loss": 2.2573, "step": 3390 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019999212524417836, "loss": 2.2955, "step": 3395 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019999210203204123, "loss": 2.1166, "step": 3400 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999207878574496, "loss": 2.0818, "step": 3405 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019999205550528957, "loss": 2.0815, "step": 3410 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 0.0001999920321906751, "loss": 2.3233, "step": 3415 }, { "epoch": 0.01, "grad_norm": 2.078125, "learning_rate": 0.00019999200884190151, "loss": 2.2773, "step": 3420 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019999198545896888, "loss": 2.3167, "step": 3425 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999196204187718, "loss": 2.2942, "step": 3430 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019999193859062637, "loss": 2.0659, "step": 3435 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019999191510521654, "loss": 2.2548, "step": 3440 }, { "epoch": 0.01, "grad_norm": 1.4140625, "learning_rate": 0.00019999189158564763, "loss": 2.1465, "step": 3445 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.00019999186803191968, "loss": 2.1321, "step": 3450 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.0001999918444440327, "loss": 2.0689, "step": 3455 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.0001999918208219867, "loss": 2.0974, "step": 3460 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019999179716578166, "loss": 2.1393, "step": 3465 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.0001999917734754176, "loss": 2.1904, "step": 3470 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019999174975089456, "loss": 2.3621, "step": 3475 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999172599221253, "loss": 2.1706, "step": 3480 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.0001999917021993715, "loss": 2.2432, "step": 3485 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019999167837237146, "loss": 2.1336, "step": 3490 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.0001999916545112125, "loss": 2.3414, "step": 3495 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019999163061589458, "loss": 2.2191, "step": 3500 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.0001999916066864177, "loss": 2.3275, "step": 3505 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019999158272278184, "loss": 2.0421, "step": 3510 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999155872498704, "loss": 2.1873, "step": 3515 }, { "epoch": 0.01, "grad_norm": 2.25, "learning_rate": 0.00019999153469303334, "loss": 2.1306, "step": 3520 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.0001999915106269207, "loss": 2.3004, "step": 3525 }, { "epoch": 0.01, "grad_norm": 1.3515625, "learning_rate": 0.00019999148652664919, "loss": 2.2526, "step": 3530 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019999146239221873, "loss": 2.2629, "step": 3535 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999143822362937, "loss": 2.2084, "step": 3540 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999141402088114, "loss": 2.1412, "step": 3545 }, { "epoch": 0.01, "grad_norm": 1.359375, "learning_rate": 0.00019999138978397404, "loss": 2.3332, "step": 3550 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019999136551290803, "loss": 2.1623, "step": 3555 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.0001999913412076832, "loss": 2.2692, "step": 3560 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019999131686829948, "loss": 2.1846, "step": 3565 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019999129249475693, "loss": 2.2379, "step": 3570 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999126808705552, "loss": 2.2001, "step": 3575 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.0001999912436451953, "loss": 2.1612, "step": 3580 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999121916917627, "loss": 2.2972, "step": 3585 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.0001999911946589984, "loss": 2.2049, "step": 3590 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999117011466173, "loss": 2.3829, "step": 3595 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.00019999114553616627, "loss": 2.2943, "step": 3600 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.000199991120923512, "loss": 2.2959, "step": 3605 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019999109627669902, "loss": 2.3553, "step": 3610 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019999107159572718, "loss": 2.2244, "step": 3615 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019999104688059663, "loss": 2.1474, "step": 3620 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.00019999102213130732, "loss": 2.1972, "step": 3625 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.0001999909973478592, "loss": 2.3214, "step": 3630 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019999097253025242, "loss": 2.168, "step": 3635 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.0001999909476784869, "loss": 2.1207, "step": 3640 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019999092279256262, "loss": 2.1926, "step": 3645 }, { "epoch": 0.01, "grad_norm": 1.34375, "learning_rate": 0.00019999089787247967, "loss": 2.2419, "step": 3650 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019999087291823798, "loss": 2.2185, "step": 3655 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019999084792983763, "loss": 2.1446, "step": 3660 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019999082290727858, "loss": 2.2383, "step": 3665 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019999079785056083, "loss": 2.2368, "step": 3670 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.0001999907727596844, "loss": 2.2967, "step": 3675 }, { "epoch": 0.01, "grad_norm": 1.8359375, "learning_rate": 0.00019999074763464935, "loss": 2.2662, "step": 3680 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019999072247545562, "loss": 2.0884, "step": 3685 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999069728210327, "loss": 2.3086, "step": 3690 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019999067205459228, "loss": 2.1968, "step": 3695 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019999064679292262, "loss": 2.3123, "step": 3700 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.0001999906214970944, "loss": 2.1606, "step": 3705 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019999059616710753, "loss": 2.2786, "step": 3710 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.0001999905708029621, "loss": 2.3011, "step": 3715 }, { "epoch": 0.01, "grad_norm": 1.96875, "learning_rate": 0.00019999054540465806, "loss": 2.2715, "step": 3720 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019999051997219542, "loss": 2.2606, "step": 3725 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.0001999904945055742, "loss": 2.3526, "step": 3730 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019999046900479443, "loss": 2.1854, "step": 3735 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019999044346985612, "loss": 2.2779, "step": 3740 }, { "epoch": 0.01, "grad_norm": 1.9296875, "learning_rate": 0.00019999041790075923, "loss": 2.2718, "step": 3745 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.0001999903922975038, "loss": 2.2211, "step": 3750 }, { "epoch": 0.01, "grad_norm": 1.40625, "learning_rate": 0.00019999036666008984, "loss": 2.1565, "step": 3755 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.00019999034098851736, "loss": 2.1764, "step": 3760 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.0001999903152827864, "loss": 2.2572, "step": 3765 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.0001999902895428969, "loss": 2.1331, "step": 3770 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.0001999902637688489, "loss": 2.2683, "step": 3775 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019999023796064242, "loss": 2.2285, "step": 3780 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019999021211827747, "loss": 2.1635, "step": 3785 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019999018624175405, "loss": 2.32, "step": 3790 }, { "epoch": 0.01, "grad_norm": 1.4296875, "learning_rate": 0.00019999016033107216, "loss": 2.0023, "step": 3795 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.0001999901343862318, "loss": 2.3292, "step": 3800 }, { "epoch": 0.01, "grad_norm": 1.375, "learning_rate": 0.00019999010840723305, "loss": 2.2517, "step": 3805 }, { "epoch": 0.01, "grad_norm": 1.4296875, "learning_rate": 0.00019999008239407582, "loss": 2.309, "step": 3810 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019999005634676017, "loss": 2.2863, "step": 3815 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019999003026528614, "loss": 2.1697, "step": 3820 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019999000414965364, "loss": 2.2687, "step": 3825 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.0001999899779998628, "loss": 2.2781, "step": 3830 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019998995181591355, "loss": 2.2074, "step": 3835 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.0001999899255978059, "loss": 2.1855, "step": 3840 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019998989934553988, "loss": 2.2756, "step": 3845 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.0001999898730591155, "loss": 2.2836, "step": 3850 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019998984673853278, "loss": 2.329, "step": 3855 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998982038379171, "loss": 2.0849, "step": 3860 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.0001999897939948923, "loss": 2.1752, "step": 3865 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.0001999897675718346, "loss": 2.2694, "step": 3870 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019998974111461854, "loss": 2.3411, "step": 3875 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019998971462324415, "loss": 2.3852, "step": 3880 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.0001999896880977115, "loss": 2.2138, "step": 3885 }, { "epoch": 0.01, "grad_norm": 1.2734375, "learning_rate": 0.00019998966153802057, "loss": 2.1681, "step": 3890 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019998963494417132, "loss": 2.1993, "step": 3895 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019998960831616383, "loss": 2.124, "step": 3900 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019998958165399805, "loss": 2.243, "step": 3905 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019998955495767403, "loss": 2.1551, "step": 3910 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019998952822719175, "loss": 2.2065, "step": 3915 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019998950146255125, "loss": 2.3091, "step": 3920 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.0001999894746637525, "loss": 2.1571, "step": 3925 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019998944783079558, "loss": 2.1827, "step": 3930 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998942096368042, "loss": 2.2454, "step": 3935 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998939406240705, "loss": 2.1933, "step": 3940 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.00019998936712697552, "loss": 2.2221, "step": 3945 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998934015738576, "loss": 2.4361, "step": 3950 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998931315363787, "loss": 2.2273, "step": 3955 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.0001999892861157318, "loss": 2.2333, "step": 3960 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.0001999892590436676, "loss": 2.1536, "step": 3965 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019998923193744524, "loss": 2.2766, "step": 3970 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019998920479706472, "loss": 2.2915, "step": 3975 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.0001999891776225261, "loss": 2.1889, "step": 3980 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.00019998915041382937, "loss": 2.3043, "step": 3985 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.0001999891231709745, "loss": 2.2897, "step": 3990 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998909589396155, "loss": 2.336, "step": 3995 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019998906858279054, "loss": 2.2337, "step": 4000 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.00019998904123746142, "loss": 2.1787, "step": 4005 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998901385797423, "loss": 2.3239, "step": 4010 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.000199988986444329, "loss": 2.3407, "step": 4015 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999889589965257, "loss": 2.2094, "step": 4020 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019998893151456436, "loss": 2.0779, "step": 4025 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019998890399844499, "loss": 2.2007, "step": 4030 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.0001999888764481676, "loss": 2.1764, "step": 4035 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.0001999888488637322, "loss": 2.4346, "step": 4040 }, { "epoch": 0.01, "grad_norm": 1.84375, "learning_rate": 0.00019998882124513877, "loss": 2.2549, "step": 4045 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019998879359238737, "loss": 2.3358, "step": 4050 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.000199988765905478, "loss": 2.2821, "step": 4055 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998873818441064, "loss": 2.1195, "step": 4060 }, { "epoch": 0.01, "grad_norm": 2.9375, "learning_rate": 0.00019998871042918527, "loss": 2.2958, "step": 4065 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019998868263980198, "loss": 2.0757, "step": 4070 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019998865481626077, "loss": 2.2688, "step": 4075 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.0001999886269585616, "loss": 2.1994, "step": 4080 }, { "epoch": 0.01, "grad_norm": 1.8984375, "learning_rate": 0.00019998859906670448, "loss": 2.1657, "step": 4085 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019998857114068947, "loss": 2.1979, "step": 4090 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998854318051655, "loss": 2.2457, "step": 4095 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019998851518618571, "loss": 2.2592, "step": 4100 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.000199988487157697, "loss": 2.3098, "step": 4105 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.0001999884590950504, "loss": 2.3877, "step": 4110 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.00019998843099824592, "loss": 2.1499, "step": 4115 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999884028672836, "loss": 2.2069, "step": 4120 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.0001999883747021634, "loss": 2.2799, "step": 4125 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.0001999883465028854, "loss": 2.1459, "step": 4130 }, { "epoch": 0.01, "grad_norm": 2.078125, "learning_rate": 0.00019998831826944952, "loss": 2.1088, "step": 4135 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019998829000185585, "loss": 2.2404, "step": 4140 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.0001999882617001043, "loss": 2.3461, "step": 4145 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019998823336419503, "loss": 2.2913, "step": 4150 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.00019998820499412794, "loss": 2.1234, "step": 4155 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019998817658990306, "loss": 2.233, "step": 4160 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.0001999881481515204, "loss": 2.284, "step": 4165 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019998811967897998, "loss": 2.3243, "step": 4170 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.0001999880911722818, "loss": 2.2934, "step": 4175 }, { "epoch": 0.01, "grad_norm": 1.9765625, "learning_rate": 0.00019998806263142588, "loss": 2.266, "step": 4180 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019998803405641224, "loss": 2.201, "step": 4185 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019998800544724087, "loss": 2.1393, "step": 4190 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.00019998797680391176, "loss": 2.2411, "step": 4195 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.000199987948126425, "loss": 2.3867, "step": 4200 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019998791941478047, "loss": 2.4271, "step": 4205 }, { "epoch": 0.01, "grad_norm": 1.3828125, "learning_rate": 0.00019998789066897826, "loss": 2.3053, "step": 4210 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998786188901843, "loss": 2.1245, "step": 4215 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.0001999878330749009, "loss": 2.2644, "step": 4220 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019998780422662572, "loss": 2.3508, "step": 4225 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.0001999877753441929, "loss": 2.2197, "step": 4230 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998774642760242, "loss": 2.3289, "step": 4235 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.0001999877174768543, "loss": 2.2664, "step": 4240 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.0001999876884919486, "loss": 2.2841, "step": 4245 }, { "epoch": 0.01, "grad_norm": 2.0, "learning_rate": 0.00019998765947288526, "loss": 2.1472, "step": 4250 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019998763041966436, "loss": 2.2738, "step": 4255 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019998760133228585, "loss": 2.3188, "step": 4260 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019998757221074978, "loss": 2.1757, "step": 4265 }, { "epoch": 0.01, "grad_norm": 1.34375, "learning_rate": 0.0001999875430550561, "loss": 1.9914, "step": 4270 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019998751386520489, "loss": 2.0503, "step": 4275 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019998748464119612, "loss": 2.2513, "step": 4280 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019998745538302984, "loss": 2.3848, "step": 4285 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.000199987426090706, "loss": 2.4177, "step": 4290 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998739676422463, "loss": 2.4008, "step": 4295 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.0001999873674035858, "loss": 2.0839, "step": 4300 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.00019998733800878946, "loss": 2.2303, "step": 4305 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019998730857983562, "loss": 2.19, "step": 4310 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019998727911672428, "loss": 2.1478, "step": 4315 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019998724961945553, "loss": 2.3052, "step": 4320 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.00019998722008802928, "loss": 2.2417, "step": 4325 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998719052244558, "loss": 2.2206, "step": 4330 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019998716092270444, "loss": 2.0851, "step": 4335 }, { "epoch": 0.01, "grad_norm": 1.9453125, "learning_rate": 0.00019998713128880588, "loss": 2.2557, "step": 4340 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019998710162074992, "loss": 2.2769, "step": 4345 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998707191853655, "loss": 2.1602, "step": 4350 }, { "epoch": 0.01, "grad_norm": 1.9140625, "learning_rate": 0.00019998704218216574, "loss": 2.177, "step": 4355 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.0001999870124116376, "loss": 2.3632, "step": 4360 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998698260695203, "loss": 2.2009, "step": 4365 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019998695276810912, "loss": 2.1718, "step": 4370 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019998692289510886, "loss": 2.2401, "step": 4375 }, { "epoch": 0.01, "grad_norm": 1.1875, "learning_rate": 0.00019998689298795128, "loss": 2.0542, "step": 4380 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.0001999868630466363, "loss": 2.0834, "step": 4385 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019998683307116406, "loss": 2.3615, "step": 4390 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019998680306153446, "loss": 2.2094, "step": 4395 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019998677301774756, "loss": 2.1962, "step": 4400 }, { "epoch": 0.01, "grad_norm": 1.34375, "learning_rate": 0.00019998674293980338, "loss": 2.1704, "step": 4405 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999867128277019, "loss": 2.1601, "step": 4410 }, { "epoch": 0.01, "grad_norm": 1.4296875, "learning_rate": 0.00019998668268144315, "loss": 2.1163, "step": 4415 }, { "epoch": 0.01, "grad_norm": 1.359375, "learning_rate": 0.00019998665250102712, "loss": 2.2088, "step": 4420 }, { "epoch": 0.01, "grad_norm": 1.890625, "learning_rate": 0.00019998662228645387, "loss": 2.3422, "step": 4425 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.00019998659203772338, "loss": 2.1986, "step": 4430 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019998656175483563, "loss": 2.3572, "step": 4435 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019998653143779067, "loss": 2.2311, "step": 4440 }, { "epoch": 0.01, "grad_norm": 1.4296875, "learning_rate": 0.00019998650108658846, "loss": 2.2909, "step": 4445 }, { "epoch": 0.01, "grad_norm": 2.78125, "learning_rate": 0.0001999864707012291, "loss": 2.1973, "step": 4450 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019998644028171252, "loss": 2.2048, "step": 4455 }, { "epoch": 0.01, "grad_norm": 1.3125, "learning_rate": 0.00019998640982803878, "loss": 2.257, "step": 4460 }, { "epoch": 0.01, "grad_norm": 1.2890625, "learning_rate": 0.00019998637934020785, "loss": 2.155, "step": 4465 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019998634881821976, "loss": 2.2404, "step": 4470 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019998631826207452, "loss": 2.2563, "step": 4475 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019998628767177215, "loss": 2.2249, "step": 4480 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019998625704731264, "loss": 2.2646, "step": 4485 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019998622638869602, "loss": 2.1749, "step": 4490 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.0001999861956959223, "loss": 2.2066, "step": 4495 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019998616496899146, "loss": 2.3716, "step": 4500 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019998613420790356, "loss": 2.2079, "step": 4505 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019998610341265857, "loss": 2.0765, "step": 4510 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999860725832565, "loss": 2.3201, "step": 4515 }, { "epoch": 0.01, "grad_norm": 2.0625, "learning_rate": 0.0001999860417196974, "loss": 2.3252, "step": 4520 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998601082198123, "loss": 2.3762, "step": 4525 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019998597989010804, "loss": 2.3412, "step": 4530 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019998594892407781, "loss": 2.1566, "step": 4535 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999859179238906, "loss": 2.3502, "step": 4540 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019998588688954633, "loss": 2.3492, "step": 4545 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019998585582104513, "loss": 2.3814, "step": 4550 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019998582471838692, "loss": 2.3403, "step": 4555 }, { "epoch": 0.01, "grad_norm": 2.109375, "learning_rate": 0.00019998579358157176, "loss": 2.3947, "step": 4560 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.0001999857624105996, "loss": 2.2513, "step": 4565 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.0001999857312054705, "loss": 2.2094, "step": 4570 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019998569996618448, "loss": 2.2571, "step": 4575 }, { "epoch": 0.01, "grad_norm": 2.0, "learning_rate": 0.00019998566869274152, "loss": 2.2367, "step": 4580 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019998563738514165, "loss": 2.2698, "step": 4585 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019998560604338485, "loss": 2.2581, "step": 4590 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019998557466747115, "loss": 2.2386, "step": 4595 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019998554325740062, "loss": 2.1201, "step": 4600 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019998551181317319, "loss": 2.2172, "step": 4605 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.0001999854803347889, "loss": 2.0584, "step": 4610 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019998544882224772, "loss": 2.241, "step": 4615 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.00019998541727554972, "loss": 2.0622, "step": 4620 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.0001999853856946949, "loss": 2.0703, "step": 4625 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 0.0001999853540796832, "loss": 1.949, "step": 4630 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.0001999853224305148, "loss": 2.4195, "step": 4635 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.0001999852907471895, "loss": 2.2535, "step": 4640 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019998525902970745, "loss": 2.2335, "step": 4645 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019998522727806864, "loss": 2.2974, "step": 4650 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019998519549227305, "loss": 2.2991, "step": 4655 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019998516367232067, "loss": 2.1455, "step": 4660 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019998513181821158, "loss": 1.9368, "step": 4665 }, { "epoch": 0.01, "grad_norm": 1.8828125, "learning_rate": 0.00019998509992994574, "loss": 2.2681, "step": 4670 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.0001999850680075232, "loss": 2.249, "step": 4675 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019998503605094393, "loss": 2.1529, "step": 4680 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019998500406020794, "loss": 2.2734, "step": 4685 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.0001999849720353153, "loss": 2.2352, "step": 4690 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.00019998493997626596, "loss": 2.1786, "step": 4695 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019998490788305994, "loss": 2.0726, "step": 4700 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.0001999848757556973, "loss": 2.3065, "step": 4705 }, { "epoch": 0.01, "grad_norm": 1.5546875, "learning_rate": 0.00019998484359417797, "loss": 2.1724, "step": 4710 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998481139850202, "loss": 2.239, "step": 4715 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019998477916866945, "loss": 2.2469, "step": 4720 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.00019998474690468025, "loss": 2.2163, "step": 4725 }, { "epoch": 0.01, "grad_norm": 1.859375, "learning_rate": 0.00019998471460653448, "loss": 2.3883, "step": 4730 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.0001999846822742321, "loss": 2.1848, "step": 4735 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.0001999846499077731, "loss": 2.0381, "step": 4740 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019998461750715758, "loss": 2.3968, "step": 4745 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998458507238548, "loss": 2.22, "step": 4750 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019998455260345686, "loss": 2.3518, "step": 4755 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019998452010037168, "loss": 2.1693, "step": 4760 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019998448756312998, "loss": 2.1446, "step": 4765 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019998445499173174, "loss": 2.3396, "step": 4770 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019998442238617706, "loss": 2.4819, "step": 4775 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019998438974646582, "loss": 2.3404, "step": 4780 }, { "epoch": 0.01, "grad_norm": 1.96875, "learning_rate": 0.00019998435707259816, "loss": 2.2099, "step": 4785 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.000199984324364574, "loss": 2.2519, "step": 4790 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019998429162239336, "loss": 2.1127, "step": 4795 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.0001999842588460563, "loss": 2.1288, "step": 4800 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.0001999842260355628, "loss": 2.2337, "step": 4805 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019998419319091287, "loss": 2.1957, "step": 4810 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019998416031210653, "loss": 2.2096, "step": 4815 }, { "epoch": 0.01, "grad_norm": 1.953125, "learning_rate": 0.00019998412739914375, "loss": 2.3076, "step": 4820 }, { "epoch": 0.01, "grad_norm": 1.390625, "learning_rate": 0.00019998409445202463, "loss": 2.1856, "step": 4825 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019998406147074912, "loss": 2.2323, "step": 4830 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019998402845531726, "loss": 1.9528, "step": 4835 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.000199983995405729, "loss": 2.3253, "step": 4840 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.0001999839623219844, "loss": 2.3619, "step": 4845 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019998392920408348, "loss": 2.3771, "step": 4850 }, { "epoch": 0.01, "grad_norm": 1.859375, "learning_rate": 0.00019998389605202626, "loss": 2.2335, "step": 4855 }, { "epoch": 0.01, "grad_norm": 2.1875, "learning_rate": 0.0001999838628658127, "loss": 2.4525, "step": 4860 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998382964544283, "loss": 2.2642, "step": 4865 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.00019998379639091666, "loss": 2.3758, "step": 4870 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998376310223425, "loss": 2.2564, "step": 4875 }, { "epoch": 0.01, "grad_norm": 1.3984375, "learning_rate": 0.00019998372977939556, "loss": 2.0766, "step": 4880 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.00019998369642240064, "loss": 2.2468, "step": 4885 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.0001999836630312494, "loss": 2.0401, "step": 4890 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.000199983629605942, "loss": 2.2345, "step": 4895 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019998359614647836, "loss": 2.3841, "step": 4900 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019998356265285848, "loss": 2.098, "step": 4905 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998352912508244, "loss": 2.2603, "step": 4910 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019998349556315018, "loss": 2.0468, "step": 4915 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.00019998346196706176, "loss": 2.2689, "step": 4920 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999834283368172, "loss": 2.1525, "step": 4925 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019998339467241648, "loss": 2.2456, "step": 4930 }, { "epoch": 0.01, "grad_norm": 1.8359375, "learning_rate": 0.0001999833609738596, "loss": 2.1188, "step": 4935 }, { "epoch": 0.01, "grad_norm": 2.0, "learning_rate": 0.00019998332724114657, "loss": 2.0877, "step": 4940 }, { "epoch": 0.01, "grad_norm": 1.328125, "learning_rate": 0.00019998329347427744, "loss": 2.2402, "step": 4945 }, { "epoch": 0.01, "grad_norm": 1.8984375, "learning_rate": 0.00019998325967325223, "loss": 2.3405, "step": 4950 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019998322583807089, "loss": 2.1657, "step": 4955 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019998319196873348, "loss": 2.48, "step": 4960 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019998315806524003, "loss": 2.2158, "step": 4965 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.0001999831241275905, "loss": 2.1675, "step": 4970 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.00019998309015578492, "loss": 2.1881, "step": 4975 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.00019998305614982328, "loss": 2.1591, "step": 4980 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998302210970563, "loss": 2.1447, "step": 4985 }, { "epoch": 0.01, "grad_norm": 2.359375, "learning_rate": 0.00019998298803543195, "loss": 2.16, "step": 4990 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.0001999829539270023, "loss": 2.2298, "step": 4995 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019998291978441665, "loss": 2.2636, "step": 5000 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019998288560767503, "loss": 1.941, "step": 5005 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.0001999828513967774, "loss": 2.0597, "step": 5010 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019998281715172386, "loss": 2.1591, "step": 5015 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.00019998278287251434, "loss": 2.2116, "step": 5020 }, { "epoch": 0.01, "grad_norm": 1.859375, "learning_rate": 0.00019998274855914894, "loss": 2.2567, "step": 5025 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019998271421162758, "loss": 2.2287, "step": 5030 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.0001999826798299503, "loss": 2.2748, "step": 5035 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998264541411714, "loss": 2.1855, "step": 5040 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.0001999826109641281, "loss": 2.1752, "step": 5045 }, { "epoch": 0.01, "grad_norm": 1.8828125, "learning_rate": 0.00019998257647998319, "loss": 2.2474, "step": 5050 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019998254196168245, "loss": 2.3061, "step": 5055 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019998250740922583, "loss": 2.1465, "step": 5060 }, { "epoch": 0.01, "grad_norm": 2.28125, "learning_rate": 0.00019998247282261336, "loss": 2.3933, "step": 5065 }, { "epoch": 0.01, "grad_norm": 1.9765625, "learning_rate": 0.00019998243820184506, "loss": 2.1285, "step": 5070 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019998240354692095, "loss": 2.4275, "step": 5075 }, { "epoch": 0.01, "grad_norm": 1.390625, "learning_rate": 0.00019998236885784105, "loss": 2.2929, "step": 5080 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019998233413460534, "loss": 2.1777, "step": 5085 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019998229937721388, "loss": 2.2106, "step": 5090 }, { "epoch": 0.01, "grad_norm": 1.3984375, "learning_rate": 0.00019998226458566663, "loss": 2.1745, "step": 5095 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019998222975996363, "loss": 2.1813, "step": 5100 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.0001999821949001049, "loss": 2.0009, "step": 5105 }, { "epoch": 0.01, "grad_norm": 1.59375, "learning_rate": 0.00019998216000609043, "loss": 2.1964, "step": 5110 }, { "epoch": 0.01, "grad_norm": 1.8046875, "learning_rate": 0.0001999821250779202, "loss": 2.2293, "step": 5115 }, { "epoch": 0.01, "grad_norm": 1.3828125, "learning_rate": 0.00019998209011559435, "loss": 2.2679, "step": 5120 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019998205511911275, "loss": 2.2195, "step": 5125 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019998202008847548, "loss": 2.2594, "step": 5130 }, { "epoch": 0.01, "grad_norm": 1.65625, "learning_rate": 0.00019998198502368253, "loss": 2.3842, "step": 5135 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.0001999819499247339, "loss": 2.0679, "step": 5140 }, { "epoch": 0.01, "grad_norm": 2.171875, "learning_rate": 0.00019998191479162966, "loss": 2.2119, "step": 5145 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019998187962436977, "loss": 2.1322, "step": 5150 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.00019998184442295426, "loss": 2.1196, "step": 5155 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019998180918738315, "loss": 2.2831, "step": 5160 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019998177391765642, "loss": 2.2027, "step": 5165 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.00019998173861377413, "loss": 2.0043, "step": 5170 }, { "epoch": 0.01, "grad_norm": 1.34375, "learning_rate": 0.0001999817032757362, "loss": 2.0974, "step": 5175 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 0.00019998166790354278, "loss": 2.0993, "step": 5180 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 0.0001999816324971938, "loss": 2.3854, "step": 5185 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019998159705668925, "loss": 2.185, "step": 5190 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019998156158202915, "loss": 2.3004, "step": 5195 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.0001999815260732136, "loss": 2.1517, "step": 5200 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019998149053024252, "loss": 2.1734, "step": 5205 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019998145495311596, "loss": 2.4762, "step": 5210 }, { "epoch": 0.01, "grad_norm": 2.0625, "learning_rate": 0.0001999814193418339, "loss": 2.4032, "step": 5215 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019998138369639636, "loss": 2.2317, "step": 5220 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.0001999813480168034, "loss": 2.2118, "step": 5225 }, { "epoch": 0.01, "grad_norm": 2.890625, "learning_rate": 0.00019998131230305498, "loss": 2.2272, "step": 5230 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019998127655515116, "loss": 2.3367, "step": 5235 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998124077309188, "loss": 2.3519, "step": 5240 }, { "epoch": 0.01, "grad_norm": 2.0, "learning_rate": 0.00019998120495687722, "loss": 2.3634, "step": 5245 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.00019998116910650718, "loss": 2.2704, "step": 5250 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.00019998113322198174, "loss": 2.2812, "step": 5255 }, { "epoch": 0.01, "grad_norm": 1.890625, "learning_rate": 0.00019998109730330092, "loss": 2.2306, "step": 5260 }, { "epoch": 0.01, "grad_norm": 1.9609375, "learning_rate": 0.00019998106135046478, "loss": 2.1923, "step": 5265 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019998102536347324, "loss": 2.1959, "step": 5270 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019998098934232643, "loss": 2.3376, "step": 5275 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998095328702426, "loss": 2.1696, "step": 5280 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.0001999809171975668, "loss": 2.127, "step": 5285 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019998088107395402, "loss": 2.5151, "step": 5290 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.000199980844916186, "loss": 2.2487, "step": 5295 }, { "epoch": 0.01, "grad_norm": 1.9140625, "learning_rate": 0.00019998080872426268, "loss": 2.2239, "step": 5300 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.0001999807724981841, "loss": 2.1716, "step": 5305 }, { "epoch": 0.01, "grad_norm": 2.625, "learning_rate": 0.00019998073623795032, "loss": 2.5961, "step": 5310 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019998069994356125, "loss": 2.2125, "step": 5315 }, { "epoch": 0.01, "grad_norm": 2.109375, "learning_rate": 0.000199980663615017, "loss": 2.4083, "step": 5320 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019998062725231752, "loss": 2.1569, "step": 5325 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019998059085546283, "loss": 2.2281, "step": 5330 }, { "epoch": 0.01, "grad_norm": 1.875, "learning_rate": 0.00019998055442445298, "loss": 2.036, "step": 5335 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019998051795928795, "loss": 2.2039, "step": 5340 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 0.00019998048145996778, "loss": 2.1652, "step": 5345 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019998044492649245, "loss": 2.261, "step": 5350 }, { "epoch": 0.01, "grad_norm": 1.921875, "learning_rate": 0.000199980408358862, "loss": 2.3246, "step": 5355 }, { "epoch": 0.01, "grad_norm": 1.8359375, "learning_rate": 0.00019998037175707642, "loss": 2.2006, "step": 5360 }, { "epoch": 0.01, "grad_norm": 1.9609375, "learning_rate": 0.00019998033512113575, "loss": 2.3199, "step": 5365 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.00019998029845103994, "loss": 2.1965, "step": 5370 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019998026174678909, "loss": 2.2403, "step": 5375 }, { "epoch": 0.01, "grad_norm": 2.15625, "learning_rate": 0.00019998022500838315, "loss": 2.2558, "step": 5380 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019998018823582213, "loss": 2.1415, "step": 5385 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.0001999801514291061, "loss": 2.313, "step": 5390 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.000199980114588235, "loss": 2.1923, "step": 5395 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019998007771320893, "loss": 2.2228, "step": 5400 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.00019998004080402782, "loss": 2.1776, "step": 5405 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019998000386069173, "loss": 2.3306, "step": 5410 }, { "epoch": 0.01, "grad_norm": 1.453125, "learning_rate": 0.00019997996688320062, "loss": 2.3457, "step": 5415 }, { "epoch": 0.01, "grad_norm": 1.921875, "learning_rate": 0.00019997992987155457, "loss": 2.229, "step": 5420 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.00019997989282575357, "loss": 2.3146, "step": 5425 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999798557457976, "loss": 2.1714, "step": 5430 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.00019997981863168672, "loss": 2.2686, "step": 5435 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019997978148342095, "loss": 2.3622, "step": 5440 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999797443010002, "loss": 2.2818, "step": 5445 }, { "epoch": 0.01, "grad_norm": 1.8828125, "learning_rate": 0.00019997970708442463, "loss": 2.0882, "step": 5450 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.0001999796698336941, "loss": 2.2996, "step": 5455 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019997963254880875, "loss": 2.185, "step": 5460 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019997959522976858, "loss": 2.5236, "step": 5465 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.00019997955787657353, "loss": 2.042, "step": 5470 }, { "epoch": 0.01, "grad_norm": 2.0, "learning_rate": 0.00019997952048922364, "loss": 2.199, "step": 5475 }, { "epoch": 0.01, "grad_norm": 2.0625, "learning_rate": 0.00019997948306771891, "loss": 2.1778, "step": 5480 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019997944561205944, "loss": 2.075, "step": 5485 }, { "epoch": 0.01, "grad_norm": 2.015625, "learning_rate": 0.00019997940812224512, "loss": 2.2857, "step": 5490 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019997937059827606, "loss": 2.2295, "step": 5495 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.0001999793330401522, "loss": 2.1758, "step": 5500 }, { "epoch": 0.01, "grad_norm": 1.421875, "learning_rate": 0.00019997929544787358, "loss": 2.1769, "step": 5505 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019997925782144028, "loss": 2.1892, "step": 5510 }, { "epoch": 0.01, "grad_norm": 1.7109375, "learning_rate": 0.00019997922016085223, "loss": 2.3637, "step": 5515 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019997918246610942, "loss": 2.2043, "step": 5520 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019997914473721197, "loss": 2.2661, "step": 5525 }, { "epoch": 0.01, "grad_norm": 1.890625, "learning_rate": 0.00019997910697415976, "loss": 2.1856, "step": 5530 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019997906917695293, "loss": 2.278, "step": 5535 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.0001999790313455914, "loss": 2.2428, "step": 5540 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.00019997899348007527, "loss": 2.3738, "step": 5545 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019997895558040445, "loss": 2.2352, "step": 5550 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019997891764657905, "loss": 2.3801, "step": 5555 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.000199978879678599, "loss": 2.2362, "step": 5560 }, { "epoch": 0.01, "grad_norm": 1.3359375, "learning_rate": 0.00019997884167646438, "loss": 2.1816, "step": 5565 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019997880364017517, "loss": 2.1942, "step": 5570 }, { "epoch": 0.01, "grad_norm": 1.7734375, "learning_rate": 0.00019997876556973138, "loss": 2.167, "step": 5575 }, { "epoch": 0.01, "grad_norm": 1.8828125, "learning_rate": 0.00019997872746513302, "loss": 2.1191, "step": 5580 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019997868932638012, "loss": 2.2029, "step": 5585 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019997865115347268, "loss": 2.1044, "step": 5590 }, { "epoch": 0.01, "grad_norm": 1.8828125, "learning_rate": 0.00019997861294641074, "loss": 2.2319, "step": 5595 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.00019997857470519428, "loss": 2.3488, "step": 5600 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019997853642982335, "loss": 2.2613, "step": 5605 }, { "epoch": 0.01, "grad_norm": 1.9921875, "learning_rate": 0.0001999784981202979, "loss": 2.2457, "step": 5610 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.000199978459776618, "loss": 2.3267, "step": 5615 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019997842139878364, "loss": 2.1513, "step": 5620 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.00019997838298679485, "loss": 2.212, "step": 5625 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.0001999783445406516, "loss": 2.039, "step": 5630 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.00019997830606035397, "loss": 2.2361, "step": 5635 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019997826754590193, "loss": 2.2664, "step": 5640 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019997822899729548, "loss": 2.3322, "step": 5645 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019997819041453466, "loss": 2.3018, "step": 5650 }, { "epoch": 0.01, "grad_norm": 2.015625, "learning_rate": 0.0001999781517976195, "loss": 2.1287, "step": 5655 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019997811314655, "loss": 2.1266, "step": 5660 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019997807446132612, "loss": 2.179, "step": 5665 }, { "epoch": 0.01, "grad_norm": 1.8046875, "learning_rate": 0.00019997803574194794, "loss": 2.1116, "step": 5670 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019997799698841546, "loss": 2.2951, "step": 5675 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019997795820072867, "loss": 2.3048, "step": 5680 }, { "epoch": 0.01, "grad_norm": 1.859375, "learning_rate": 0.00019997791937888758, "loss": 2.3391, "step": 5685 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019997788052289223, "loss": 2.3774, "step": 5690 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019997784163274264, "loss": 2.3805, "step": 5695 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 0.00019997780270843878, "loss": 2.2553, "step": 5700 }, { "epoch": 0.01, "grad_norm": 1.8046875, "learning_rate": 0.0001999777637499807, "loss": 2.3184, "step": 5705 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019997772475736843, "loss": 2.1679, "step": 5710 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.0001999776857306019, "loss": 2.269, "step": 5715 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.0001999776466696812, "loss": 2.3287, "step": 5720 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.00019997760757460635, "loss": 2.0896, "step": 5725 }, { "epoch": 0.01, "grad_norm": 2.1875, "learning_rate": 0.00019997756844537733, "loss": 2.2752, "step": 5730 }, { "epoch": 0.01, "grad_norm": 2.03125, "learning_rate": 0.00019997752928199415, "loss": 2.2102, "step": 5735 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019997749008445682, "loss": 2.0669, "step": 5740 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019997745085276539, "loss": 2.1643, "step": 5745 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019997741158691985, "loss": 2.2808, "step": 5750 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.0001999773722869202, "loss": 2.1423, "step": 5755 }, { "epoch": 0.01, "grad_norm": 1.8828125, "learning_rate": 0.00019997733295276646, "loss": 2.2814, "step": 5760 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019997729358445864, "loss": 2.2274, "step": 5765 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.0001999772541819968, "loss": 2.1866, "step": 5770 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.0001999772147453809, "loss": 2.2818, "step": 5775 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.00019997717527461096, "loss": 2.1172, "step": 5780 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019997713576968702, "loss": 2.1458, "step": 5785 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019997709623060907, "loss": 2.2716, "step": 5790 }, { "epoch": 0.01, "grad_norm": 1.4921875, "learning_rate": 0.00019997705665737714, "loss": 2.3796, "step": 5795 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019997701704999122, "loss": 2.2587, "step": 5800 }, { "epoch": 0.01, "grad_norm": 2.0, "learning_rate": 0.0001999769774084513, "loss": 2.213, "step": 5805 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019997693773275753, "loss": 2.2023, "step": 5810 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019997689802290976, "loss": 2.0766, "step": 5815 }, { "epoch": 0.01, "grad_norm": 1.921875, "learning_rate": 0.00019997685827890805, "loss": 2.2081, "step": 5820 }, { "epoch": 0.01, "grad_norm": 1.375, "learning_rate": 0.00019997681850075248, "loss": 2.1659, "step": 5825 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019997677868844297, "loss": 2.2615, "step": 5830 }, { "epoch": 0.01, "grad_norm": 1.9921875, "learning_rate": 0.00019997673884197963, "loss": 2.1877, "step": 5835 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019997669896136238, "loss": 2.3398, "step": 5840 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.0001999766590465913, "loss": 2.3495, "step": 5845 }, { "epoch": 0.01, "grad_norm": 1.4140625, "learning_rate": 0.00019997661909766637, "loss": 2.2172, "step": 5850 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.0001999765791145876, "loss": 2.237, "step": 5855 }, { "epoch": 0.01, "grad_norm": 1.40625, "learning_rate": 0.00019997653909735507, "loss": 2.1566, "step": 5860 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019997649904596868, "loss": 2.343, "step": 5865 }, { "epoch": 0.01, "grad_norm": 1.75, "learning_rate": 0.00019997645896042854, "loss": 2.339, "step": 5870 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019997641884073462, "loss": 2.3688, "step": 5875 }, { "epoch": 0.01, "grad_norm": 1.890625, "learning_rate": 0.00019997637868688693, "loss": 2.2995, "step": 5880 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.0001999763384988855, "loss": 2.2566, "step": 5885 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019997629827673034, "loss": 2.1969, "step": 5890 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.00019997625802042147, "loss": 2.2067, "step": 5895 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019997621772995888, "loss": 2.2066, "step": 5900 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019997617740534265, "loss": 2.4086, "step": 5905 }, { "epoch": 0.01, "grad_norm": 1.53125, "learning_rate": 0.0001999761370465727, "loss": 2.0491, "step": 5910 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019997609665364913, "loss": 2.1248, "step": 5915 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019997605622657186, "loss": 2.3407, "step": 5920 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.00019997601576534098, "loss": 2.3738, "step": 5925 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019997597526995645, "loss": 2.2035, "step": 5930 }, { "epoch": 0.01, "grad_norm": 1.734375, "learning_rate": 0.00019997593474041836, "loss": 2.1297, "step": 5935 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019997589417672666, "loss": 2.2358, "step": 5940 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.00019997585357888137, "loss": 2.2899, "step": 5945 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019997581294688255, "loss": 2.1704, "step": 5950 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019997577228073014, "loss": 2.0504, "step": 5955 }, { "epoch": 0.01, "grad_norm": 1.515625, "learning_rate": 0.00019997573158042422, "loss": 2.2135, "step": 5960 }, { "epoch": 0.01, "grad_norm": 1.8359375, "learning_rate": 0.00019997569084596477, "loss": 2.263, "step": 5965 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 0.0001999756500773518, "loss": 2.267, "step": 5970 }, { "epoch": 0.01, "grad_norm": 1.359375, "learning_rate": 0.00019997560927458534, "loss": 2.2952, "step": 5975 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.0001999755684376654, "loss": 2.2944, "step": 5980 }, { "epoch": 0.01, "grad_norm": 1.3828125, "learning_rate": 0.000199975527566592, "loss": 2.2054, "step": 5985 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019997548666136515, "loss": 2.2728, "step": 5990 }, { "epoch": 0.01, "grad_norm": 2.203125, "learning_rate": 0.00019997544572198482, "loss": 2.3431, "step": 5995 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019997540474845109, "loss": 2.1673, "step": 6000 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.00019997536374076399, "loss": 2.1724, "step": 6005 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019997532269892343, "loss": 2.1507, "step": 6010 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 0.0001999752816229295, "loss": 2.1732, "step": 6015 }, { "epoch": 0.01, "grad_norm": 1.921875, "learning_rate": 0.0001999752405127822, "loss": 2.4069, "step": 6020 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.00019997519936848157, "loss": 2.4279, "step": 6025 }, { "epoch": 0.01, "grad_norm": 1.5625, "learning_rate": 0.0001999751581900276, "loss": 2.1131, "step": 6030 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019997511697742029, "loss": 2.3537, "step": 6035 }, { "epoch": 0.01, "grad_norm": 1.6953125, "learning_rate": 0.00019997507573065967, "loss": 2.226, "step": 6040 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 0.00019997503444974576, "loss": 2.2286, "step": 6045 }, { "epoch": 0.01, "grad_norm": 1.3984375, "learning_rate": 0.0001999749931346785, "loss": 2.0808, "step": 6050 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019997495178545804, "loss": 2.2522, "step": 6055 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019997491040208432, "loss": 2.0908, "step": 6060 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.00019997486898455733, "loss": 2.2092, "step": 6065 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019997482753287714, "loss": 2.2804, "step": 6070 }, { "epoch": 0.01, "grad_norm": 1.671875, "learning_rate": 0.0001999747860470437, "loss": 2.2966, "step": 6075 }, { "epoch": 0.01, "grad_norm": 1.875, "learning_rate": 0.0001999747445270571, "loss": 2.212, "step": 6080 }, { "epoch": 0.01, "grad_norm": 1.578125, "learning_rate": 0.00019997470297291727, "loss": 2.1274, "step": 6085 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.0001999746613846243, "loss": 2.2775, "step": 6090 }, { "epoch": 0.01, "grad_norm": 1.828125, "learning_rate": 0.00019997461976217816, "loss": 2.1106, "step": 6095 }, { "epoch": 0.01, "grad_norm": 2.015625, "learning_rate": 0.0001999745781055789, "loss": 2.2616, "step": 6100 }, { "epoch": 0.01, "grad_norm": 1.546875, "learning_rate": 0.0001999745364148265, "loss": 2.0906, "step": 6105 }, { "epoch": 0.01, "grad_norm": 1.4609375, "learning_rate": 0.00019997449468992095, "loss": 2.0831, "step": 6110 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.0001999744529308623, "loss": 2.2648, "step": 6115 }, { "epoch": 0.01, "grad_norm": 2.015625, "learning_rate": 0.0001999744111376506, "loss": 2.3571, "step": 6120 }, { "epoch": 0.01, "grad_norm": 1.8203125, "learning_rate": 0.00019997436931028584, "loss": 2.345, "step": 6125 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 0.00019997432744876798, "loss": 2.3862, "step": 6130 }, { "epoch": 0.01, "grad_norm": 1.6875, "learning_rate": 0.0001999742855530971, "loss": 2.3155, "step": 6135 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 0.0001999742436232732, "loss": 2.2072, "step": 6140 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019997420165929626, "loss": 2.5017, "step": 6145 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 0.00019997415966116635, "loss": 2.4234, "step": 6150 }, { "epoch": 0.01, "grad_norm": 1.5859375, "learning_rate": 0.00019997411762888342, "loss": 2.143, "step": 6155 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019997407556244755, "loss": 2.1593, "step": 6160 }, { "epoch": 0.01, "grad_norm": 1.71875, "learning_rate": 0.0001999740334618587, "loss": 2.1747, "step": 6165 }, { "epoch": 0.01, "grad_norm": 1.8359375, "learning_rate": 0.0001999739913271169, "loss": 2.3597, "step": 6170 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.0001999739491582222, "loss": 2.4624, "step": 6175 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 0.00019997390695517458, "loss": 2.3543, "step": 6180 }, { "epoch": 0.01, "grad_norm": 1.390625, "learning_rate": 0.00019997386471797403, "loss": 2.1273, "step": 6185 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019997382244662065, "loss": 2.1699, "step": 6190 }, { "epoch": 0.01, "grad_norm": 1.7265625, "learning_rate": 0.00019997378014111435, "loss": 2.2545, "step": 6195 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.0001999737378014552, "loss": 2.0784, "step": 6200 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.00019997369542764322, "loss": 2.3731, "step": 6205 }, { "epoch": 0.01, "grad_norm": 1.84375, "learning_rate": 0.00019997365301967844, "loss": 2.3367, "step": 6210 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 0.00019997361057756078, "loss": 2.3796, "step": 6215 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019997356810129037, "loss": 2.0931, "step": 6220 }, { "epoch": 0.01, "grad_norm": 1.703125, "learning_rate": 0.00019997352559086718, "loss": 2.1128, "step": 6225 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.0001999734830462912, "loss": 2.4559, "step": 6230 }, { "epoch": 0.01, "grad_norm": 2.125, "learning_rate": 0.00019997344046756248, "loss": 2.1893, "step": 6235 }, { "epoch": 0.01, "grad_norm": 1.6171875, "learning_rate": 0.000199973397854681, "loss": 1.8429, "step": 6240 }, { "epoch": 0.01, "grad_norm": 1.609375, "learning_rate": 0.00019997335520764682, "loss": 2.0784, "step": 6245 }, { "epoch": 0.01, "grad_norm": 1.4140625, "learning_rate": 0.0001999733125264599, "loss": 2.2955, "step": 6250 }, { "epoch": 0.01, "grad_norm": 1.6328125, "learning_rate": 0.0001999732698111203, "loss": 2.1799, "step": 6255 }, { "epoch": 0.01, "grad_norm": 1.46875, "learning_rate": 0.000199973227061628, "loss": 2.1479, "step": 6260 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019997318427798305, "loss": 2.1888, "step": 6265 }, { "epoch": 0.01, "grad_norm": 1.3671875, "learning_rate": 0.00019997314146018543, "loss": 2.2479, "step": 6270 }, { "epoch": 0.01, "grad_norm": 1.8671875, "learning_rate": 0.00019997309860823518, "loss": 2.2849, "step": 6275 }, { "epoch": 0.01, "grad_norm": 1.5078125, "learning_rate": 0.00019997305572213233, "loss": 2.2601, "step": 6280 }, { "epoch": 0.01, "grad_norm": 2.03125, "learning_rate": 0.00019997301280187685, "loss": 2.2726, "step": 6285 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019997296984746878, "loss": 2.1898, "step": 6290 }, { "epoch": 0.01, "grad_norm": 1.640625, "learning_rate": 0.00019997292685890815, "loss": 2.4021, "step": 6295 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019997288383619493, "loss": 2.1787, "step": 6300 }, { "epoch": 0.01, "grad_norm": 1.6640625, "learning_rate": 0.00019997284077932914, "loss": 2.1513, "step": 6305 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019997279768831086, "loss": 1.9134, "step": 6310 }, { "epoch": 0.01, "grad_norm": 1.78125, "learning_rate": 0.00019997275456314003, "loss": 2.277, "step": 6315 }, { "epoch": 0.01, "grad_norm": 1.8125, "learning_rate": 0.00019997271140381672, "loss": 2.2198, "step": 6320 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 0.00019997266821034088, "loss": 2.2993, "step": 6325 }, { "epoch": 0.01, "grad_norm": 1.7890625, "learning_rate": 0.00019997262498271258, "loss": 2.2565, "step": 6330 }, { "epoch": 0.01, "grad_norm": 1.859375, "learning_rate": 0.00019997258172093182, "loss": 2.2814, "step": 6335 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 0.00019997253842499863, "loss": 2.1668, "step": 6340 }, { "epoch": 0.01, "grad_norm": 1.6484375, "learning_rate": 0.00019997249509491299, "loss": 2.2508, "step": 6345 }, { "epoch": 0.01, "grad_norm": 1.90625, "learning_rate": 0.00019997245173067492, "loss": 2.1322, "step": 6350 }, { "epoch": 0.01, "grad_norm": 1.484375, "learning_rate": 0.00019997240833228446, "loss": 2.203, "step": 6355 }, { "epoch": 0.01, "grad_norm": 1.4765625, "learning_rate": 0.00019997236489974164, "loss": 2.2385, "step": 6360 }, { "epoch": 0.01, "grad_norm": 1.7421875, "learning_rate": 0.00019997232143304641, "loss": 2.2513, "step": 6365 }, { "epoch": 0.01, "grad_norm": 1.6015625, "learning_rate": 0.00019997227793219882, "loss": 2.1753, "step": 6370 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.0001999722343971989, "loss": 2.2181, "step": 6375 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019997219082804664, "loss": 2.2685, "step": 6380 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019997214722474208, "loss": 2.1078, "step": 6385 }, { "epoch": 0.02, "grad_norm": 1.3515625, "learning_rate": 0.00019997210358728524, "loss": 2.1669, "step": 6390 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.0001999720599156761, "loss": 2.212, "step": 6395 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999720162099147, "loss": 2.0819, "step": 6400 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019997197247000102, "loss": 2.0991, "step": 6405 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.00019997192869593514, "loss": 2.2572, "step": 6410 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.000199971884887717, "loss": 2.1232, "step": 6415 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019997184104534666, "loss": 2.2761, "step": 6420 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019997179716882413, "loss": 2.1968, "step": 6425 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019997175325814942, "loss": 2.1554, "step": 6430 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019997170931332253, "loss": 2.1424, "step": 6435 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019997166533434351, "loss": 2.2532, "step": 6440 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019997162132121235, "loss": 2.136, "step": 6445 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019997157727392908, "loss": 2.2255, "step": 6450 }, { "epoch": 0.02, "grad_norm": 1.625, "learning_rate": 0.0001999715331924937, "loss": 2.2287, "step": 6455 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.0001999714890769062, "loss": 2.3828, "step": 6460 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019997144492716668, "loss": 2.3253, "step": 6465 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019997140074327509, "loss": 2.0453, "step": 6470 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019997135652523142, "loss": 2.2021, "step": 6475 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019997131227303576, "loss": 2.1825, "step": 6480 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019997126798668808, "loss": 2.2472, "step": 6485 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.0001999712236661884, "loss": 2.3724, "step": 6490 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019997117931153672, "loss": 2.3277, "step": 6495 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019997113492273307, "loss": 2.2819, "step": 6500 }, { "epoch": 0.02, "grad_norm": 2.390625, "learning_rate": 0.0001999710904997775, "loss": 2.2369, "step": 6505 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019997104604267, "loss": 2.3364, "step": 6510 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019997100155141052, "loss": 2.116, "step": 6515 }, { "epoch": 0.02, "grad_norm": 1.5078125, "learning_rate": 0.00019997095702599916, "loss": 2.2794, "step": 6520 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.0001999709124664359, "loss": 2.0798, "step": 6525 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.0001999708678727208, "loss": 2.3892, "step": 6530 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019997082324485382, "loss": 2.1364, "step": 6535 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019997077858283497, "loss": 2.3625, "step": 6540 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019997073388666433, "loss": 2.1805, "step": 6545 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019997068915634183, "loss": 2.279, "step": 6550 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019997064439186755, "loss": 2.1784, "step": 6555 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019997059959324148, "loss": 2.2016, "step": 6560 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.00019997055476046364, "loss": 2.1179, "step": 6565 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019997050989353405, "loss": 2.2547, "step": 6570 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019997046499245274, "loss": 2.4132, "step": 6575 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019997042005721966, "loss": 2.1715, "step": 6580 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019997037508783492, "loss": 2.3491, "step": 6585 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019997033008429846, "loss": 2.3167, "step": 6590 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019997028504661034, "loss": 2.1964, "step": 6595 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019997023997477052, "loss": 2.3533, "step": 6600 }, { "epoch": 0.02, "grad_norm": 1.4140625, "learning_rate": 0.0001999701948687791, "loss": 1.9494, "step": 6605 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.000199970149728636, "loss": 2.1166, "step": 6610 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019997010455434134, "loss": 2.2031, "step": 6615 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019997005934589503, "loss": 2.2055, "step": 6620 }, { "epoch": 0.02, "grad_norm": 1.9609375, "learning_rate": 0.00019997001410329717, "loss": 2.3133, "step": 6625 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.00019996996882654772, "loss": 2.1078, "step": 6630 }, { "epoch": 0.02, "grad_norm": 1.5, "learning_rate": 0.00019996992351564672, "loss": 2.1888, "step": 6635 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.0001999698781705942, "loss": 2.3004, "step": 6640 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.0001999698327913901, "loss": 2.4516, "step": 6645 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019996978737803453, "loss": 2.3854, "step": 6650 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.0001999697419305275, "loss": 2.1157, "step": 6655 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019996969644886894, "loss": 2.2711, "step": 6660 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019996965093305896, "loss": 2.5561, "step": 6665 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.0001999696053830975, "loss": 2.1993, "step": 6670 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019996955979898463, "loss": 2.3331, "step": 6675 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.00019996951418072034, "loss": 2.2792, "step": 6680 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.00019996946852830462, "loss": 2.1453, "step": 6685 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019996942284173757, "loss": 2.1874, "step": 6690 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.0001999693771210191, "loss": 2.074, "step": 6695 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019996933136614932, "loss": 2.2449, "step": 6700 }, { "epoch": 0.02, "grad_norm": 1.625, "learning_rate": 0.0001999692855771282, "loss": 2.3056, "step": 6705 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019996923975395574, "loss": 2.4186, "step": 6710 }, { "epoch": 0.02, "grad_norm": 1.3515625, "learning_rate": 0.00019996919389663197, "loss": 2.2362, "step": 6715 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996914800515694, "loss": 2.3827, "step": 6720 }, { "epoch": 0.02, "grad_norm": 1.484375, "learning_rate": 0.0001999691020795306, "loss": 2.1258, "step": 6725 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019996905611975303, "loss": 2.2159, "step": 6730 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.0001999690101258242, "loss": 2.1021, "step": 6735 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019996896409774415, "loss": 2.2279, "step": 6740 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999689180355129, "loss": 2.4127, "step": 6745 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019996887193913043, "loss": 2.1839, "step": 6750 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019996882580859682, "loss": 2.2959, "step": 6755 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.000199968779643912, "loss": 2.0957, "step": 6760 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019996873344507607, "loss": 2.2652, "step": 6765 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019996868721208899, "loss": 2.0425, "step": 6770 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019996864094495078, "loss": 2.316, "step": 6775 }, { "epoch": 0.02, "grad_norm": 1.5546875, "learning_rate": 0.00019996859464366148, "loss": 2.2204, "step": 6780 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.0001999685483082211, "loss": 2.2282, "step": 6785 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.00019996850193862965, "loss": 2.2293, "step": 6790 }, { "epoch": 0.02, "grad_norm": 1.4765625, "learning_rate": 0.00019996845553488718, "loss": 2.4573, "step": 6795 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.0001999684090969936, "loss": 2.5442, "step": 6800 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019996836262494905, "loss": 2.2054, "step": 6805 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019996831611875348, "loss": 2.1886, "step": 6810 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.0001999682695784069, "loss": 2.2693, "step": 6815 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019996822300390939, "loss": 2.3548, "step": 6820 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.0001999681763952609, "loss": 2.1255, "step": 6825 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019996812975246145, "loss": 2.0934, "step": 6830 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019996808307551112, "loss": 2.1861, "step": 6835 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019996803636440985, "loss": 2.4134, "step": 6840 }, { "epoch": 0.02, "grad_norm": 1.484375, "learning_rate": 0.00019996798961915765, "loss": 1.9876, "step": 6845 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019996794283975465, "loss": 2.1636, "step": 6850 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019996789602620072, "loss": 2.0273, "step": 6855 }, { "epoch": 0.02, "grad_norm": 1.46875, "learning_rate": 0.00019996784917849597, "loss": 2.2285, "step": 6860 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.0001999678022966404, "loss": 2.1609, "step": 6865 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019996775538063402, "loss": 2.282, "step": 6870 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.0001999677084304768, "loss": 2.1767, "step": 6875 }, { "epoch": 0.02, "grad_norm": 1.625, "learning_rate": 0.00019996766144616884, "loss": 2.1698, "step": 6880 }, { "epoch": 0.02, "grad_norm": 2.09375, "learning_rate": 0.0001999676144277101, "loss": 2.2935, "step": 6885 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.0001999675673751006, "loss": 2.3626, "step": 6890 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996752028834038, "loss": 2.225, "step": 6895 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019996747316742942, "loss": 2.3426, "step": 6900 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.0001999674260123678, "loss": 2.226, "step": 6905 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019996737882315544, "loss": 2.2296, "step": 6910 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019996733159979244, "loss": 2.3319, "step": 6915 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.0001999672843422788, "loss": 2.0636, "step": 6920 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019996723705061452, "loss": 2.331, "step": 6925 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.0001999671897247996, "loss": 2.1248, "step": 6930 }, { "epoch": 0.02, "grad_norm": 1.75, "learning_rate": 0.00019996714236483406, "loss": 2.1738, "step": 6935 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019996709497071798, "loss": 2.2803, "step": 6940 }, { "epoch": 0.02, "grad_norm": 1.984375, "learning_rate": 0.0001999670475424513, "loss": 2.3078, "step": 6945 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019996700008003405, "loss": 2.3154, "step": 6950 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.0001999669525834663, "loss": 2.2273, "step": 6955 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019996690505274799, "loss": 2.2793, "step": 6960 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019996685748787917, "loss": 2.3376, "step": 6965 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019996680988885988, "loss": 1.9398, "step": 6970 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.0001999667622556901, "loss": 2.3925, "step": 6975 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019996671458836986, "loss": 2.1119, "step": 6980 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.00019996666688689918, "loss": 2.1889, "step": 6985 }, { "epoch": 0.02, "grad_norm": 1.5078125, "learning_rate": 0.0001999666191512781, "loss": 2.4193, "step": 6990 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019996657138150655, "loss": 2.1369, "step": 6995 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019996652357758468, "loss": 2.2833, "step": 7000 }, { "epoch": 0.02, "grad_norm": 1.3046875, "learning_rate": 0.00019996647573951236, "loss": 2.1531, "step": 7005 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019996642786728974, "loss": 2.234, "step": 7010 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019996637996091675, "loss": 2.2506, "step": 7015 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019996633202039343, "loss": 2.1967, "step": 7020 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.0001999662840457198, "loss": 2.2769, "step": 7025 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.00019996623603689586, "loss": 2.1579, "step": 7030 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.0001999661879939217, "loss": 2.0828, "step": 7035 }, { "epoch": 0.02, "grad_norm": 1.296875, "learning_rate": 0.0001999661399167972, "loss": 2.3937, "step": 7040 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.00019996609180552248, "loss": 2.1028, "step": 7045 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019996604366009755, "loss": 2.1379, "step": 7050 }, { "epoch": 0.02, "grad_norm": 1.453125, "learning_rate": 0.00019996599548052237, "loss": 2.2715, "step": 7055 }, { "epoch": 0.02, "grad_norm": 1.578125, "learning_rate": 0.00019996594726679704, "loss": 2.2441, "step": 7060 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019996589901892148, "loss": 2.1218, "step": 7065 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.0001999658507368958, "loss": 2.2576, "step": 7070 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019996580242071996, "loss": 2.1257, "step": 7075 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019996575407039396, "loss": 2.1321, "step": 7080 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019996570568591788, "loss": 2.2122, "step": 7085 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019996565726729167, "loss": 2.1159, "step": 7090 }, { "epoch": 0.02, "grad_norm": 2.1875, "learning_rate": 0.00019996560881451543, "loss": 2.028, "step": 7095 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.0001999655603275891, "loss": 2.3339, "step": 7100 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019996551180651267, "loss": 2.1706, "step": 7105 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019996546325128626, "loss": 2.0743, "step": 7110 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996541466190984, "loss": 2.1873, "step": 7115 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996536603838342, "loss": 2.2355, "step": 7120 }, { "epoch": 0.02, "grad_norm": 1.90625, "learning_rate": 0.000199965317380707, "loss": 2.2567, "step": 7125 }, { "epoch": 0.02, "grad_norm": 1.3671875, "learning_rate": 0.0001999652686888806, "loss": 2.328, "step": 7130 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019996521996290428, "loss": 2.1232, "step": 7135 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996517120277806, "loss": 2.2463, "step": 7140 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019996512240850187, "loss": 2.0419, "step": 7145 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.0001999650735800758, "loss": 2.1498, "step": 7150 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019996502471749983, "loss": 2.1145, "step": 7155 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.000199964975820774, "loss": 2.3563, "step": 7160 }, { "epoch": 0.02, "grad_norm": 1.75, "learning_rate": 0.00019996492688989834, "loss": 2.1807, "step": 7165 }, { "epoch": 0.02, "grad_norm": 2.75, "learning_rate": 0.00019996487792487284, "loss": 2.3178, "step": 7170 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019996482892569752, "loss": 2.326, "step": 7175 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.0001999647798923724, "loss": 2.2387, "step": 7180 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019996473082489752, "loss": 2.3833, "step": 7185 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.00019996468172327284, "loss": 2.2688, "step": 7190 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019996463258749843, "loss": 2.3274, "step": 7195 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.0001999645834175743, "loss": 2.2623, "step": 7200 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019996453421350046, "loss": 2.1023, "step": 7205 }, { "epoch": 0.02, "grad_norm": 1.171875, "learning_rate": 0.00019996448497527688, "loss": 2.136, "step": 7210 }, { "epoch": 0.02, "grad_norm": 1.4921875, "learning_rate": 0.00019996443570290367, "loss": 2.0554, "step": 7215 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019996438639638075, "loss": 2.1698, "step": 7220 }, { "epoch": 0.02, "grad_norm": 2.109375, "learning_rate": 0.0001999643370557082, "loss": 2.2682, "step": 7225 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019996428768088604, "loss": 2.1464, "step": 7230 }, { "epoch": 0.02, "grad_norm": 1.75, "learning_rate": 0.00019996423827191423, "loss": 2.2142, "step": 7235 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019996418882879284, "loss": 2.1893, "step": 7240 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019996413935152187, "loss": 2.3341, "step": 7245 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019996408984010135, "loss": 2.1885, "step": 7250 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019996404029453126, "loss": 2.4123, "step": 7255 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019996399071481167, "loss": 2.0611, "step": 7260 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019996394110094253, "loss": 2.427, "step": 7265 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019996389145292393, "loss": 2.1562, "step": 7270 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019996384177075585, "loss": 2.0967, "step": 7275 }, { "epoch": 0.02, "grad_norm": 1.75, "learning_rate": 0.00019996379205443828, "loss": 2.3539, "step": 7280 }, { "epoch": 0.02, "grad_norm": 1.5, "learning_rate": 0.0001999637423039713, "loss": 2.3423, "step": 7285 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019996369251935487, "loss": 2.2903, "step": 7290 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996364270058906, "loss": 2.35, "step": 7295 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999635928476738, "loss": 2.2891, "step": 7300 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019996354296060922, "loss": 2.2877, "step": 7305 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019996349303939525, "loss": 2.2676, "step": 7310 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019996344308403194, "loss": 2.1786, "step": 7315 }, { "epoch": 0.02, "grad_norm": 2.078125, "learning_rate": 0.0001999633930945193, "loss": 2.2499, "step": 7320 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019996334307085736, "loss": 2.2275, "step": 7325 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019996329301304611, "loss": 2.2686, "step": 7330 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019996324292108564, "loss": 2.2388, "step": 7335 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.0001999631927949759, "loss": 2.2882, "step": 7340 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019996314263471686, "loss": 2.2057, "step": 7345 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019996309244030863, "loss": 2.0698, "step": 7350 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019996304221175123, "loss": 2.3621, "step": 7355 }, { "epoch": 0.02, "grad_norm": 1.3359375, "learning_rate": 0.0001999629919490446, "loss": 2.1841, "step": 7360 }, { "epoch": 0.02, "grad_norm": 2.140625, "learning_rate": 0.00019996294165218882, "loss": 2.1765, "step": 7365 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019996289132118386, "loss": 2.2451, "step": 7370 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.0001999628409560298, "loss": 2.4252, "step": 7375 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999627905567266, "loss": 2.1565, "step": 7380 }, { "epoch": 0.02, "grad_norm": 1.4375, "learning_rate": 0.0001999627401232743, "loss": 2.0532, "step": 7385 }, { "epoch": 0.02, "grad_norm": 1.90625, "learning_rate": 0.00019996268965567294, "loss": 2.2374, "step": 7390 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019996263915392248, "loss": 2.0549, "step": 7395 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019996258861802298, "loss": 2.0543, "step": 7400 }, { "epoch": 0.02, "grad_norm": 2.125, "learning_rate": 0.00019996253804797443, "loss": 2.0971, "step": 7405 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.0001999624874437769, "loss": 2.0867, "step": 7410 }, { "epoch": 0.02, "grad_norm": 1.984375, "learning_rate": 0.00019996243680543035, "loss": 2.1878, "step": 7415 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.0001999623861329348, "loss": 2.3676, "step": 7420 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019996233542629032, "loss": 2.2962, "step": 7425 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019996228468549686, "loss": 2.1016, "step": 7430 }, { "epoch": 0.02, "grad_norm": 1.984375, "learning_rate": 0.0001999622339105545, "loss": 1.9712, "step": 7435 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019996218310146323, "loss": 2.1915, "step": 7440 }, { "epoch": 0.02, "grad_norm": 1.5625, "learning_rate": 0.00019996213225822304, "loss": 2.2568, "step": 7445 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.000199962081380834, "loss": 2.1442, "step": 7450 }, { "epoch": 0.02, "grad_norm": 2.03125, "learning_rate": 0.00019996203046929607, "loss": 2.2753, "step": 7455 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019996197952360932, "loss": 2.2666, "step": 7460 }, { "epoch": 0.02, "grad_norm": 1.75, "learning_rate": 0.00019996192854377374, "loss": 2.2448, "step": 7465 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019996187752978933, "loss": 2.2985, "step": 7470 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996182648165616, "loss": 2.1311, "step": 7475 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.0001999617753993742, "loss": 2.3318, "step": 7480 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999617242829435, "loss": 2.2333, "step": 7485 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019996167313236405, "loss": 2.2469, "step": 7490 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.0001999616219476359, "loss": 2.4562, "step": 7495 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.00019996157072875903, "loss": 2.247, "step": 7500 }, { "epoch": 0.02, "grad_norm": 1.9921875, "learning_rate": 0.00019996151947573347, "loss": 2.3068, "step": 7505 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019996146818855923, "loss": 2.2278, "step": 7510 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019996141686723636, "loss": 2.2363, "step": 7515 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996136551176486, "loss": 2.2066, "step": 7520 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019996131412214474, "loss": 2.2679, "step": 7525 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.000199961262698376, "loss": 2.197, "step": 7530 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.0001999612112404587, "loss": 2.0372, "step": 7535 }, { "epoch": 0.02, "grad_norm": 1.4296875, "learning_rate": 0.00019996115974839284, "loss": 2.2067, "step": 7540 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019996110822217844, "loss": 2.3002, "step": 7545 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019996105666181553, "loss": 2.2855, "step": 7550 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019996100506730407, "loss": 2.1436, "step": 7555 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019996095343864415, "loss": 2.0901, "step": 7560 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.0001999609017758357, "loss": 2.4159, "step": 7565 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019996085007887885, "loss": 2.4652, "step": 7570 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019996079834777355, "loss": 2.4455, "step": 7575 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019996074658251982, "loss": 2.3438, "step": 7580 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019996069478311768, "loss": 2.2952, "step": 7585 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019996064294956715, "loss": 2.1683, "step": 7590 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019996059108186827, "loss": 2.3045, "step": 7595 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019996053918002105, "loss": 2.1798, "step": 7600 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019996048724402546, "loss": 2.4316, "step": 7605 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019996043527388158, "loss": 2.1745, "step": 7610 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019996038326958938, "loss": 2.3148, "step": 7615 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019996033123114893, "loss": 2.2871, "step": 7620 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.0001999602791585602, "loss": 2.2092, "step": 7625 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019996022705182323, "loss": 2.3713, "step": 7630 }, { "epoch": 0.02, "grad_norm": 1.9609375, "learning_rate": 0.00019996017491093803, "loss": 2.2415, "step": 7635 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019996012273590464, "loss": 2.2828, "step": 7640 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019996007052672303, "loss": 2.2407, "step": 7645 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019996001828339325, "loss": 2.2111, "step": 7650 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019995996600591533, "loss": 2.2429, "step": 7655 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019995991369428927, "loss": 2.3118, "step": 7660 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.0001999598613485151, "loss": 2.1999, "step": 7665 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.0001999598089685928, "loss": 2.1913, "step": 7670 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019995975655452244, "loss": 2.2797, "step": 7675 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019995970410630404, "loss": 2.1947, "step": 7680 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019995965162393752, "loss": 2.0801, "step": 7685 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.000199959599107423, "loss": 2.3028, "step": 7690 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.0001999595465567605, "loss": 2.1389, "step": 7695 }, { "epoch": 0.02, "grad_norm": 1.484375, "learning_rate": 0.00019995949397194997, "loss": 2.2793, "step": 7700 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.0001999594413529915, "loss": 2.1349, "step": 7705 }, { "epoch": 0.02, "grad_norm": 2.1875, "learning_rate": 0.00019995938869988504, "loss": 2.2175, "step": 7710 }, { "epoch": 0.02, "grad_norm": 2.0625, "learning_rate": 0.00019995933601263066, "loss": 2.2942, "step": 7715 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019995928329122835, "loss": 2.2797, "step": 7720 }, { "epoch": 0.02, "grad_norm": 2.328125, "learning_rate": 0.0001999592305356781, "loss": 2.3132, "step": 7725 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019995917774598003, "loss": 2.432, "step": 7730 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019995912492213404, "loss": 2.36, "step": 7735 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.0001999590720641402, "loss": 2.3115, "step": 7740 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019995901917199858, "loss": 2.2262, "step": 7745 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.00019995896624570912, "loss": 2.2906, "step": 7750 }, { "epoch": 0.02, "grad_norm": 2.5, "learning_rate": 0.00019995891328527184, "loss": 2.2334, "step": 7755 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019995886029068683, "loss": 2.3408, "step": 7760 }, { "epoch": 0.02, "grad_norm": 2.1875, "learning_rate": 0.000199958807261954, "loss": 2.3733, "step": 7765 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.00019995875419907347, "loss": 2.1626, "step": 7770 }, { "epoch": 0.02, "grad_norm": 1.578125, "learning_rate": 0.00019995870110204523, "loss": 2.1708, "step": 7775 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.00019995864797086923, "loss": 2.1699, "step": 7780 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019995859480554558, "loss": 2.1935, "step": 7785 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.00019995854160607425, "loss": 2.3524, "step": 7790 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019995848837245527, "loss": 2.2617, "step": 7795 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.0001999584351046887, "loss": 2.1859, "step": 7800 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019995838180277445, "loss": 2.3505, "step": 7805 }, { "epoch": 0.02, "grad_norm": 1.3984375, "learning_rate": 0.00019995832846671262, "loss": 2.1525, "step": 7810 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019995827509650325, "loss": 2.2282, "step": 7815 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019995822169214627, "loss": 2.1726, "step": 7820 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.00019995816825364179, "loss": 2.2341, "step": 7825 }, { "epoch": 0.02, "grad_norm": 1.4609375, "learning_rate": 0.00019995811478098978, "loss": 2.3828, "step": 7830 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019995806127419026, "loss": 2.3822, "step": 7835 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019995800773324324, "loss": 1.9418, "step": 7840 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019995795415814874, "loss": 2.1856, "step": 7845 }, { "epoch": 0.02, "grad_norm": 1.9921875, "learning_rate": 0.0001999579005489068, "loss": 2.1804, "step": 7850 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019995784690551746, "loss": 2.2499, "step": 7855 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.0001999577932279807, "loss": 2.0428, "step": 7860 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019995773951629652, "loss": 2.1821, "step": 7865 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019995768577046497, "loss": 2.2119, "step": 7870 }, { "epoch": 0.02, "grad_norm": 2.0625, "learning_rate": 0.00019995763199048608, "loss": 2.4029, "step": 7875 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019995757817635985, "loss": 2.2381, "step": 7880 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019995752432808628, "loss": 1.9317, "step": 7885 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999574704456654, "loss": 2.1784, "step": 7890 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019995741652909725, "loss": 2.238, "step": 7895 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019995736257838183, "loss": 2.3678, "step": 7900 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019995730859351916, "loss": 2.2522, "step": 7905 }, { "epoch": 0.02, "grad_norm": 2.234375, "learning_rate": 0.00019995725457450924, "loss": 2.1457, "step": 7910 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.00019995720052135214, "loss": 2.2612, "step": 7915 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019995714643404787, "loss": 2.2973, "step": 7920 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019995709231259638, "loss": 2.1846, "step": 7925 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.00019995703815699773, "loss": 2.2435, "step": 7930 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019995698396725196, "loss": 2.2131, "step": 7935 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.0001999569297433591, "loss": 2.2931, "step": 7940 }, { "epoch": 0.02, "grad_norm": 1.5546875, "learning_rate": 0.0001999568754853191, "loss": 2.3246, "step": 7945 }, { "epoch": 0.02, "grad_norm": 1.5625, "learning_rate": 0.00019995682119313204, "loss": 2.2245, "step": 7950 }, { "epoch": 0.02, "grad_norm": 1.4921875, "learning_rate": 0.00019995676686679792, "loss": 2.2305, "step": 7955 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019995671250631675, "loss": 2.1544, "step": 7960 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.00019995665811168854, "loss": 2.2213, "step": 7965 }, { "epoch": 0.02, "grad_norm": 2.109375, "learning_rate": 0.00019995660368291333, "loss": 2.2932, "step": 7970 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019995654921999112, "loss": 2.1343, "step": 7975 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019995649472292197, "loss": 2.0902, "step": 7980 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019995644019170586, "loss": 2.1023, "step": 7985 }, { "epoch": 0.02, "grad_norm": 1.421875, "learning_rate": 0.0001999563856263428, "loss": 2.2317, "step": 7990 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.0001999563310268328, "loss": 2.2342, "step": 7995 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019995627639317597, "loss": 2.0305, "step": 8000 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019995622172537221, "loss": 2.4562, "step": 8005 }, { "epoch": 0.02, "grad_norm": 2.234375, "learning_rate": 0.00019995616702342167, "loss": 2.2705, "step": 8010 }, { "epoch": 0.02, "grad_norm": 1.984375, "learning_rate": 0.0001999561122873242, "loss": 2.0307, "step": 8015 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019995605751707996, "loss": 2.1555, "step": 8020 }, { "epoch": 0.02, "grad_norm": 1.9609375, "learning_rate": 0.0001999560027126889, "loss": 2.1149, "step": 8025 }, { "epoch": 0.02, "grad_norm": 2.28125, "learning_rate": 0.00019995594787415106, "loss": 2.2541, "step": 8030 }, { "epoch": 0.02, "grad_norm": 1.4765625, "learning_rate": 0.00019995589300146645, "loss": 2.0092, "step": 8035 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.0001999558380946351, "loss": 2.1592, "step": 8040 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019995578315365703, "loss": 2.2094, "step": 8045 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019995572817853223, "loss": 2.35, "step": 8050 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019995567316926076, "loss": 2.1936, "step": 8055 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019995561812584262, "loss": 2.2981, "step": 8060 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.0001999555630482778, "loss": 2.148, "step": 8065 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019995550793656638, "loss": 2.2672, "step": 8070 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019995545279070833, "loss": 2.405, "step": 8075 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.0001999553976107037, "loss": 2.268, "step": 8080 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019995534239655247, "loss": 2.3525, "step": 8085 }, { "epoch": 0.02, "grad_norm": 1.46875, "learning_rate": 0.00019995528714825472, "loss": 2.2927, "step": 8090 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.0001999552318658104, "loss": 2.0876, "step": 8095 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.00019995517654921953, "loss": 2.3829, "step": 8100 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019995512119848222, "loss": 2.1835, "step": 8105 }, { "epoch": 0.02, "grad_norm": 1.5234375, "learning_rate": 0.0001999550658135984, "loss": 2.1634, "step": 8110 }, { "epoch": 0.02, "grad_norm": 1.5703125, "learning_rate": 0.00019995501039456813, "loss": 2.2514, "step": 8115 }, { "epoch": 0.02, "grad_norm": 1.9609375, "learning_rate": 0.00019995495494139141, "loss": 2.2446, "step": 8120 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.00019995489945406824, "loss": 2.1991, "step": 8125 }, { "epoch": 0.02, "grad_norm": 1.46875, "learning_rate": 0.00019995484393259868, "loss": 2.1948, "step": 8130 }, { "epoch": 0.02, "grad_norm": 1.5703125, "learning_rate": 0.00019995478837698275, "loss": 2.1859, "step": 8135 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019995473278722043, "loss": 2.1693, "step": 8140 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019995467716331174, "loss": 2.2929, "step": 8145 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019995462150525675, "loss": 2.2417, "step": 8150 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019995456581305544, "loss": 2.177, "step": 8155 }, { "epoch": 0.02, "grad_norm": 2.234375, "learning_rate": 0.00019995451008670786, "loss": 2.3903, "step": 8160 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.000199954454326214, "loss": 2.1388, "step": 8165 }, { "epoch": 0.02, "grad_norm": 1.4765625, "learning_rate": 0.00019995439853157386, "loss": 2.2735, "step": 8170 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.0001999543427027875, "loss": 2.1711, "step": 8175 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.00019995428683985495, "loss": 2.2904, "step": 8180 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019995423094277618, "loss": 2.277, "step": 8185 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.0001999541750115512, "loss": 2.1478, "step": 8190 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.00019995411904618008, "loss": 2.1861, "step": 8195 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019995406304666285, "loss": 2.3143, "step": 8200 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019995400701299946, "loss": 2.1302, "step": 8205 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019995395094518997, "loss": 2.1323, "step": 8210 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019995389484323445, "loss": 2.1542, "step": 8215 }, { "epoch": 0.02, "grad_norm": 1.5703125, "learning_rate": 0.00019995383870713283, "loss": 2.1953, "step": 8220 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019995378253688517, "loss": 1.9692, "step": 8225 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019995372633249147, "loss": 2.3231, "step": 8230 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.0001999536700939518, "loss": 2.1362, "step": 8235 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.0001999536138212661, "loss": 2.2735, "step": 8240 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019995355751443444, "loss": 2.3605, "step": 8245 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019995350117345685, "loss": 2.2118, "step": 8250 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019995344479833334, "loss": 2.288, "step": 8255 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.0001999533883890639, "loss": 2.3248, "step": 8260 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019995333194564857, "loss": 2.1781, "step": 8265 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.0001999532754680874, "loss": 2.1743, "step": 8270 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019995321895638032, "loss": 2.2086, "step": 8275 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019995316241052747, "loss": 2.3302, "step": 8280 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019995310583052878, "loss": 2.1107, "step": 8285 }, { "epoch": 0.02, "grad_norm": 1.90625, "learning_rate": 0.00019995304921638428, "loss": 2.344, "step": 8290 }, { "epoch": 0.02, "grad_norm": 1.9609375, "learning_rate": 0.00019995299256809403, "loss": 2.1733, "step": 8295 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.000199952935885658, "loss": 2.3484, "step": 8300 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.00019995287916907626, "loss": 2.2408, "step": 8305 }, { "epoch": 0.02, "grad_norm": 3.84375, "learning_rate": 0.0001999528224183488, "loss": 2.0006, "step": 8310 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019995276563347566, "loss": 2.3531, "step": 8315 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019995270881445677, "loss": 2.1948, "step": 8320 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.00019995265196129228, "loss": 2.2746, "step": 8325 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019995259507398213, "loss": 2.304, "step": 8330 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.0001999525381525264, "loss": 2.2058, "step": 8335 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019995248119692504, "loss": 2.2431, "step": 8340 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.0001999524242071781, "loss": 2.3983, "step": 8345 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.0001999523671832856, "loss": 2.0814, "step": 8350 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019995231012524757, "loss": 2.3866, "step": 8355 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.00019995225303306402, "loss": 2.2135, "step": 8360 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019995219590673494, "loss": 2.0505, "step": 8365 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.0001999521387462604, "loss": 2.1822, "step": 8370 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.00019995208155164037, "loss": 2.3838, "step": 8375 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019995202432287494, "loss": 2.1891, "step": 8380 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019995196705996406, "loss": 2.1797, "step": 8385 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019995190976290777, "loss": 2.1966, "step": 8390 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.0001999518524317061, "loss": 2.166, "step": 8395 }, { "epoch": 0.02, "grad_norm": 1.578125, "learning_rate": 0.00019995179506635907, "loss": 2.154, "step": 8400 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.0001999517376668667, "loss": 2.284, "step": 8405 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019995168023322898, "loss": 2.3061, "step": 8410 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.00019995162276544598, "loss": 2.2649, "step": 8415 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019995156526351769, "loss": 2.304, "step": 8420 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019995150772744412, "loss": 2.3835, "step": 8425 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.00019995145015722532, "loss": 2.1877, "step": 8430 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019995139255286125, "loss": 2.3251, "step": 8435 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019995133491435203, "loss": 2.2541, "step": 8440 }, { "epoch": 0.02, "grad_norm": 1.5, "learning_rate": 0.0001999512772416976, "loss": 2.2219, "step": 8445 }, { "epoch": 0.02, "grad_norm": 2.296875, "learning_rate": 0.00019995121953489796, "loss": 2.2125, "step": 8450 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.0001999511617939532, "loss": 2.1096, "step": 8455 }, { "epoch": 0.02, "grad_norm": 1.5078125, "learning_rate": 0.00019995110401886332, "loss": 2.2653, "step": 8460 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019995104620962832, "loss": 2.1662, "step": 8465 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019995098836624823, "loss": 2.3297, "step": 8470 }, { "epoch": 0.02, "grad_norm": 2.5625, "learning_rate": 0.00019995093048872308, "loss": 2.3881, "step": 8475 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019995087257705288, "loss": 2.3107, "step": 8480 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019995081463123764, "loss": 2.3592, "step": 8485 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.0001999507566512774, "loss": 2.216, "step": 8490 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.00019995069863717216, "loss": 2.2647, "step": 8495 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019995064058892195, "loss": 2.1753, "step": 8500 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019995058250652677, "loss": 2.3966, "step": 8505 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019995052438998668, "loss": 2.2188, "step": 8510 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019995046623930168, "loss": 2.2204, "step": 8515 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019995040805447176, "loss": 2.1595, "step": 8520 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.000199950349835497, "loss": 2.3152, "step": 8525 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.0001999502915823774, "loss": 2.1418, "step": 8530 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019995023329511292, "loss": 2.1914, "step": 8535 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019995017497370364, "loss": 2.0803, "step": 8540 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.0001999501166181496, "loss": 2.3545, "step": 8545 }, { "epoch": 0.02, "grad_norm": 1.625, "learning_rate": 0.00019995005822845077, "loss": 2.2496, "step": 8550 }, { "epoch": 0.02, "grad_norm": 3.203125, "learning_rate": 0.00019994999980460718, "loss": 2.3331, "step": 8555 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019994994134661887, "loss": 2.2202, "step": 8560 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019994988285448583, "loss": 2.168, "step": 8565 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.0001999498243282081, "loss": 1.965, "step": 8570 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.0001999497657677857, "loss": 2.2338, "step": 8575 }, { "epoch": 0.02, "grad_norm": 1.2421875, "learning_rate": 0.00019994970717321866, "loss": 2.1519, "step": 8580 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019994964854450696, "loss": 2.1972, "step": 8585 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019994958988165068, "loss": 2.221, "step": 8590 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.0001999495311846498, "loss": 2.2009, "step": 8595 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019994947245350434, "loss": 2.119, "step": 8600 }, { "epoch": 0.02, "grad_norm": 2.1875, "learning_rate": 0.00019994941368821435, "loss": 2.3951, "step": 8605 }, { "epoch": 0.02, "grad_norm": 1.4375, "learning_rate": 0.00019994935488877976, "loss": 2.2022, "step": 8610 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019994929605520072, "loss": 2.0215, "step": 8615 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019994923718747717, "loss": 2.2746, "step": 8620 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019994917828560914, "loss": 2.0802, "step": 8625 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019994911934959669, "loss": 2.2802, "step": 8630 }, { "epoch": 0.02, "grad_norm": 1.984375, "learning_rate": 0.0001999490603794398, "loss": 2.1186, "step": 8635 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019994900137513847, "loss": 2.1836, "step": 8640 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019994894233669276, "loss": 2.3607, "step": 8645 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019994888326410268, "loss": 2.2338, "step": 8650 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019994882415736825, "loss": 2.2184, "step": 8655 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019994876501648948, "loss": 2.0896, "step": 8660 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019994870584146642, "loss": 2.1342, "step": 8665 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019994864663229906, "loss": 1.9894, "step": 8670 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019994858738898744, "loss": 2.1528, "step": 8675 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019994852811153153, "loss": 2.1224, "step": 8680 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019994846879993144, "loss": 2.1552, "step": 8685 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019994840945418709, "loss": 2.3516, "step": 8690 }, { "epoch": 0.02, "grad_norm": 1.9609375, "learning_rate": 0.0001999483500742986, "loss": 2.1042, "step": 8695 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.0001999482906602659, "loss": 2.3138, "step": 8700 }, { "epoch": 0.02, "grad_norm": 1.5078125, "learning_rate": 0.00019994823121208908, "loss": 2.2025, "step": 8705 }, { "epoch": 0.02, "grad_norm": 1.90625, "learning_rate": 0.00019994817172976813, "loss": 2.3482, "step": 8710 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019994811221330305, "loss": 2.28, "step": 8715 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999480526626939, "loss": 2.0558, "step": 8720 }, { "epoch": 0.02, "grad_norm": 2.140625, "learning_rate": 0.00019994799307794067, "loss": 2.3307, "step": 8725 }, { "epoch": 0.02, "grad_norm": 1.4453125, "learning_rate": 0.00019994793345904342, "loss": 2.2134, "step": 8730 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.0001999478738060021, "loss": 2.0869, "step": 8735 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.0001999478141188168, "loss": 2.3639, "step": 8740 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.00019994775439748753, "loss": 2.1375, "step": 8745 }, { "epoch": 0.02, "grad_norm": 2.03125, "learning_rate": 0.00019994769464201428, "loss": 2.0061, "step": 8750 }, { "epoch": 0.02, "grad_norm": 3.359375, "learning_rate": 0.00019994763485239707, "loss": 2.3453, "step": 8755 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019994757502863594, "loss": 2.3215, "step": 8760 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.0001999475151707309, "loss": 2.3747, "step": 8765 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.000199947455278682, "loss": 2.3612, "step": 8770 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019994739535248923, "loss": 2.0813, "step": 8775 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.0001999473353921526, "loss": 2.1557, "step": 8780 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019994727539767218, "loss": 2.0121, "step": 8785 }, { "epoch": 0.02, "grad_norm": 2.03125, "learning_rate": 0.00019994721536904794, "loss": 2.1822, "step": 8790 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.0001999471553062799, "loss": 2.3207, "step": 8795 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019994709520936813, "loss": 2.3169, "step": 8800 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.0001999470350783126, "loss": 2.1795, "step": 8805 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019994697491311337, "loss": 2.3111, "step": 8810 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019994691471377044, "loss": 2.1808, "step": 8815 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019994685448028378, "loss": 2.3312, "step": 8820 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019994679421265351, "loss": 2.3059, "step": 8825 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.00019994673391087958, "loss": 2.1796, "step": 8830 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.00019994667357496206, "loss": 2.2673, "step": 8835 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019994661320490092, "loss": 2.129, "step": 8840 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.0001999465528006962, "loss": 2.273, "step": 8845 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019994649236234797, "loss": 2.2378, "step": 8850 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019994643188985615, "loss": 2.1733, "step": 8855 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019994637138322086, "loss": 2.1192, "step": 8860 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019994631084244206, "loss": 2.3828, "step": 8865 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.0001999462502675198, "loss": 2.1094, "step": 8870 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019994618965845403, "loss": 2.2387, "step": 8875 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.0001999461290152449, "loss": 2.2421, "step": 8880 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019994606833789234, "loss": 2.355, "step": 8885 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.00019994600762639637, "loss": 2.2999, "step": 8890 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019994594688075705, "loss": 2.2954, "step": 8895 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019994588610097434, "loss": 2.4849, "step": 8900 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019994582528704835, "loss": 2.0398, "step": 8905 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019994576443897906, "loss": 2.1616, "step": 8910 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019994570355676646, "loss": 2.1038, "step": 8915 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019994564264041057, "loss": 2.2545, "step": 8920 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.0001999455816899115, "loss": 2.237, "step": 8925 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019994552070526917, "loss": 2.4439, "step": 8930 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019994545968648363, "loss": 2.1308, "step": 8935 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.00019994539863355491, "loss": 2.2897, "step": 8940 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019994533754648304, "loss": 2.1254, "step": 8945 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019994527642526805, "loss": 2.2156, "step": 8950 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.0001999452152699099, "loss": 2.113, "step": 8955 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019994515408040868, "loss": 2.1928, "step": 8960 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019994509285676436, "loss": 2.4424, "step": 8965 }, { "epoch": 0.02, "grad_norm": 1.5625, "learning_rate": 0.000199945031598977, "loss": 2.2966, "step": 8970 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019994497030704662, "loss": 2.2244, "step": 8975 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.0001999449089809732, "loss": 2.0124, "step": 8980 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019994484762075678, "loss": 2.0368, "step": 8985 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.0001999447862263974, "loss": 2.1568, "step": 8990 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.0001999447247978951, "loss": 2.3311, "step": 8995 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019994466333524984, "loss": 2.261, "step": 9000 }, { "epoch": 0.02, "grad_norm": 1.640625, "learning_rate": 0.00019994460183846163, "loss": 2.1998, "step": 9005 }, { "epoch": 0.02, "grad_norm": 1.453125, "learning_rate": 0.0001999445403075306, "loss": 2.3582, "step": 9010 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.00019994447874245667, "loss": 2.2456, "step": 9015 }, { "epoch": 0.02, "grad_norm": 1.3828125, "learning_rate": 0.0001999444171432399, "loss": 2.2082, "step": 9020 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.0001999443555098803, "loss": 2.1586, "step": 9025 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.0001999442938423779, "loss": 2.1972, "step": 9030 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.0001999442321407327, "loss": 2.4134, "step": 9035 }, { "epoch": 0.02, "grad_norm": 2.375, "learning_rate": 0.00019994417040494476, "loss": 2.0827, "step": 9040 }, { "epoch": 0.02, "grad_norm": 2.1875, "learning_rate": 0.00019994410863501407, "loss": 2.2398, "step": 9045 }, { "epoch": 0.02, "grad_norm": 1.4921875, "learning_rate": 0.00019994404683094068, "loss": 2.2573, "step": 9050 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019994398499272454, "loss": 2.2285, "step": 9055 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019994392312036577, "loss": 2.3217, "step": 9060 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019994386121386434, "loss": 2.1862, "step": 9065 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019994379927322023, "loss": 2.2904, "step": 9070 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019994373729843355, "loss": 2.341, "step": 9075 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.00019994367528950425, "loss": 2.2477, "step": 9080 }, { "epoch": 0.02, "grad_norm": 2.203125, "learning_rate": 0.0001999436132464324, "loss": 2.3324, "step": 9085 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.000199943551169218, "loss": 2.2516, "step": 9090 }, { "epoch": 0.02, "grad_norm": 2.0625, "learning_rate": 0.00019994348905786106, "loss": 2.1851, "step": 9095 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019994342691236162, "loss": 2.3794, "step": 9100 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019994336473271967, "loss": 2.0697, "step": 9105 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019994330251893528, "loss": 2.2772, "step": 9110 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019994324027100845, "loss": 2.3759, "step": 9115 }, { "epoch": 0.02, "grad_norm": 1.4140625, "learning_rate": 0.00019994317798893919, "loss": 1.9551, "step": 9120 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.00019994311567272748, "loss": 2.2638, "step": 9125 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019994305332237343, "loss": 2.2438, "step": 9130 }, { "epoch": 0.02, "grad_norm": 1.75, "learning_rate": 0.00019994299093787702, "loss": 2.2274, "step": 9135 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.0001999429285192383, "loss": 2.2698, "step": 9140 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.0001999428660664572, "loss": 2.1907, "step": 9145 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.00019994280357953385, "loss": 2.2105, "step": 9150 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019994274105846822, "loss": 2.2767, "step": 9155 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019994267850326032, "loss": 2.2011, "step": 9160 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.00019994261591391022, "loss": 2.2175, "step": 9165 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019994255329041787, "loss": 2.1646, "step": 9170 }, { "epoch": 0.02, "grad_norm": 2.109375, "learning_rate": 0.00019994249063278336, "loss": 2.1288, "step": 9175 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019994242794100665, "loss": 2.0797, "step": 9180 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019994236521508784, "loss": 2.2341, "step": 9185 }, { "epoch": 0.02, "grad_norm": 1.9921875, "learning_rate": 0.00019994230245502686, "loss": 2.3282, "step": 9190 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.0001999422396608238, "loss": 2.2636, "step": 9195 }, { "epoch": 0.02, "grad_norm": 2.125, "learning_rate": 0.0001999421768324787, "loss": 2.0817, "step": 9200 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019994211396999148, "loss": 2.289, "step": 9205 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019994205107336225, "loss": 2.2441, "step": 9210 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019994198814259097, "loss": 2.4263, "step": 9215 }, { "epoch": 0.02, "grad_norm": 1.90625, "learning_rate": 0.00019994192517767772, "loss": 2.3214, "step": 9220 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019994186217862247, "loss": 2.3144, "step": 9225 }, { "epoch": 0.02, "grad_norm": 1.4453125, "learning_rate": 0.00019994179914542532, "loss": 1.9962, "step": 9230 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.0001999417360780862, "loss": 2.0662, "step": 9235 }, { "epoch": 0.02, "grad_norm": 2.53125, "learning_rate": 0.00019994167297660516, "loss": 2.1507, "step": 9240 }, { "epoch": 0.02, "grad_norm": 2.0625, "learning_rate": 0.00019994160984098227, "loss": 2.2854, "step": 9245 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019994154667121747, "loss": 2.3995, "step": 9250 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019994148346731087, "loss": 2.3009, "step": 9255 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019994142022926244, "loss": 2.4903, "step": 9260 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.0001999413569570722, "loss": 2.2487, "step": 9265 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019994129365074015, "loss": 2.1184, "step": 9270 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019994123031026636, "loss": 2.4116, "step": 9275 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019994116693565082, "loss": 1.9815, "step": 9280 }, { "epoch": 0.02, "grad_norm": 1.5078125, "learning_rate": 0.0001999411035268936, "loss": 2.1826, "step": 9285 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.0001999410400839947, "loss": 2.2301, "step": 9290 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019994097660695407, "loss": 2.0051, "step": 9295 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019994091309577183, "loss": 2.2332, "step": 9300 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019994084955044796, "loss": 2.2943, "step": 9305 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.00019994078597098246, "loss": 2.4205, "step": 9310 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.0001999407223573754, "loss": 2.3941, "step": 9315 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019994065870962676, "loss": 2.2569, "step": 9320 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019994059502773657, "loss": 2.1002, "step": 9325 }, { "epoch": 0.02, "grad_norm": 2.25, "learning_rate": 0.00019994053131170488, "loss": 2.2047, "step": 9330 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.0001999404675615317, "loss": 2.2173, "step": 9335 }, { "epoch": 0.02, "grad_norm": 1.90625, "learning_rate": 0.00019994040377721702, "loss": 2.2147, "step": 9340 }, { "epoch": 0.02, "grad_norm": 1.9453125, "learning_rate": 0.0001999403399587609, "loss": 2.4016, "step": 9345 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019994027610616334, "loss": 2.0737, "step": 9350 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019994021221942437, "loss": 2.1702, "step": 9355 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.000199940148298544, "loss": 2.246, "step": 9360 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019994008434352228, "loss": 2.1464, "step": 9365 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.0001999400203543592, "loss": 2.2819, "step": 9370 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.0001999399563310548, "loss": 2.2903, "step": 9375 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.0001999398922736091, "loss": 2.1807, "step": 9380 }, { "epoch": 0.02, "grad_norm": 2.3125, "learning_rate": 0.00019993982818202215, "loss": 2.3414, "step": 9385 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.0001999397640562939, "loss": 2.2938, "step": 9390 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019993969989642442, "loss": 2.2373, "step": 9395 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019993963570241378, "loss": 2.2738, "step": 9400 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019993957147426187, "loss": 2.2198, "step": 9405 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019993950721196885, "loss": 2.2474, "step": 9410 }, { "epoch": 0.02, "grad_norm": 2.421875, "learning_rate": 0.00019993944291553465, "loss": 2.2942, "step": 9415 }, { "epoch": 0.02, "grad_norm": 2.203125, "learning_rate": 0.00019993937858495933, "loss": 2.3277, "step": 9420 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.0001999393142202429, "loss": 2.2506, "step": 9425 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.00019993924982138542, "loss": 2.2541, "step": 9430 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019993918538838682, "loss": 2.2118, "step": 9435 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.00019993912092124722, "loss": 2.3294, "step": 9440 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019993905641996662, "loss": 2.1099, "step": 9445 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.000199938991884545, "loss": 2.2352, "step": 9450 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999389273149824, "loss": 2.1623, "step": 9455 }, { "epoch": 0.02, "grad_norm": 1.484375, "learning_rate": 0.00019993886271127886, "loss": 2.2518, "step": 9460 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.00019993879807343441, "loss": 2.1996, "step": 9465 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.00019993873340144904, "loss": 2.331, "step": 9470 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019993866869532278, "loss": 2.3412, "step": 9475 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019993860395505565, "loss": 2.107, "step": 9480 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.0001999385391806477, "loss": 2.0561, "step": 9485 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019993847437209893, "loss": 2.217, "step": 9490 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019993840952940936, "loss": 2.3706, "step": 9495 }, { "epoch": 0.02, "grad_norm": 1.6484375, "learning_rate": 0.000199938344652579, "loss": 2.2831, "step": 9500 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019993827974160793, "loss": 2.2117, "step": 9505 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.0001999382147964961, "loss": 2.3083, "step": 9510 }, { "epoch": 0.02, "grad_norm": 1.625, "learning_rate": 0.00019993814981724358, "loss": 2.2176, "step": 9515 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019993808480385037, "loss": 2.3248, "step": 9520 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.0001999380197563165, "loss": 2.0946, "step": 9525 }, { "epoch": 0.02, "grad_norm": 1.578125, "learning_rate": 0.00019993795467464199, "loss": 2.2198, "step": 9530 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019993788955882688, "loss": 2.2996, "step": 9535 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019993782440887113, "loss": 2.1238, "step": 9540 }, { "epoch": 0.02, "grad_norm": 1.5703125, "learning_rate": 0.0001999377592247748, "loss": 2.3761, "step": 9545 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019993769400653797, "loss": 2.1416, "step": 9550 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.0001999376287541606, "loss": 2.3376, "step": 9555 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.0001999375634676427, "loss": 1.9727, "step": 9560 }, { "epoch": 0.02, "grad_norm": 2.359375, "learning_rate": 0.00019993749814698432, "loss": 2.2341, "step": 9565 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.0001999374327921855, "loss": 2.4086, "step": 9570 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019993736740324622, "loss": 2.1291, "step": 9575 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999373019801665, "loss": 2.249, "step": 9580 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019993723652294643, "loss": 2.1775, "step": 9585 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019993717103158597, "loss": 2.1096, "step": 9590 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019993710550608517, "loss": 2.265, "step": 9595 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.00019993703994644404, "loss": 2.3416, "step": 9600 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.00019993697435266258, "loss": 2.236, "step": 9605 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019993690872474086, "loss": 2.0926, "step": 9610 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.0001999368430626789, "loss": 2.1354, "step": 9615 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019993677736647667, "loss": 2.0912, "step": 9620 }, { "epoch": 0.02, "grad_norm": 1.5703125, "learning_rate": 0.00019993671163613422, "loss": 2.1404, "step": 9625 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019993664587165163, "loss": 2.2896, "step": 9630 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019993658007302881, "loss": 2.2438, "step": 9635 }, { "epoch": 0.02, "grad_norm": 1.71875, "learning_rate": 0.00019993651424026585, "loss": 2.1957, "step": 9640 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019993644837336278, "loss": 2.1948, "step": 9645 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019993638247231958, "loss": 2.3364, "step": 9650 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019993631653713632, "loss": 2.3268, "step": 9655 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019993625056781303, "loss": 2.1329, "step": 9660 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019993618456434965, "loss": 2.2092, "step": 9665 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019993611852674628, "loss": 2.1286, "step": 9670 }, { "epoch": 0.02, "grad_norm": 2.484375, "learning_rate": 0.00019993605245500297, "loss": 2.3762, "step": 9675 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.0001999359863491196, "loss": 2.243, "step": 9680 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019993592020909636, "loss": 2.105, "step": 9685 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.00019993585403493315, "loss": 2.1309, "step": 9690 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019993578782663004, "loss": 2.2768, "step": 9695 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019993572158418709, "loss": 2.1582, "step": 9700 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019993565530760426, "loss": 2.35, "step": 9705 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.0001999355889968816, "loss": 2.1027, "step": 9710 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019993552265201915, "loss": 2.1838, "step": 9715 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.0001999354562730169, "loss": 2.1996, "step": 9720 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019993538985987487, "loss": 2.1072, "step": 9725 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.0001999353234125931, "loss": 2.2899, "step": 9730 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019993525693117164, "loss": 2.3682, "step": 9735 }, { "epoch": 0.02, "grad_norm": 1.375, "learning_rate": 0.00019993519041561046, "loss": 2.2454, "step": 9740 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.0001999351238659096, "loss": 2.2502, "step": 9745 }, { "epoch": 0.02, "grad_norm": 1.578125, "learning_rate": 0.0001999350572820691, "loss": 2.232, "step": 9750 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019993499066408896, "loss": 2.1283, "step": 9755 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019993492401196926, "loss": 2.2862, "step": 9760 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.0001999348573257099, "loss": 2.1033, "step": 9765 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019993479060531106, "loss": 2.3554, "step": 9770 }, { "epoch": 0.02, "grad_norm": 2.015625, "learning_rate": 0.00019993472385077264, "loss": 2.2383, "step": 9775 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.0001999346570620947, "loss": 2.0654, "step": 9780 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.0001999345902392773, "loss": 2.0995, "step": 9785 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019993452338232042, "loss": 2.4826, "step": 9790 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019993445649122408, "loss": 2.1489, "step": 9795 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.00019993438956598833, "loss": 2.0229, "step": 9800 }, { "epoch": 0.02, "grad_norm": 1.53125, "learning_rate": 0.0001999343226066132, "loss": 2.3284, "step": 9805 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.00019993425561309864, "loss": 2.1034, "step": 9810 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019993418858544476, "loss": 2.2674, "step": 9815 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019993412152365155, "loss": 2.231, "step": 9820 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019993405442771903, "loss": 2.1535, "step": 9825 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.0001999339872976472, "loss": 2.1232, "step": 9830 }, { "epoch": 0.02, "grad_norm": 1.5, "learning_rate": 0.00019993392013343612, "loss": 2.3204, "step": 9835 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019993385293508583, "loss": 2.1381, "step": 9840 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019993378570259628, "loss": 2.1272, "step": 9845 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019993371843596758, "loss": 2.2441, "step": 9850 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019993365113519967, "loss": 2.1753, "step": 9855 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019993358380029265, "loss": 2.0461, "step": 9860 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019993351643124647, "loss": 2.225, "step": 9865 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.0001999334490280612, "loss": 2.2775, "step": 9870 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019993338159073686, "loss": 2.1866, "step": 9875 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019993331411927346, "loss": 2.221, "step": 9880 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019993324661367104, "loss": 2.2824, "step": 9885 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.00019993317907392958, "loss": 2.243, "step": 9890 }, { "epoch": 0.02, "grad_norm": 1.5234375, "learning_rate": 0.00019993311150004917, "loss": 2.1587, "step": 9895 }, { "epoch": 0.02, "grad_norm": 1.5703125, "learning_rate": 0.00019993304389202975, "loss": 2.1438, "step": 9900 }, { "epoch": 0.02, "grad_norm": 1.875, "learning_rate": 0.00019993297624987144, "loss": 2.3603, "step": 9905 }, { "epoch": 0.02, "grad_norm": 1.96875, "learning_rate": 0.00019993290857357417, "loss": 2.2253, "step": 9910 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019993284086313804, "loss": 2.2476, "step": 9915 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019993277311856303, "loss": 2.149, "step": 9920 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019993270533984916, "loss": 2.0296, "step": 9925 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.0001999326375269965, "loss": 2.1527, "step": 9930 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.000199932569680005, "loss": 2.2873, "step": 9935 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019993250179887472, "loss": 2.3048, "step": 9940 }, { "epoch": 0.02, "grad_norm": 1.40625, "learning_rate": 0.00019993243388360572, "loss": 2.2617, "step": 9945 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019993236593419795, "loss": 2.2659, "step": 9950 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.0001999322979506515, "loss": 2.3056, "step": 9955 }, { "epoch": 0.02, "grad_norm": 2.15625, "learning_rate": 0.00019993222993296635, "loss": 2.3422, "step": 9960 }, { "epoch": 0.02, "grad_norm": 2.5625, "learning_rate": 0.00019993216188114255, "loss": 2.122, "step": 9965 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999320937951801, "loss": 2.1033, "step": 9970 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019993202567507903, "loss": 2.2827, "step": 9975 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019993195752083937, "loss": 2.2579, "step": 9980 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019993188933246114, "loss": 2.1389, "step": 9985 }, { "epoch": 0.02, "grad_norm": 1.40625, "learning_rate": 0.00019993182110994437, "loss": 2.2176, "step": 9990 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019993175285328908, "loss": 2.2778, "step": 9995 }, { "epoch": 0.02, "grad_norm": 1.9921875, "learning_rate": 0.0001999316845624953, "loss": 2.1792, "step": 10000 }, { "epoch": 0.02, "grad_norm": 1.921875, "learning_rate": 0.00019993161623756302, "loss": 2.2659, "step": 10005 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.0001999315478784923, "loss": 2.1513, "step": 10010 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.00019993147948528316, "loss": 2.3828, "step": 10015 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.0001999314110579356, "loss": 2.2038, "step": 10020 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019993134259644964, "loss": 2.2293, "step": 10025 }, { "epoch": 0.02, "grad_norm": 1.9765625, "learning_rate": 0.00019993127410082538, "loss": 2.197, "step": 10030 }, { "epoch": 0.02, "grad_norm": 1.828125, "learning_rate": 0.00019993120557106273, "loss": 2.3534, "step": 10035 }, { "epoch": 0.02, "grad_norm": 2.4375, "learning_rate": 0.0001999311370071618, "loss": 2.1523, "step": 10040 }, { "epoch": 0.02, "grad_norm": 1.3515625, "learning_rate": 0.00019993106840912256, "loss": 1.9926, "step": 10045 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019993099977694506, "loss": 2.0637, "step": 10050 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019993093111062932, "loss": 2.2691, "step": 10055 }, { "epoch": 0.02, "grad_norm": 1.625, "learning_rate": 0.00019993086241017536, "loss": 2.2447, "step": 10060 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.0001999307936755832, "loss": 2.2669, "step": 10065 }, { "epoch": 0.02, "grad_norm": 2.546875, "learning_rate": 0.0001999307249068529, "loss": 2.003, "step": 10070 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.0001999306561039844, "loss": 2.2318, "step": 10075 }, { "epoch": 0.02, "grad_norm": 1.609375, "learning_rate": 0.0001999305872669778, "loss": 2.2031, "step": 10080 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999305183958331, "loss": 2.1608, "step": 10085 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.00019993044949055032, "loss": 2.3604, "step": 10090 }, { "epoch": 0.02, "grad_norm": 2.375, "learning_rate": 0.0001999303805511295, "loss": 2.1306, "step": 10095 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019993031157757065, "loss": 2.146, "step": 10100 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999302425698738, "loss": 2.3781, "step": 10105 }, { "epoch": 0.02, "grad_norm": 1.5390625, "learning_rate": 0.00019993017352803893, "loss": 2.1261, "step": 10110 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019993010445206613, "loss": 2.1923, "step": 10115 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.00019993003534195537, "loss": 2.2485, "step": 10120 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019992996619770672, "loss": 2.161, "step": 10125 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019992989701932015, "loss": 2.0586, "step": 10130 }, { "epoch": 0.02, "grad_norm": 1.9375, "learning_rate": 0.00019992982780679577, "loss": 2.197, "step": 10135 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.0001999297585601335, "loss": 1.9175, "step": 10140 }, { "epoch": 0.02, "grad_norm": 1.5546875, "learning_rate": 0.00019992968927933343, "loss": 2.028, "step": 10145 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019992961996439555, "loss": 2.27, "step": 10150 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019992955061531994, "loss": 2.0836, "step": 10155 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019992948123210655, "loss": 2.2764, "step": 10160 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019992941181475545, "loss": 2.1416, "step": 10165 }, { "epoch": 0.02, "grad_norm": 1.8203125, "learning_rate": 0.00019992934236326665, "loss": 2.2194, "step": 10170 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.00019992927287764016, "loss": 2.241, "step": 10175 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.00019992920335787603, "loss": 2.3162, "step": 10180 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019992913380397428, "loss": 1.9571, "step": 10185 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019992906421593493, "loss": 2.1016, "step": 10190 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019992899459375795, "loss": 2.3157, "step": 10195 }, { "epoch": 0.02, "grad_norm": 2.03125, "learning_rate": 0.00019992892493744346, "loss": 2.1592, "step": 10200 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019992885524699143, "loss": 2.3585, "step": 10205 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019992878552240186, "loss": 2.1572, "step": 10210 }, { "epoch": 0.02, "grad_norm": 1.59375, "learning_rate": 0.00019992871576367485, "loss": 2.3438, "step": 10215 }, { "epoch": 0.02, "grad_norm": 1.4921875, "learning_rate": 0.00019992864597081033, "loss": 2.1719, "step": 10220 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019992857614380843, "loss": 2.1002, "step": 10225 }, { "epoch": 0.02, "grad_norm": 1.7421875, "learning_rate": 0.00019992850628266907, "loss": 2.3169, "step": 10230 }, { "epoch": 0.02, "grad_norm": 1.671875, "learning_rate": 0.00019992843638739233, "loss": 2.2649, "step": 10235 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019992836645797826, "loss": 2.3218, "step": 10240 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019992829649442682, "loss": 2.3601, "step": 10245 }, { "epoch": 0.02, "grad_norm": 2.046875, "learning_rate": 0.00019992822649673805, "loss": 1.9558, "step": 10250 }, { "epoch": 0.02, "grad_norm": 1.953125, "learning_rate": 0.00019992815646491198, "loss": 2.4024, "step": 10255 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.00019992808639894864, "loss": 2.421, "step": 10260 }, { "epoch": 0.02, "grad_norm": 1.5625, "learning_rate": 0.00019992801629884808, "loss": 2.2105, "step": 10265 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.00019992794616461028, "loss": 2.2463, "step": 10270 }, { "epoch": 0.02, "grad_norm": 2.03125, "learning_rate": 0.0001999278759962353, "loss": 2.2917, "step": 10275 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999278057937231, "loss": 2.1947, "step": 10280 }, { "epoch": 0.02, "grad_norm": 1.8515625, "learning_rate": 0.0001999277355570738, "loss": 2.1913, "step": 10285 }, { "epoch": 0.02, "grad_norm": 1.765625, "learning_rate": 0.00019992766528628738, "loss": 2.1167, "step": 10290 }, { "epoch": 0.02, "grad_norm": 1.890625, "learning_rate": 0.0001999275949813638, "loss": 2.1992, "step": 10295 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019992752464230318, "loss": 2.2966, "step": 10300 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.0001999274542691055, "loss": 2.2292, "step": 10305 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.0001999273838617708, "loss": 2.3233, "step": 10310 }, { "epoch": 0.02, "grad_norm": 1.546875, "learning_rate": 0.00019992731342029907, "loss": 2.119, "step": 10315 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019992724294469038, "loss": 2.4563, "step": 10320 }, { "epoch": 0.02, "grad_norm": 2.328125, "learning_rate": 0.00019992717243494469, "loss": 2.2791, "step": 10325 }, { "epoch": 0.02, "grad_norm": 1.859375, "learning_rate": 0.00019992710189106213, "loss": 2.2953, "step": 10330 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019992703131304263, "loss": 2.0772, "step": 10335 }, { "epoch": 0.02, "grad_norm": 2.0625, "learning_rate": 0.00019992696070088623, "loss": 2.1717, "step": 10340 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019992689005459297, "loss": 2.2233, "step": 10345 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.00019992681937416288, "loss": 2.3109, "step": 10350 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019992674865959597, "loss": 2.3896, "step": 10355 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019992667791089228, "loss": 2.2707, "step": 10360 }, { "epoch": 0.02, "grad_norm": 1.65625, "learning_rate": 0.0001999266071280518, "loss": 2.0906, "step": 10365 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.0001999265363110746, "loss": 2.3219, "step": 10370 }, { "epoch": 0.02, "grad_norm": 1.8671875, "learning_rate": 0.0001999264654599607, "loss": 2.2641, "step": 10375 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019992639457471007, "loss": 2.2361, "step": 10380 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.0001999263236553228, "loss": 2.3176, "step": 10385 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019992625270179887, "loss": 2.2995, "step": 10390 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019992618171413832, "loss": 2.3012, "step": 10395 }, { "epoch": 0.02, "grad_norm": 2.125, "learning_rate": 0.0001999261106923412, "loss": 2.3969, "step": 10400 }, { "epoch": 0.02, "grad_norm": 1.8046875, "learning_rate": 0.00019992603963640747, "loss": 2.3755, "step": 10405 }, { "epoch": 0.02, "grad_norm": 1.78125, "learning_rate": 0.00019992596854633723, "loss": 2.2796, "step": 10410 }, { "epoch": 0.02, "grad_norm": 1.484375, "learning_rate": 0.00019992589742213044, "loss": 2.0466, "step": 10415 }, { "epoch": 0.02, "grad_norm": 1.9296875, "learning_rate": 0.00019992582626378718, "loss": 2.2855, "step": 10420 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.0001999257550713074, "loss": 2.664, "step": 10425 }, { "epoch": 0.02, "grad_norm": 1.9140625, "learning_rate": 0.00019992568384469118, "loss": 2.3211, "step": 10430 }, { "epoch": 0.02, "grad_norm": 1.7265625, "learning_rate": 0.00019992561258393857, "loss": 2.2616, "step": 10435 }, { "epoch": 0.02, "grad_norm": 1.8828125, "learning_rate": 0.00019992554128904955, "loss": 1.9787, "step": 10440 }, { "epoch": 0.02, "grad_norm": 1.6015625, "learning_rate": 0.00019992546996002412, "loss": 2.0912, "step": 10445 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019992539859686238, "loss": 2.0776, "step": 10450 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019992532719956426, "loss": 2.3093, "step": 10455 }, { "epoch": 0.02, "grad_norm": 2.203125, "learning_rate": 0.0001999252557681299, "loss": 2.0959, "step": 10460 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.0001999251843025592, "loss": 2.0714, "step": 10465 }, { "epoch": 0.02, "grad_norm": 2.0, "learning_rate": 0.00019992511280285226, "loss": 2.4986, "step": 10470 }, { "epoch": 0.02, "grad_norm": 1.4453125, "learning_rate": 0.00019992504126900912, "loss": 2.1628, "step": 10475 }, { "epoch": 0.02, "grad_norm": 1.6953125, "learning_rate": 0.00019992496970102974, "loss": 2.2875, "step": 10480 }, { "epoch": 0.02, "grad_norm": 1.796875, "learning_rate": 0.00019992489809891417, "loss": 2.265, "step": 10485 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019992482646266247, "loss": 2.2558, "step": 10490 }, { "epoch": 0.02, "grad_norm": 2.09375, "learning_rate": 0.00019992475479227464, "loss": 2.1759, "step": 10495 }, { "epoch": 0.02, "grad_norm": 1.6875, "learning_rate": 0.00019992468308775068, "loss": 2.2927, "step": 10500 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019992461134909066, "loss": 2.1756, "step": 10505 }, { "epoch": 0.02, "grad_norm": 1.5859375, "learning_rate": 0.00019992453957629457, "loss": 2.1345, "step": 10510 }, { "epoch": 0.02, "grad_norm": 1.6171875, "learning_rate": 0.00019992446776936245, "loss": 2.0917, "step": 10515 }, { "epoch": 0.02, "grad_norm": 2.71875, "learning_rate": 0.0001999243959282943, "loss": 2.2115, "step": 10520 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.0001999243240530902, "loss": 2.2551, "step": 10525 }, { "epoch": 0.02, "grad_norm": 1.4453125, "learning_rate": 0.0001999242521437501, "loss": 2.1259, "step": 10530 }, { "epoch": 0.02, "grad_norm": 1.7734375, "learning_rate": 0.0001999241802002741, "loss": 2.4077, "step": 10535 }, { "epoch": 0.02, "grad_norm": 1.703125, "learning_rate": 0.00019992410822266215, "loss": 2.1997, "step": 10540 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019992403621091432, "loss": 2.2681, "step": 10545 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019992396416503063, "loss": 2.4784, "step": 10550 }, { "epoch": 0.02, "grad_norm": 1.515625, "learning_rate": 0.0001999238920850111, "loss": 1.9685, "step": 10555 }, { "epoch": 0.02, "grad_norm": 1.7109375, "learning_rate": 0.0001999238199708558, "loss": 2.2715, "step": 10560 }, { "epoch": 0.02, "grad_norm": 2.21875, "learning_rate": 0.00019992374782256466, "loss": 2.0988, "step": 10565 }, { "epoch": 0.02, "grad_norm": 1.8984375, "learning_rate": 0.00019992367564013777, "loss": 2.1575, "step": 10570 }, { "epoch": 0.02, "grad_norm": 1.7890625, "learning_rate": 0.00019992360342357515, "loss": 2.2754, "step": 10575 }, { "epoch": 0.02, "grad_norm": 1.6796875, "learning_rate": 0.00019992353117287678, "loss": 2.3162, "step": 10580 }, { "epoch": 0.02, "grad_norm": 1.7578125, "learning_rate": 0.00019992345888804276, "loss": 2.3451, "step": 10585 }, { "epoch": 0.02, "grad_norm": 1.6640625, "learning_rate": 0.00019992338656907305, "loss": 2.1281, "step": 10590 }, { "epoch": 0.02, "grad_norm": 1.8359375, "learning_rate": 0.00019992331421596772, "loss": 2.1633, "step": 10595 }, { "epoch": 0.02, "grad_norm": 4.71875, "learning_rate": 0.00019992324182872674, "loss": 2.2447, "step": 10600 }, { "epoch": 0.02, "grad_norm": 1.6328125, "learning_rate": 0.0001999231694073502, "loss": 2.411, "step": 10605 }, { "epoch": 0.02, "grad_norm": 1.8125, "learning_rate": 0.0001999230969518381, "loss": 2.2695, "step": 10610 }, { "epoch": 0.02, "grad_norm": 1.734375, "learning_rate": 0.00019992302446219043, "loss": 2.2055, "step": 10615 }, { "epoch": 0.02, "grad_norm": 1.84375, "learning_rate": 0.00019992295193840725, "loss": 2.2109, "step": 10620 }, { "epoch": 0.03, "grad_norm": 2.15625, "learning_rate": 0.0001999228793804886, "loss": 2.2278, "step": 10625 }, { "epoch": 0.03, "grad_norm": 2.140625, "learning_rate": 0.00019992280678843446, "loss": 2.1855, "step": 10630 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.00019992273416224486, "loss": 1.9629, "step": 10635 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019992266150191987, "loss": 2.1731, "step": 10640 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.0001999225888074595, "loss": 2.1467, "step": 10645 }, { "epoch": 0.03, "grad_norm": 1.609375, "learning_rate": 0.00019992251607886373, "loss": 2.218, "step": 10650 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.0001999224433161326, "loss": 2.132, "step": 10655 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019992237051926618, "loss": 2.1207, "step": 10660 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019992229768826448, "loss": 2.2079, "step": 10665 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019992222482312748, "loss": 2.1372, "step": 10670 }, { "epoch": 0.03, "grad_norm": 1.4609375, "learning_rate": 0.00019992215192385525, "loss": 2.2366, "step": 10675 }, { "epoch": 0.03, "grad_norm": 1.46875, "learning_rate": 0.0001999220789904478, "loss": 2.2155, "step": 10680 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019992200602290518, "loss": 2.1334, "step": 10685 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019992193302122738, "loss": 2.4385, "step": 10690 }, { "epoch": 0.03, "grad_norm": 1.5546875, "learning_rate": 0.0001999218599854144, "loss": 2.3661, "step": 10695 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019992178691546631, "loss": 2.1743, "step": 10700 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019992171381138314, "loss": 2.1491, "step": 10705 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.0001999216406731649, "loss": 2.335, "step": 10710 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.0001999215675008116, "loss": 2.2828, "step": 10715 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.0001999214942943233, "loss": 2.2725, "step": 10720 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019992142105370003, "loss": 2.1377, "step": 10725 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019992134777894175, "loss": 2.3535, "step": 10730 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019992127447004854, "loss": 2.2971, "step": 10735 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.0001999212011270204, "loss": 2.0853, "step": 10740 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.0001999211277498574, "loss": 2.1873, "step": 10745 }, { "epoch": 0.03, "grad_norm": 2.328125, "learning_rate": 0.0001999210543385595, "loss": 2.4488, "step": 10750 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019992098089312673, "loss": 2.1761, "step": 10755 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019992090741355917, "loss": 2.3314, "step": 10760 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019992083389985682, "loss": 2.2093, "step": 10765 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.0001999207603520197, "loss": 2.1987, "step": 10770 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019992068677004784, "loss": 2.0883, "step": 10775 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019992061315394125, "loss": 2.1326, "step": 10780 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019992053950369996, "loss": 2.1632, "step": 10785 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019992046581932403, "loss": 2.3154, "step": 10790 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019992039210081343, "loss": 2.1478, "step": 10795 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.0001999203183481682, "loss": 2.2178, "step": 10800 }, { "epoch": 0.03, "grad_norm": 2.4375, "learning_rate": 0.0001999202445613884, "loss": 2.2183, "step": 10805 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019992017074047406, "loss": 2.3963, "step": 10810 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019992009688542514, "loss": 2.3261, "step": 10815 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.00019992002299624168, "loss": 2.3748, "step": 10820 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.00019991994907292377, "loss": 2.1485, "step": 10825 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.0001999198751154714, "loss": 2.113, "step": 10830 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019991980112388455, "loss": 2.137, "step": 10835 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019991972709816332, "loss": 2.2773, "step": 10840 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019991965303830766, "loss": 2.1449, "step": 10845 }, { "epoch": 0.03, "grad_norm": 1.6171875, "learning_rate": 0.00019991957894431768, "loss": 2.2835, "step": 10850 }, { "epoch": 0.03, "grad_norm": 1.5546875, "learning_rate": 0.00019991950481619333, "loss": 2.3898, "step": 10855 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019991943065393467, "loss": 2.1315, "step": 10860 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019991935645754171, "loss": 2.2463, "step": 10865 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.0001999192822270145, "loss": 2.2828, "step": 10870 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019991920796235305, "loss": 2.2182, "step": 10875 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.0001999191336635574, "loss": 2.2683, "step": 10880 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019991905933062752, "loss": 2.0888, "step": 10885 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.0001999189849635635, "loss": 2.1656, "step": 10890 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019991891056236538, "loss": 2.0872, "step": 10895 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.0001999188361270331, "loss": 2.1521, "step": 10900 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.00019991876165756676, "loss": 2.1692, "step": 10905 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019991868715396634, "loss": 2.1315, "step": 10910 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019991861261623188, "loss": 2.2877, "step": 10915 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019991853804436342, "loss": 2.2852, "step": 10920 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019991846343836098, "loss": 2.0885, "step": 10925 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.0001999183887982246, "loss": 2.1342, "step": 10930 }, { "epoch": 0.03, "grad_norm": 2.1875, "learning_rate": 0.00019991831412395424, "loss": 2.2736, "step": 10935 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.00019991823941555, "loss": 2.1001, "step": 10940 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019991816467301187, "loss": 2.2034, "step": 10945 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019991808989633988, "loss": 2.1051, "step": 10950 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019991801508553408, "loss": 2.1712, "step": 10955 }, { "epoch": 0.03, "grad_norm": 2.28125, "learning_rate": 0.00019991794024059445, "loss": 2.0935, "step": 10960 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019991786536152105, "loss": 2.2532, "step": 10965 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.0001999177904483139, "loss": 2.0147, "step": 10970 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.000199917715500973, "loss": 2.1756, "step": 10975 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019991764051949842, "loss": 2.2654, "step": 10980 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019991756550389017, "loss": 2.1712, "step": 10985 }, { "epoch": 0.03, "grad_norm": 2.28125, "learning_rate": 0.00019991749045414822, "loss": 2.106, "step": 10990 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.00019991741537027266, "loss": 2.2688, "step": 10995 }, { "epoch": 0.03, "grad_norm": 2.21875, "learning_rate": 0.00019991734025226352, "loss": 2.1262, "step": 11000 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.0001999172651001208, "loss": 2.2642, "step": 11005 }, { "epoch": 0.03, "grad_norm": 2.140625, "learning_rate": 0.00019991718991384453, "loss": 2.2204, "step": 11010 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019991711469343474, "loss": 2.1513, "step": 11015 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019991703943889145, "loss": 2.3747, "step": 11020 }, { "epoch": 0.03, "grad_norm": 2.1875, "learning_rate": 0.00019991696415021465, "loss": 2.3106, "step": 11025 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.00019991688882740446, "loss": 2.1993, "step": 11030 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.0001999168134704608, "loss": 2.2105, "step": 11035 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.00019991673807938379, "loss": 2.3017, "step": 11040 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019991666265417338, "loss": 2.2292, "step": 11045 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.0001999165871948296, "loss": 2.3894, "step": 11050 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019991651170135258, "loss": 2.334, "step": 11055 }, { "epoch": 0.03, "grad_norm": 1.4765625, "learning_rate": 0.00019991643617374219, "loss": 2.1333, "step": 11060 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019991636061199856, "loss": 2.2407, "step": 11065 }, { "epoch": 0.03, "grad_norm": 2.0, "learning_rate": 0.00019991628501612172, "loss": 2.2082, "step": 11070 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019991620938611163, "loss": 2.2421, "step": 11075 }, { "epoch": 0.03, "grad_norm": 2.1875, "learning_rate": 0.00019991613372196836, "loss": 2.0302, "step": 11080 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.00019991605802369193, "loss": 2.2133, "step": 11085 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019991598229128236, "loss": 2.3101, "step": 11090 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019991590652473966, "loss": 2.2022, "step": 11095 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.0001999158307240639, "loss": 2.2263, "step": 11100 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019991575488925508, "loss": 2.2231, "step": 11105 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.0001999156790203132, "loss": 2.1702, "step": 11110 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.00019991560311723833, "loss": 2.1754, "step": 11115 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019991552718003048, "loss": 2.1714, "step": 11120 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.0001999154512086897, "loss": 2.2262, "step": 11125 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019991537520321596, "loss": 1.9654, "step": 11130 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.0001999152991636093, "loss": 2.315, "step": 11135 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019991522308986976, "loss": 2.3425, "step": 11140 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.0001999151469819974, "loss": 2.191, "step": 11145 }, { "epoch": 0.03, "grad_norm": 1.453125, "learning_rate": 0.0001999150708399922, "loss": 2.0641, "step": 11150 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.0001999149946638542, "loss": 2.0862, "step": 11155 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.00019991491845358343, "loss": 2.2477, "step": 11160 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019991484220917988, "loss": 2.2, "step": 11165 }, { "epoch": 0.03, "grad_norm": 1.5234375, "learning_rate": 0.00019991476593064365, "loss": 2.191, "step": 11170 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019991468961797472, "loss": 2.0614, "step": 11175 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.0001999146132711731, "loss": 2.2582, "step": 11180 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019991453689023885, "loss": 2.143, "step": 11185 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019991446047517197, "loss": 2.2096, "step": 11190 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019991438402597252, "loss": 2.2404, "step": 11195 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019991430754264048, "loss": 2.1349, "step": 11200 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019991423102517592, "loss": 2.3335, "step": 11205 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019991415447357885, "loss": 2.259, "step": 11210 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.00019991407788784926, "loss": 2.2, "step": 11215 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019991400126798722, "loss": 2.2968, "step": 11220 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019991392461399276, "loss": 2.0485, "step": 11225 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019991384792586588, "loss": 2.245, "step": 11230 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019991377120360664, "loss": 2.3002, "step": 11235 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.000199913694447215, "loss": 2.1686, "step": 11240 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.0001999136176566911, "loss": 2.2341, "step": 11245 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.0001999135408320348, "loss": 2.1737, "step": 11250 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019991346397324632, "loss": 2.2351, "step": 11255 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019991338708032552, "loss": 2.3409, "step": 11260 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019991331015327253, "loss": 2.2979, "step": 11265 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019991323319208733, "loss": 2.3351, "step": 11270 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019991315619676992, "loss": 2.0854, "step": 11275 }, { "epoch": 0.03, "grad_norm": 1.5078125, "learning_rate": 0.00019991307916732043, "loss": 2.2989, "step": 11280 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.0001999130021037388, "loss": 2.1865, "step": 11285 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.00019991292500602507, "loss": 2.0601, "step": 11290 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019991284787417927, "loss": 2.2261, "step": 11295 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.00019991277070820142, "loss": 2.1196, "step": 11300 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019991269350809156, "loss": 2.3312, "step": 11305 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019991261627384975, "loss": 2.0565, "step": 11310 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.00019991253900547592, "loss": 2.0883, "step": 11315 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.0001999124617029702, "loss": 2.1961, "step": 11320 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.00019991238436633252, "loss": 2.0971, "step": 11325 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.000199912306995563, "loss": 2.2318, "step": 11330 }, { "epoch": 0.03, "grad_norm": 2.28125, "learning_rate": 0.0001999122295906616, "loss": 2.3744, "step": 11335 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.0001999121521516284, "loss": 2.2979, "step": 11340 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.00019991207467846337, "loss": 2.2439, "step": 11345 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019991199717116657, "loss": 2.1996, "step": 11350 }, { "epoch": 0.03, "grad_norm": 2.15625, "learning_rate": 0.00019991191962973802, "loss": 2.3185, "step": 11355 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019991184205417776, "loss": 2.3459, "step": 11360 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.0001999117644444858, "loss": 2.1705, "step": 11365 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019991168680066218, "loss": 2.3904, "step": 11370 }, { "epoch": 0.03, "grad_norm": 2.171875, "learning_rate": 0.00019991160912270685, "loss": 2.1223, "step": 11375 }, { "epoch": 0.03, "grad_norm": 1.421875, "learning_rate": 0.00019991153141061995, "loss": 2.213, "step": 11380 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019991145366440147, "loss": 2.2429, "step": 11385 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019991137588405141, "loss": 2.0726, "step": 11390 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019991129806956983, "loss": 2.3059, "step": 11395 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.00019991122022095672, "loss": 2.1766, "step": 11400 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.00019991114233821214, "loss": 2.2378, "step": 11405 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.0001999110644213361, "loss": 2.3376, "step": 11410 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.0001999109864703286, "loss": 2.2563, "step": 11415 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019991090848518972, "loss": 2.244, "step": 11420 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019991083046591945, "loss": 2.1239, "step": 11425 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019991075241251784, "loss": 2.2737, "step": 11430 }, { "epoch": 0.03, "grad_norm": 1.5546875, "learning_rate": 0.0001999106743249849, "loss": 2.2653, "step": 11435 }, { "epoch": 0.03, "grad_norm": 2.28125, "learning_rate": 0.00019991059620332066, "loss": 2.2683, "step": 11440 }, { "epoch": 0.03, "grad_norm": 1.609375, "learning_rate": 0.00019991051804752515, "loss": 2.3032, "step": 11445 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.0001999104398575984, "loss": 2.3917, "step": 11450 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.00019991036163354042, "loss": 2.0965, "step": 11455 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019991028337535126, "loss": 2.2796, "step": 11460 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.0001999102050830309, "loss": 2.3467, "step": 11465 }, { "epoch": 0.03, "grad_norm": 1.4609375, "learning_rate": 0.00019991012675657946, "loss": 2.3167, "step": 11470 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.0001999100483959969, "loss": 2.3901, "step": 11475 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019990997000128322, "loss": 2.1456, "step": 11480 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.0001999098915724385, "loss": 2.2445, "step": 11485 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.00019990981310946272, "loss": 2.1281, "step": 11490 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019990973461235599, "loss": 2.3469, "step": 11495 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019990965608111827, "loss": 2.3854, "step": 11500 }, { "epoch": 0.03, "grad_norm": 1.3046875, "learning_rate": 0.00019990957751574956, "loss": 2.2619, "step": 11505 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.00019990949891624996, "loss": 2.3058, "step": 11510 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019990942028261946, "loss": 2.2652, "step": 11515 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019990934161485808, "loss": 2.252, "step": 11520 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019990926291296588, "loss": 2.1629, "step": 11525 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019990918417694283, "loss": 2.3323, "step": 11530 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.00019990910540678902, "loss": 2.4257, "step": 11535 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019990902660250442, "loss": 2.13, "step": 11540 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.0001999089477640891, "loss": 2.2318, "step": 11545 }, { "epoch": 0.03, "grad_norm": 1.453125, "learning_rate": 0.00019990886889154308, "loss": 2.1343, "step": 11550 }, { "epoch": 0.03, "grad_norm": 1.5, "learning_rate": 0.00019990878998486634, "loss": 2.1801, "step": 11555 }, { "epoch": 0.03, "grad_norm": 2.34375, "learning_rate": 0.000199908711044059, "loss": 2.1382, "step": 11560 }, { "epoch": 0.03, "grad_norm": 1.6171875, "learning_rate": 0.000199908632069121, "loss": 2.2253, "step": 11565 }, { "epoch": 0.03, "grad_norm": 1.328125, "learning_rate": 0.0001999085530600524, "loss": 2.1122, "step": 11570 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.00019990847401685325, "loss": 2.1091, "step": 11575 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019990839493952352, "loss": 2.3157, "step": 11580 }, { "epoch": 0.03, "grad_norm": 1.5078125, "learning_rate": 0.0001999083158280633, "loss": 2.4086, "step": 11585 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019990823668247258, "loss": 2.1033, "step": 11590 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019990815750275137, "loss": 2.215, "step": 11595 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019990807828889975, "loss": 2.2421, "step": 11600 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.0001999079990409177, "loss": 2.2801, "step": 11605 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.0001999079197588053, "loss": 2.1485, "step": 11610 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.0001999078404425625, "loss": 2.2117, "step": 11615 }, { "epoch": 0.03, "grad_norm": 2.203125, "learning_rate": 0.0001999077610921894, "loss": 2.2331, "step": 11620 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019990768170768597, "loss": 2.2277, "step": 11625 }, { "epoch": 0.03, "grad_norm": 2.3125, "learning_rate": 0.00019990760228905227, "loss": 2.1713, "step": 11630 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019990752283628832, "loss": 2.3031, "step": 11635 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019990744334939415, "loss": 2.3128, "step": 11640 }, { "epoch": 0.03, "grad_norm": 1.4453125, "learning_rate": 0.0001999073638283698, "loss": 2.2222, "step": 11645 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.0001999072842732153, "loss": 2.3343, "step": 11650 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.00019990720468393062, "loss": 2.311, "step": 11655 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019990712506051583, "loss": 2.5447, "step": 11660 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019990704540297097, "loss": 2.1057, "step": 11665 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019990696571129605, "loss": 1.948, "step": 11670 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.0001999068859854911, "loss": 2.2379, "step": 11675 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019990680622555614, "loss": 2.0946, "step": 11680 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.0001999067264314912, "loss": 2.1938, "step": 11685 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019990664660329632, "loss": 1.9944, "step": 11690 }, { "epoch": 0.03, "grad_norm": 1.890625, "learning_rate": 0.0001999065667409715, "loss": 2.274, "step": 11695 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019990648684451681, "loss": 2.077, "step": 11700 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019990640691393226, "loss": 2.2447, "step": 11705 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.00019990632694921783, "loss": 2.2361, "step": 11710 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019990624695037358, "loss": 2.2975, "step": 11715 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.0001999061669173996, "loss": 2.28, "step": 11720 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019990608685029583, "loss": 2.2875, "step": 11725 }, { "epoch": 0.03, "grad_norm": 1.6171875, "learning_rate": 0.00019990600674906232, "loss": 2.0993, "step": 11730 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.0001999059266136991, "loss": 2.28, "step": 11735 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019990584644420623, "loss": 2.1471, "step": 11740 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.0001999057662405837, "loss": 2.2565, "step": 11745 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019990568600283158, "loss": 2.2857, "step": 11750 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.0001999056057309498, "loss": 2.023, "step": 11755 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.0001999055254249385, "loss": 2.1351, "step": 11760 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.00019990544508479764, "loss": 2.3176, "step": 11765 }, { "epoch": 0.03, "grad_norm": 1.515625, "learning_rate": 0.0001999053647105273, "loss": 2.2512, "step": 11770 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019990528430212744, "loss": 2.1766, "step": 11775 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019990520385959815, "loss": 2.3237, "step": 11780 }, { "epoch": 0.03, "grad_norm": 1.5, "learning_rate": 0.0001999051233829394, "loss": 1.986, "step": 11785 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019990504287215125, "loss": 2.3509, "step": 11790 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019990496232723376, "loss": 2.1648, "step": 11795 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.0001999048817481869, "loss": 2.2047, "step": 11800 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019990480113501072, "loss": 2.1868, "step": 11805 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019990472048770527, "loss": 1.9508, "step": 11810 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019990463980627053, "loss": 2.2568, "step": 11815 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019990455909070655, "loss": 2.186, "step": 11820 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019990447834101338, "loss": 2.1212, "step": 11825 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019990439755719101, "loss": 2.0859, "step": 11830 }, { "epoch": 0.03, "grad_norm": 1.609375, "learning_rate": 0.0001999043167392395, "loss": 2.2076, "step": 11835 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.00019990423588715886, "loss": 2.275, "step": 11840 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.0001999041550009491, "loss": 2.1928, "step": 11845 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019990407408061031, "loss": 2.0766, "step": 11850 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019990399312614244, "loss": 2.2191, "step": 11855 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.0001999039121375456, "loss": 2.1374, "step": 11860 }, { "epoch": 0.03, "grad_norm": 1.4921875, "learning_rate": 0.0001999038311148197, "loss": 2.0336, "step": 11865 }, { "epoch": 0.03, "grad_norm": 2.234375, "learning_rate": 0.0001999037500579649, "loss": 2.2041, "step": 11870 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019990366896698115, "loss": 2.224, "step": 11875 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.0001999035878418685, "loss": 2.2342, "step": 11880 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019990350668262695, "loss": 2.2307, "step": 11885 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019990342548925657, "loss": 2.1813, "step": 11890 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.00019990334426175737, "loss": 2.1491, "step": 11895 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019990326300012935, "loss": 2.3004, "step": 11900 }, { "epoch": 0.03, "grad_norm": 1.453125, "learning_rate": 0.00019990318170437258, "loss": 2.2427, "step": 11905 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019990310037448707, "loss": 2.127, "step": 11910 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.00019990301901047288, "loss": 2.04, "step": 11915 }, { "epoch": 0.03, "grad_norm": 1.890625, "learning_rate": 0.00019990293761232996, "loss": 2.0304, "step": 11920 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.0001999028561800584, "loss": 2.2173, "step": 11925 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019990277471365823, "loss": 2.1517, "step": 11930 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019990269321312943, "loss": 2.2784, "step": 11935 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019990261167847207, "loss": 2.2428, "step": 11940 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.0001999025301096862, "loss": 2.1534, "step": 11945 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019990244850677177, "loss": 2.3862, "step": 11950 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019990236686972886, "loss": 2.124, "step": 11955 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019990228519855752, "loss": 2.0681, "step": 11960 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.0001999022034932577, "loss": 2.1875, "step": 11965 }, { "epoch": 0.03, "grad_norm": 2.0, "learning_rate": 0.00019990212175382952, "loss": 2.1166, "step": 11970 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019990203998027296, "loss": 2.2879, "step": 11975 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.000199901958172588, "loss": 2.2809, "step": 11980 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.00019990187633077476, "loss": 2.3744, "step": 11985 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019990179445483324, "loss": 2.1236, "step": 11990 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.00019990171254476342, "loss": 2.268, "step": 11995 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.0001999016306005654, "loss": 2.0813, "step": 12000 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019990154862223913, "loss": 2.2603, "step": 12005 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.00019990146660978471, "loss": 2.2152, "step": 12010 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.0001999013845632021, "loss": 2.1961, "step": 12015 }, { "epoch": 0.03, "grad_norm": 1.4453125, "learning_rate": 0.0001999013024824914, "loss": 2.1059, "step": 12020 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.0001999012203676526, "loss": 2.3486, "step": 12025 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.00019990113821868573, "loss": 2.2242, "step": 12030 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019990105603559082, "loss": 2.1498, "step": 12035 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019990097381836788, "loss": 2.2199, "step": 12040 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.000199900891567017, "loss": 2.3172, "step": 12045 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.0001999008092815381, "loss": 2.1396, "step": 12050 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.0001999007269619313, "loss": 2.1533, "step": 12055 }, { "epoch": 0.03, "grad_norm": 1.421875, "learning_rate": 0.0001999006446081966, "loss": 2.3581, "step": 12060 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.00019990056222033404, "loss": 2.3046, "step": 12065 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019990047979834364, "loss": 2.3325, "step": 12070 }, { "epoch": 0.03, "grad_norm": 1.4609375, "learning_rate": 0.0001999003973422254, "loss": 1.986, "step": 12075 }, { "epoch": 0.03, "grad_norm": 1.4765625, "learning_rate": 0.0001999003148519794, "loss": 2.1922, "step": 12080 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019990023232760562, "loss": 2.3627, "step": 12085 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.0001999001497691041, "loss": 2.2677, "step": 12090 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.0001999000671764749, "loss": 2.2485, "step": 12095 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.00019989998454971803, "loss": 2.1445, "step": 12100 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.0001998999018888335, "loss": 2.2739, "step": 12105 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019989981919382136, "loss": 2.213, "step": 12110 }, { "epoch": 0.03, "grad_norm": 1.4765625, "learning_rate": 0.0001998997364646816, "loss": 2.2034, "step": 12115 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.0001998996537014143, "loss": 2.1361, "step": 12120 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019989957090401947, "loss": 2.0867, "step": 12125 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019989948807249715, "loss": 2.2291, "step": 12130 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.00019989940520684734, "loss": 2.1812, "step": 12135 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019989932230707008, "loss": 2.1426, "step": 12140 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019989923937316538, "loss": 2.1501, "step": 12145 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.0001998991564051333, "loss": 2.3361, "step": 12150 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019989907340297385, "loss": 2.2145, "step": 12155 }, { "epoch": 0.03, "grad_norm": 1.3984375, "learning_rate": 0.00019989899036668707, "loss": 2.1865, "step": 12160 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.000199898907296273, "loss": 2.2027, "step": 12165 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019989882419173163, "loss": 2.2066, "step": 12170 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019989874105306302, "loss": 2.174, "step": 12175 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.0001998986578802672, "loss": 2.1458, "step": 12180 }, { "epoch": 0.03, "grad_norm": 1.3203125, "learning_rate": 0.00019989857467334415, "loss": 2.1555, "step": 12185 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.00019989849143229398, "loss": 2.1267, "step": 12190 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019989840815711664, "loss": 2.1609, "step": 12195 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.0001998983248478122, "loss": 2.4098, "step": 12200 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019989824150438073, "loss": 2.0759, "step": 12205 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019989815812682213, "loss": 2.1454, "step": 12210 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.00019989807471513656, "loss": 2.1524, "step": 12215 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019989799126932396, "loss": 2.2526, "step": 12220 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019989790778938442, "loss": 2.2827, "step": 12225 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.00019989782427531794, "loss": 2.2876, "step": 12230 }, { "epoch": 0.03, "grad_norm": 2.15625, "learning_rate": 0.00019989774072712456, "loss": 2.1797, "step": 12235 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019989765714480427, "loss": 2.3472, "step": 12240 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019989757352835717, "loss": 2.2672, "step": 12245 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.0001998974898777832, "loss": 2.3254, "step": 12250 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019989740619308247, "loss": 2.112, "step": 12255 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.00019989732247425494, "loss": 2.1509, "step": 12260 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019989723872130072, "loss": 2.12, "step": 12265 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019989715493421976, "loss": 2.1892, "step": 12270 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.0001998970711130121, "loss": 2.42, "step": 12275 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019989698725767782, "loss": 2.2576, "step": 12280 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.00019989690336821693, "loss": 2.1623, "step": 12285 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019989681944462945, "loss": 2.2404, "step": 12290 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019989673548691537, "loss": 2.263, "step": 12295 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.00019989665149507476, "loss": 2.1496, "step": 12300 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019989656746910765, "loss": 2.1547, "step": 12305 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019989648340901407, "loss": 2.2801, "step": 12310 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019989639931479404, "loss": 2.0561, "step": 12315 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019989631518644755, "loss": 2.3066, "step": 12320 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.0001998962310239747, "loss": 2.3534, "step": 12325 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.0001998961468273755, "loss": 2.2324, "step": 12330 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019989606259664995, "loss": 2.1353, "step": 12335 }, { "epoch": 0.03, "grad_norm": 1.59375, "learning_rate": 0.00019989597833179807, "loss": 2.3965, "step": 12340 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019989589403281995, "loss": 2.07, "step": 12345 }, { "epoch": 0.03, "grad_norm": 1.3984375, "learning_rate": 0.00019989580969971554, "loss": 2.1364, "step": 12350 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019989572533248495, "loss": 2.1942, "step": 12355 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019989564093112817, "loss": 2.2771, "step": 12360 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.0001998955564956452, "loss": 2.391, "step": 12365 }, { "epoch": 0.03, "grad_norm": 1.3203125, "learning_rate": 0.00019989547202603608, "loss": 2.2222, "step": 12370 }, { "epoch": 0.03, "grad_norm": 1.4765625, "learning_rate": 0.00019989538752230088, "loss": 2.0406, "step": 12375 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.0001998953029844396, "loss": 2.3122, "step": 12380 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.00019989521841245228, "loss": 2.2354, "step": 12385 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019989513380633893, "loss": 2.2834, "step": 12390 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019989504916609961, "loss": 2.2271, "step": 12395 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019989496449173433, "loss": 2.0817, "step": 12400 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019989487978324308, "loss": 2.3241, "step": 12405 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.000199894795040626, "loss": 2.2405, "step": 12410 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019989471026388297, "loss": 2.1442, "step": 12415 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.0001998946254530141, "loss": 2.1988, "step": 12420 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019989454060801947, "loss": 2.3937, "step": 12425 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.000199894455728899, "loss": 2.2776, "step": 12430 }, { "epoch": 0.03, "grad_norm": 2.0, "learning_rate": 0.0001998943708156528, "loss": 2.1848, "step": 12435 }, { "epoch": 0.03, "grad_norm": 2.5, "learning_rate": 0.00019989428586828086, "loss": 2.1922, "step": 12440 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019989420088678322, "loss": 2.0966, "step": 12445 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019989411587115994, "loss": 2.0985, "step": 12450 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.000199894030821411, "loss": 2.1884, "step": 12455 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.00019989394573753644, "loss": 2.2578, "step": 12460 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019989386061953628, "loss": 2.2498, "step": 12465 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.0001998937754674106, "loss": 2.374, "step": 12470 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.00019989369028115936, "loss": 2.342, "step": 12475 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019989360506078265, "loss": 2.2357, "step": 12480 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.00019989351980628046, "loss": 2.3642, "step": 12485 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019989343451765285, "loss": 2.1192, "step": 12490 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.0001998933491948998, "loss": 2.1498, "step": 12495 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.0001998932638380214, "loss": 2.2022, "step": 12500 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019989317844701764, "loss": 2.2064, "step": 12505 }, { "epoch": 0.03, "grad_norm": 1.484375, "learning_rate": 0.00019989309302188856, "loss": 2.2566, "step": 12510 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019989300756263417, "loss": 2.2575, "step": 12515 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019989292206925452, "loss": 2.2699, "step": 12520 }, { "epoch": 0.03, "grad_norm": 1.5078125, "learning_rate": 0.00019989283654174965, "loss": 2.3059, "step": 12525 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019989275098011958, "loss": 2.107, "step": 12530 }, { "epoch": 0.03, "grad_norm": 1.890625, "learning_rate": 0.00019989266538436432, "loss": 2.3375, "step": 12535 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.00019989257975448394, "loss": 2.2833, "step": 12540 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.0001998924940904784, "loss": 2.1958, "step": 12545 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019989240839234782, "loss": 2.3277, "step": 12550 }, { "epoch": 0.03, "grad_norm": 1.515625, "learning_rate": 0.00019989232266009216, "loss": 2.0642, "step": 12555 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019989223689371145, "loss": 2.1586, "step": 12560 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019989215109320578, "loss": 2.4419, "step": 12565 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.0001998920652585751, "loss": 2.2472, "step": 12570 }, { "epoch": 0.03, "grad_norm": 1.890625, "learning_rate": 0.00019989197938981952, "loss": 2.3347, "step": 12575 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019989189348693898, "loss": 2.2235, "step": 12580 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.0001998918075499336, "loss": 2.3429, "step": 12585 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019989172157880333, "loss": 2.213, "step": 12590 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019989163557354827, "loss": 2.2009, "step": 12595 }, { "epoch": 0.03, "grad_norm": 1.609375, "learning_rate": 0.0001998915495341684, "loss": 2.2382, "step": 12600 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019989146346066375, "loss": 2.3708, "step": 12605 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019989137735303437, "loss": 2.2269, "step": 12610 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019989129121128032, "loss": 2.1788, "step": 12615 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019989120503540158, "loss": 2.2561, "step": 12620 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019989111882539817, "loss": 2.1492, "step": 12625 }, { "epoch": 0.03, "grad_norm": 2.21875, "learning_rate": 0.00019989103258127015, "loss": 2.2762, "step": 12630 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019989094630301754, "loss": 2.3228, "step": 12635 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.0001998908599906404, "loss": 2.0521, "step": 12640 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.0001998907736441387, "loss": 2.1769, "step": 12645 }, { "epoch": 0.03, "grad_norm": 1.4921875, "learning_rate": 0.0001998906872635125, "loss": 2.1667, "step": 12650 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019989060084876184, "loss": 2.2867, "step": 12655 }, { "epoch": 0.03, "grad_norm": 1.515625, "learning_rate": 0.00019989051439988674, "loss": 2.2918, "step": 12660 }, { "epoch": 0.03, "grad_norm": 2.265625, "learning_rate": 0.0001998904279168872, "loss": 2.3155, "step": 12665 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019989034139976332, "loss": 2.3094, "step": 12670 }, { "epoch": 0.03, "grad_norm": 1.46875, "learning_rate": 0.00019989025484851508, "loss": 2.3075, "step": 12675 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019989016826314247, "loss": 2.1317, "step": 12680 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019989008164364563, "loss": 2.2244, "step": 12685 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.0001998899949900245, "loss": 2.3094, "step": 12690 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019988990830227914, "loss": 2.2254, "step": 12695 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019988982158040958, "loss": 2.2769, "step": 12700 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.00019988973482441583, "loss": 2.3123, "step": 12705 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.00019988964803429796, "loss": 2.3403, "step": 12710 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019988956121005594, "loss": 2.2132, "step": 12715 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019988947435168984, "loss": 2.1231, "step": 12720 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.0001998893874591997, "loss": 2.2705, "step": 12725 }, { "epoch": 0.03, "grad_norm": 2.8125, "learning_rate": 0.00019988930053258555, "loss": 2.2656, "step": 12730 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019988921357184738, "loss": 2.0321, "step": 12735 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019988912657698525, "loss": 2.2762, "step": 12740 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019988903954799918, "loss": 1.9666, "step": 12745 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.0001998889524848892, "loss": 2.3327, "step": 12750 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.00019988886538765534, "loss": 2.1977, "step": 12755 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.0001998887782562976, "loss": 2.3099, "step": 12760 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019988869109081612, "loss": 2.0319, "step": 12765 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.0001998886038912108, "loss": 2.2037, "step": 12770 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019988851665748172, "loss": 2.1341, "step": 12775 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019988842938962892, "loss": 2.1013, "step": 12780 }, { "epoch": 0.03, "grad_norm": 1.5546875, "learning_rate": 0.00019988834208765243, "loss": 2.2356, "step": 12785 }, { "epoch": 0.03, "grad_norm": 2.140625, "learning_rate": 0.00019988825475155227, "loss": 2.2262, "step": 12790 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019988816738132848, "loss": 2.3004, "step": 12795 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019988807997698105, "loss": 2.1546, "step": 12800 }, { "epoch": 0.03, "grad_norm": 1.5546875, "learning_rate": 0.00019988799253851006, "loss": 2.2499, "step": 12805 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019988790506591552, "loss": 2.014, "step": 12810 }, { "epoch": 0.03, "grad_norm": 2.265625, "learning_rate": 0.00019988781755919744, "loss": 1.965, "step": 12815 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019988773001835587, "loss": 2.191, "step": 12820 }, { "epoch": 0.03, "grad_norm": 1.46875, "learning_rate": 0.00019988764244339088, "loss": 2.2928, "step": 12825 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019988755483430244, "loss": 2.312, "step": 12830 }, { "epoch": 0.03, "grad_norm": 1.578125, "learning_rate": 0.00019988746719109058, "loss": 2.1342, "step": 12835 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019988737951375535, "loss": 2.017, "step": 12840 }, { "epoch": 0.03, "grad_norm": 2.109375, "learning_rate": 0.00019988729180229678, "loss": 2.3274, "step": 12845 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019988720405671493, "loss": 2.1514, "step": 12850 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019988711627700977, "loss": 2.103, "step": 12855 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.0001998870284631814, "loss": 2.2215, "step": 12860 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019988694061522978, "loss": 2.2226, "step": 12865 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019988685273315495, "loss": 2.2721, "step": 12870 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019988676481695698, "loss": 2.0589, "step": 12875 }, { "epoch": 0.03, "grad_norm": 1.890625, "learning_rate": 0.0001998866768666359, "loss": 2.0951, "step": 12880 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019988658888219168, "loss": 2.2473, "step": 12885 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019988650086362442, "loss": 2.2846, "step": 12890 }, { "epoch": 0.03, "grad_norm": 2.28125, "learning_rate": 0.00019988641281093412, "loss": 2.2513, "step": 12895 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.0001998863247241208, "loss": 2.2031, "step": 12900 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019988623660318448, "loss": 2.1935, "step": 12905 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.00019988614844812524, "loss": 2.1074, "step": 12910 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019988606025894308, "loss": 2.1331, "step": 12915 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.000199885972035638, "loss": 2.2856, "step": 12920 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019988588377821008, "loss": 2.2009, "step": 12925 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019988579548665936, "loss": 2.2636, "step": 12930 }, { "epoch": 0.03, "grad_norm": 1.515625, "learning_rate": 0.0001998857071609858, "loss": 2.0945, "step": 12935 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019988561880118948, "loss": 2.2504, "step": 12940 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019988553040727042, "loss": 2.2449, "step": 12945 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019988544197922865, "loss": 2.2837, "step": 12950 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019988535351706422, "loss": 2.1021, "step": 12955 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019988526502077712, "loss": 2.2646, "step": 12960 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.0001998851764903674, "loss": 2.337, "step": 12965 }, { "epoch": 0.03, "grad_norm": 2.140625, "learning_rate": 0.0001998850879258351, "loss": 2.224, "step": 12970 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019988499932718025, "loss": 2.2024, "step": 12975 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.00019988491069440286, "loss": 2.3228, "step": 12980 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.000199884822027503, "loss": 2.0778, "step": 12985 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019988473332648065, "loss": 2.2689, "step": 12990 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019988464459133587, "loss": 2.1132, "step": 12995 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019988455582206868, "loss": 2.1917, "step": 13000 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.0001998844670186791, "loss": 2.2857, "step": 13005 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019988437818116718, "loss": 2.1132, "step": 13010 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019988428930953298, "loss": 2.0456, "step": 13015 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019988420040377647, "loss": 2.1081, "step": 13020 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.0001998841114638977, "loss": 2.255, "step": 13025 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019988402248989672, "loss": 2.2026, "step": 13030 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019988393348177353, "loss": 2.072, "step": 13035 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.0001998838444395282, "loss": 2.2006, "step": 13040 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.0001998837553631607, "loss": 2.2118, "step": 13045 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.00019988366625267113, "loss": 2.3196, "step": 13050 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019988357710805948, "loss": 2.1509, "step": 13055 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019988348792932578, "loss": 2.3081, "step": 13060 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.0001998833987164701, "loss": 2.1762, "step": 13065 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.0001998833094694924, "loss": 2.1512, "step": 13070 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019988322018839276, "loss": 2.3904, "step": 13075 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.0001998831308731712, "loss": 2.0336, "step": 13080 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019988304152382776, "loss": 2.2052, "step": 13085 }, { "epoch": 0.03, "grad_norm": 1.609375, "learning_rate": 0.00019988295214036245, "loss": 2.0248, "step": 13090 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019988286272277535, "loss": 2.0978, "step": 13095 }, { "epoch": 0.03, "grad_norm": 2.171875, "learning_rate": 0.00019988277327106642, "loss": 2.2002, "step": 13100 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.0001998826837852357, "loss": 2.2896, "step": 13105 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019988259426528327, "loss": 2.114, "step": 13110 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019988250471120913, "loss": 2.2407, "step": 13115 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.00019988241512301332, "loss": 2.1504, "step": 13120 }, { "epoch": 0.03, "grad_norm": 1.484375, "learning_rate": 0.0001998823255006959, "loss": 2.213, "step": 13125 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.0001998822358442568, "loss": 2.1221, "step": 13130 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.00019988214615369616, "loss": 2.3309, "step": 13135 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019988205642901394, "loss": 2.2055, "step": 13140 }, { "epoch": 0.03, "grad_norm": 1.4375, "learning_rate": 0.00019988196667021022, "loss": 2.0752, "step": 13145 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019988187687728498, "loss": 2.1409, "step": 13150 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019988178705023833, "loss": 2.3376, "step": 13155 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.0001998816971890702, "loss": 1.9501, "step": 13160 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019988160729378073, "loss": 2.1071, "step": 13165 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019988151736436983, "loss": 2.1555, "step": 13170 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019988142740083763, "loss": 2.2441, "step": 13175 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.0001998813374031841, "loss": 2.0994, "step": 13180 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.0001998812473714093, "loss": 2.0278, "step": 13185 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019988115730551327, "loss": 2.1701, "step": 13190 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.000199881067205496, "loss": 2.2068, "step": 13195 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019988097707135757, "loss": 2.2514, "step": 13200 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019988088690309796, "loss": 2.2618, "step": 13205 }, { "epoch": 0.03, "grad_norm": 1.5, "learning_rate": 0.00019988079670071724, "loss": 2.1593, "step": 13210 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019988070646421543, "loss": 2.2268, "step": 13215 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.00019988061619359256, "loss": 2.0489, "step": 13220 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019988052588884868, "loss": 2.234, "step": 13225 }, { "epoch": 0.03, "grad_norm": 2.265625, "learning_rate": 0.00019988043554998377, "loss": 2.2849, "step": 13230 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.0001998803451769979, "loss": 2.1057, "step": 13235 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.0001998802547698911, "loss": 2.2843, "step": 13240 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019988016432866338, "loss": 2.2576, "step": 13245 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.00019988007385331478, "loss": 2.1038, "step": 13250 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019987998334384537, "loss": 2.0567, "step": 13255 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019987989280025511, "loss": 2.1881, "step": 13260 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.0001998798022225441, "loss": 2.189, "step": 13265 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019987971161071233, "loss": 2.2928, "step": 13270 }, { "epoch": 0.03, "grad_norm": 1.5546875, "learning_rate": 0.0001998796209647598, "loss": 2.0078, "step": 13275 }, { "epoch": 0.03, "grad_norm": 1.4140625, "learning_rate": 0.00019987953028468663, "loss": 2.0236, "step": 13280 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019987943957049277, "loss": 2.0509, "step": 13285 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.0001998793488221783, "loss": 2.1725, "step": 13290 }, { "epoch": 0.03, "grad_norm": 1.59375, "learning_rate": 0.00019987925803974322, "loss": 2.1825, "step": 13295 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019987916722318758, "loss": 2.1863, "step": 13300 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.0001998790763725114, "loss": 2.4962, "step": 13305 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019987898548771472, "loss": 2.1869, "step": 13310 }, { "epoch": 0.03, "grad_norm": 1.5703125, "learning_rate": 0.00019987889456879758, "loss": 2.1631, "step": 13315 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019987880361576, "loss": 2.1627, "step": 13320 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019987871262860198, "loss": 2.3233, "step": 13325 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.0001998786216073236, "loss": 2.0386, "step": 13330 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019987853055192486, "loss": 2.1971, "step": 13335 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.0001998784394624058, "loss": 2.175, "step": 13340 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019987834833876648, "loss": 2.2618, "step": 13345 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.0001998782571810069, "loss": 2.2382, "step": 13350 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019987816598912708, "loss": 2.3576, "step": 13355 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019987807476312707, "loss": 2.1914, "step": 13360 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.0001998779835030069, "loss": 2.2206, "step": 13365 }, { "epoch": 0.03, "grad_norm": 2.0, "learning_rate": 0.0001998778922087666, "loss": 2.2582, "step": 13370 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.0001998778008804062, "loss": 2.122, "step": 13375 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019987770951792572, "loss": 2.2801, "step": 13380 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019987761812132523, "loss": 2.1157, "step": 13385 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.0001998775266906047, "loss": 2.1358, "step": 13390 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.00019987743522576423, "loss": 2.0485, "step": 13395 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.0001998773437268038, "loss": 2.2229, "step": 13400 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.00019987725219372348, "loss": 2.1367, "step": 13405 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019987716062652323, "loss": 2.1944, "step": 13410 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019987706902520317, "loss": 2.2412, "step": 13415 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.0001998769773897633, "loss": 2.1065, "step": 13420 }, { "epoch": 0.03, "grad_norm": 2.109375, "learning_rate": 0.0001998768857202036, "loss": 2.2513, "step": 13425 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019987679401652417, "loss": 2.1618, "step": 13430 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.000199876702278725, "loss": 2.0345, "step": 13435 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019987661050680617, "loss": 2.1846, "step": 13440 }, { "epoch": 0.03, "grad_norm": 1.578125, "learning_rate": 0.00019987651870076768, "loss": 2.2795, "step": 13445 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019987642686060953, "loss": 2.1452, "step": 13450 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.0001998763349863318, "loss": 2.2829, "step": 13455 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.0001998762430779345, "loss": 2.1147, "step": 13460 }, { "epoch": 0.03, "grad_norm": 2.734375, "learning_rate": 0.00019987615113541764, "loss": 2.1094, "step": 13465 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.0001998760591587813, "loss": 2.2756, "step": 13470 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019987596714802548, "loss": 2.0829, "step": 13475 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.0001998758751031502, "loss": 2.132, "step": 13480 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019987578302415552, "loss": 2.3379, "step": 13485 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019987569091104148, "loss": 2.2923, "step": 13490 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.0001998755987638081, "loss": 2.163, "step": 13495 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019987550658245537, "loss": 2.2508, "step": 13500 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019987541436698337, "loss": 2.13, "step": 13505 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019987532211739213, "loss": 2.0938, "step": 13510 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019987522983368162, "loss": 2.3042, "step": 13515 }, { "epoch": 0.03, "grad_norm": 2.4375, "learning_rate": 0.000199875137515852, "loss": 2.2203, "step": 13520 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019987504516390316, "loss": 2.2865, "step": 13525 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019987495277783523, "loss": 2.2077, "step": 13530 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019987486035764817, "loss": 2.033, "step": 13535 }, { "epoch": 0.03, "grad_norm": 1.484375, "learning_rate": 0.00019987476790334205, "loss": 2.2173, "step": 13540 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019987467541491692, "loss": 2.1501, "step": 13545 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.0001998745828923728, "loss": 2.2739, "step": 13550 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019987449033570968, "loss": 1.9175, "step": 13555 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019987439774492766, "loss": 2.0808, "step": 13560 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.0001998743051200267, "loss": 2.142, "step": 13565 }, { "epoch": 0.03, "grad_norm": 1.4609375, "learning_rate": 0.00019987421246100685, "loss": 2.2577, "step": 13570 }, { "epoch": 0.03, "grad_norm": 1.3671875, "learning_rate": 0.00019987411976786818, "loss": 2.2139, "step": 13575 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019987402704061072, "loss": 2.0951, "step": 13580 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019987393427923448, "loss": 2.199, "step": 13585 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019987384148373948, "loss": 2.2032, "step": 13590 }, { "epoch": 0.03, "grad_norm": 2.21875, "learning_rate": 0.00019987374865412574, "loss": 2.1881, "step": 13595 }, { "epoch": 0.03, "grad_norm": 1.59375, "learning_rate": 0.00019987365579039333, "loss": 2.2668, "step": 13600 }, { "epoch": 0.03, "grad_norm": 2.109375, "learning_rate": 0.0001998735628925423, "loss": 2.0461, "step": 13605 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.00019987346996057262, "loss": 2.245, "step": 13610 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019987337699448437, "loss": 2.1247, "step": 13615 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019987328399427755, "loss": 2.0527, "step": 13620 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019987319095995219, "loss": 2.284, "step": 13625 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019987309789150834, "loss": 2.2613, "step": 13630 }, { "epoch": 0.03, "grad_norm": 2.0625, "learning_rate": 0.00019987300478894603, "loss": 2.1771, "step": 13635 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.0001998729116522653, "loss": 2.1544, "step": 13640 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.00019987281848146617, "loss": 2.2499, "step": 13645 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.0001998727252765487, "loss": 2.2461, "step": 13650 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.00019987263203751285, "loss": 2.2765, "step": 13655 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019987253876435873, "loss": 2.0951, "step": 13660 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.0001998724454570863, "loss": 2.1437, "step": 13665 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019987235211569568, "loss": 2.2075, "step": 13670 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019987225874018686, "loss": 2.2715, "step": 13675 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019987216533055982, "loss": 2.3394, "step": 13680 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019987207188681464, "loss": 2.2196, "step": 13685 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.0001998719784089514, "loss": 2.235, "step": 13690 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019987188489697003, "loss": 2.2758, "step": 13695 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.0001998717913508706, "loss": 2.1068, "step": 13700 }, { "epoch": 0.03, "grad_norm": 2.28125, "learning_rate": 0.0001998716977706532, "loss": 2.1758, "step": 13705 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.0001998716041563178, "loss": 2.3227, "step": 13710 }, { "epoch": 0.03, "grad_norm": 2.390625, "learning_rate": 0.00019987151050786443, "loss": 2.0648, "step": 13715 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019987141682529318, "loss": 2.0609, "step": 13720 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.000199871323108604, "loss": 2.2, "step": 13725 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.000199871229357797, "loss": 2.2, "step": 13730 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.0001998711355728722, "loss": 2.3486, "step": 13735 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019987104175382954, "loss": 2.0787, "step": 13740 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019987094790066917, "loss": 2.0932, "step": 13745 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019987085401339103, "loss": 2.3866, "step": 13750 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.00019987076009199523, "loss": 2.2979, "step": 13755 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019987066613648175, "loss": 2.0437, "step": 13760 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.00019987057214685064, "loss": 2.284, "step": 13765 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019987047812310195, "loss": 2.1786, "step": 13770 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.0001998703840652357, "loss": 2.1357, "step": 13775 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019987028997325187, "loss": 2.2932, "step": 13780 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019987019584715059, "loss": 2.1464, "step": 13785 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019987010168693182, "loss": 2.2134, "step": 13790 }, { "epoch": 0.03, "grad_norm": 1.4921875, "learning_rate": 0.0001998700074925956, "loss": 2.1371, "step": 13795 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.00019986991326414197, "loss": 2.1118, "step": 13800 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019986981900157097, "loss": 2.3311, "step": 13805 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.00019986972470488265, "loss": 2.1899, "step": 13810 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.000199869630374077, "loss": 2.1676, "step": 13815 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.0001998695360091541, "loss": 2.0698, "step": 13820 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.0001998694416101139, "loss": 2.0344, "step": 13825 }, { "epoch": 0.03, "grad_norm": 1.9609375, "learning_rate": 0.00019986934717695657, "loss": 2.2901, "step": 13830 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019986925270968196, "loss": 2.3112, "step": 13835 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019986915820829028, "loss": 2.1135, "step": 13840 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019986906367278144, "loss": 2.1246, "step": 13845 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019986896910315554, "loss": 2.3256, "step": 13850 }, { "epoch": 0.03, "grad_norm": 1.46875, "learning_rate": 0.0001998688744994126, "loss": 2.2243, "step": 13855 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.0001998687798615526, "loss": 2.3499, "step": 13860 }, { "epoch": 0.03, "grad_norm": 1.4609375, "learning_rate": 0.00019986868518957567, "loss": 2.143, "step": 13865 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.00019986859048348174, "loss": 2.3754, "step": 13870 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.00019986849574327092, "loss": 2.3032, "step": 13875 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019986840096894318, "loss": 2.1282, "step": 13880 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019986830616049858, "loss": 1.9671, "step": 13885 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.00019986821131793718, "loss": 2.3106, "step": 13890 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019986811644125897, "loss": 2.2574, "step": 13895 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019986802153046403, "loss": 2.2886, "step": 13900 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019986792658555235, "loss": 2.1911, "step": 13905 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.000199867831606524, "loss": 2.2005, "step": 13910 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.00019986773659337893, "loss": 2.4038, "step": 13915 }, { "epoch": 0.03, "grad_norm": 1.59375, "learning_rate": 0.00019986764154611723, "loss": 2.1534, "step": 13920 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.00019986754646473897, "loss": 2.1737, "step": 13925 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019986745134924414, "loss": 2.0975, "step": 13930 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019986735619963278, "loss": 2.2293, "step": 13935 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.0001998672610159049, "loss": 2.1974, "step": 13940 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019986716579806056, "loss": 2.174, "step": 13945 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019986707054609978, "loss": 2.1588, "step": 13950 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019986697526002263, "loss": 2.3622, "step": 13955 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019986687993982908, "loss": 2.3277, "step": 13960 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.0001998667845855192, "loss": 2.495, "step": 13965 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019986668919709302, "loss": 2.427, "step": 13970 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019986659377455054, "loss": 2.1862, "step": 13975 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019986649831789185, "loss": 2.2439, "step": 13980 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019986640282711695, "loss": 2.1247, "step": 13985 }, { "epoch": 0.03, "grad_norm": 1.5234375, "learning_rate": 0.00019986630730222586, "loss": 2.1596, "step": 13990 }, { "epoch": 0.03, "grad_norm": 1.84375, "learning_rate": 0.00019986621174321865, "loss": 2.328, "step": 13995 }, { "epoch": 0.03, "grad_norm": 2.171875, "learning_rate": 0.0001998661161500953, "loss": 2.311, "step": 14000 }, { "epoch": 0.03, "grad_norm": 1.6328125, "learning_rate": 0.0001998660205228559, "loss": 1.9292, "step": 14005 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019986592486150045, "loss": 2.223, "step": 14010 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.000199865829166029, "loss": 2.1274, "step": 14015 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019986573343644154, "loss": 2.1932, "step": 14020 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.00019986563767273818, "loss": 2.0384, "step": 14025 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019986554187491885, "loss": 2.1573, "step": 14030 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.00019986544604298366, "loss": 2.2592, "step": 14035 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019986535017693267, "loss": 2.2136, "step": 14040 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019986525427676585, "loss": 2.25, "step": 14045 }, { "epoch": 0.03, "grad_norm": 2.140625, "learning_rate": 0.0001998651583424832, "loss": 2.3575, "step": 14050 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019986506237408482, "loss": 2.0171, "step": 14055 }, { "epoch": 0.03, "grad_norm": 2.109375, "learning_rate": 0.00019986496637157072, "loss": 2.0963, "step": 14060 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019986487033494098, "loss": 2.3971, "step": 14065 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019986477426419555, "loss": 2.3034, "step": 14070 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.0001998646781593345, "loss": 2.0194, "step": 14075 }, { "epoch": 0.03, "grad_norm": 1.859375, "learning_rate": 0.00019986458202035786, "loss": 2.1795, "step": 14080 }, { "epoch": 0.03, "grad_norm": 1.78125, "learning_rate": 0.0001998644858472657, "loss": 2.0819, "step": 14085 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.000199864389640058, "loss": 2.1216, "step": 14090 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.00019986429339873481, "loss": 2.5002, "step": 14095 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019986419712329618, "loss": 2.0941, "step": 14100 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019986410081374213, "loss": 2.282, "step": 14105 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019986400447007268, "loss": 2.1882, "step": 14110 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019986390809228786, "loss": 2.2248, "step": 14115 }, { "epoch": 0.03, "grad_norm": 1.5390625, "learning_rate": 0.00019986381168038772, "loss": 2.1334, "step": 14120 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019986371523437232, "loss": 2.2645, "step": 14125 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019986361875424163, "loss": 2.248, "step": 14130 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019986352223999574, "loss": 2.1481, "step": 14135 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019986342569163466, "loss": 2.2193, "step": 14140 }, { "epoch": 0.03, "grad_norm": 1.5, "learning_rate": 0.0001998633291091584, "loss": 2.1912, "step": 14145 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019986323249256708, "loss": 2.321, "step": 14150 }, { "epoch": 0.03, "grad_norm": 1.8125, "learning_rate": 0.0001998631358418606, "loss": 2.0678, "step": 14155 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019986303915703907, "loss": 2.1586, "step": 14160 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019986294243810253, "loss": 2.1556, "step": 14165 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.000199862845685051, "loss": 2.1789, "step": 14170 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019986274889788452, "loss": 2.1468, "step": 14175 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.00019986265207660311, "loss": 2.288, "step": 14180 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.00019986255522120677, "loss": 2.291, "step": 14185 }, { "epoch": 0.03, "grad_norm": 1.4921875, "learning_rate": 0.0001998624583316956, "loss": 2.2697, "step": 14190 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019986236140806963, "loss": 2.3115, "step": 14195 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019986226445032883, "loss": 2.2104, "step": 14200 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019986216745847325, "loss": 2.3722, "step": 14205 }, { "epoch": 0.03, "grad_norm": 1.6015625, "learning_rate": 0.000199862070432503, "loss": 2.0268, "step": 14210 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019986197337241803, "loss": 2.2044, "step": 14215 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019986187627821838, "loss": 2.2486, "step": 14220 }, { "epoch": 0.03, "grad_norm": 1.671875, "learning_rate": 0.00019986177914990412, "loss": 2.2236, "step": 14225 }, { "epoch": 0.03, "grad_norm": 2.578125, "learning_rate": 0.00019986168198747527, "loss": 2.3249, "step": 14230 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019986158479093184, "loss": 2.1745, "step": 14235 }, { "epoch": 0.03, "grad_norm": 1.890625, "learning_rate": 0.0001998614875602739, "loss": 2.3516, "step": 14240 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.00019986139029550147, "loss": 2.1836, "step": 14245 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019986129299661456, "loss": 2.1773, "step": 14250 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.0001998611956636132, "loss": 2.221, "step": 14255 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019986109829649745, "loss": 2.1741, "step": 14260 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.00019986100089526738, "loss": 2.1277, "step": 14265 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019986090345992298, "loss": 2.1743, "step": 14270 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019986080599046426, "loss": 2.2926, "step": 14275 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019986070848689128, "loss": 2.2986, "step": 14280 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.00019986061094920404, "loss": 2.2455, "step": 14285 }, { "epoch": 0.03, "grad_norm": 1.9375, "learning_rate": 0.00019986051337740266, "loss": 2.0975, "step": 14290 }, { "epoch": 0.03, "grad_norm": 1.7265625, "learning_rate": 0.0001998604157714871, "loss": 2.2735, "step": 14295 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019986031813145738, "loss": 2.1886, "step": 14300 }, { "epoch": 0.03, "grad_norm": 1.6640625, "learning_rate": 0.0001998602204573136, "loss": 2.1155, "step": 14305 }, { "epoch": 0.03, "grad_norm": 2.046875, "learning_rate": 0.00019986012274905574, "loss": 2.2458, "step": 14310 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.00019986002500668387, "loss": 2.3051, "step": 14315 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019985992723019796, "loss": 2.1176, "step": 14320 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019985982941959812, "loss": 2.2328, "step": 14325 }, { "epoch": 0.03, "grad_norm": 1.625, "learning_rate": 0.00019985973157488437, "loss": 2.1164, "step": 14330 }, { "epoch": 0.03, "grad_norm": 1.5859375, "learning_rate": 0.00019985963369605671, "loss": 2.2355, "step": 14335 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019985953578311518, "loss": 2.1327, "step": 14340 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019985943783605982, "loss": 2.1705, "step": 14345 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019985933985489066, "loss": 1.9827, "step": 14350 }, { "epoch": 0.03, "grad_norm": 1.421875, "learning_rate": 0.00019985924183960776, "loss": 2.1181, "step": 14355 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019985914379021114, "loss": 2.2239, "step": 14360 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.0001998590457067008, "loss": 1.9994, "step": 14365 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.00019985894758907678, "loss": 2.2366, "step": 14370 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019985884943733918, "loss": 2.0162, "step": 14375 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019985875125148796, "loss": 2.2844, "step": 14380 }, { "epoch": 0.03, "grad_norm": 1.546875, "learning_rate": 0.00019985865303152317, "loss": 2.0931, "step": 14385 }, { "epoch": 0.03, "grad_norm": 1.53125, "learning_rate": 0.00019985855477744487, "loss": 2.2372, "step": 14390 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019985845648925308, "loss": 2.2181, "step": 14395 }, { "epoch": 0.03, "grad_norm": 1.6953125, "learning_rate": 0.0001998583581669478, "loss": 2.2121, "step": 14400 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019985825981052913, "loss": 2.3573, "step": 14405 }, { "epoch": 0.03, "grad_norm": 1.75, "learning_rate": 0.00019985816141999705, "loss": 2.316, "step": 14410 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.0001998580629953516, "loss": 2.2004, "step": 14415 }, { "epoch": 0.03, "grad_norm": 1.953125, "learning_rate": 0.00019985796453659286, "loss": 2.3496, "step": 14420 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.0001998578660437208, "loss": 2.2444, "step": 14425 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.0001998577675167355, "loss": 2.0239, "step": 14430 }, { "epoch": 0.03, "grad_norm": 1.9921875, "learning_rate": 0.00019985766895563696, "loss": 2.1138, "step": 14435 }, { "epoch": 0.03, "grad_norm": 1.90625, "learning_rate": 0.00019985757036042524, "loss": 2.1955, "step": 14440 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.00019985747173110034, "loss": 2.3511, "step": 14445 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019985737306766236, "loss": 2.3189, "step": 14450 }, { "epoch": 0.03, "grad_norm": 1.9140625, "learning_rate": 0.00019985727437011125, "loss": 2.2453, "step": 14455 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019985717563844707, "loss": 2.2181, "step": 14460 }, { "epoch": 0.03, "grad_norm": 1.6171875, "learning_rate": 0.0001998570768726699, "loss": 2.1695, "step": 14465 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019985697807277974, "loss": 2.3581, "step": 14470 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019985687923877664, "loss": 2.0454, "step": 14475 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.0001998567803706606, "loss": 2.0637, "step": 14480 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019985668146843167, "loss": 2.3941, "step": 14485 }, { "epoch": 0.03, "grad_norm": 2.015625, "learning_rate": 0.00019985658253208989, "loss": 2.2012, "step": 14490 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.0001998564835616353, "loss": 2.1975, "step": 14495 }, { "epoch": 0.03, "grad_norm": 1.8515625, "learning_rate": 0.00019985638455706793, "loss": 2.2611, "step": 14500 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.0001998562855183878, "loss": 2.2943, "step": 14505 }, { "epoch": 0.03, "grad_norm": 2.0, "learning_rate": 0.00019985618644559493, "loss": 2.2427, "step": 14510 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.0001998560873386894, "loss": 2.3028, "step": 14515 }, { "epoch": 0.03, "grad_norm": 1.8984375, "learning_rate": 0.00019985598819767118, "loss": 2.0129, "step": 14520 }, { "epoch": 0.03, "grad_norm": 1.8203125, "learning_rate": 0.0001998558890225404, "loss": 2.3203, "step": 14525 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.000199855789813297, "loss": 2.2524, "step": 14530 }, { "epoch": 0.03, "grad_norm": 1.7890625, "learning_rate": 0.00019985569056994108, "loss": 2.1175, "step": 14535 }, { "epoch": 0.03, "grad_norm": 1.609375, "learning_rate": 0.00019985559129247262, "loss": 2.1952, "step": 14540 }, { "epoch": 0.03, "grad_norm": 1.6171875, "learning_rate": 0.00019985549198089168, "loss": 2.1035, "step": 14545 }, { "epoch": 0.03, "grad_norm": 1.578125, "learning_rate": 0.0001998553926351983, "loss": 2.1327, "step": 14550 }, { "epoch": 0.03, "grad_norm": 1.5078125, "learning_rate": 0.00019985529325539252, "loss": 2.1849, "step": 14555 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019985519384147434, "loss": 2.2238, "step": 14560 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.0001998550943934438, "loss": 2.3159, "step": 14565 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.000199854994911301, "loss": 2.2933, "step": 14570 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019985489539504588, "loss": 2.3729, "step": 14575 }, { "epoch": 0.03, "grad_norm": 1.6796875, "learning_rate": 0.00019985479584467856, "loss": 2.3347, "step": 14580 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.000199854696260199, "loss": 2.2709, "step": 14585 }, { "epoch": 0.03, "grad_norm": 1.6484375, "learning_rate": 0.00019985459664160726, "loss": 2.1792, "step": 14590 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019985449698890336, "loss": 2.0932, "step": 14595 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.0001998543973020874, "loss": 2.2166, "step": 14600 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.00019985429758115935, "loss": 2.2413, "step": 14605 }, { "epoch": 0.03, "grad_norm": 1.5625, "learning_rate": 0.00019985419782611924, "loss": 2.2295, "step": 14610 }, { "epoch": 0.03, "grad_norm": 2.0, "learning_rate": 0.00019985409803696715, "loss": 2.1404, "step": 14615 }, { "epoch": 0.03, "grad_norm": 1.703125, "learning_rate": 0.00019985399821370307, "loss": 2.1887, "step": 14620 }, { "epoch": 0.03, "grad_norm": 1.453125, "learning_rate": 0.0001998538983563271, "loss": 1.97, "step": 14625 }, { "epoch": 0.03, "grad_norm": 1.4296875, "learning_rate": 0.00019985379846483917, "loss": 2.3008, "step": 14630 }, { "epoch": 0.03, "grad_norm": 1.640625, "learning_rate": 0.0001998536985392394, "loss": 2.0303, "step": 14635 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.0001998535985795278, "loss": 2.2064, "step": 14640 }, { "epoch": 0.03, "grad_norm": 1.7578125, "learning_rate": 0.00019985349858570438, "loss": 2.1649, "step": 14645 }, { "epoch": 0.03, "grad_norm": 1.765625, "learning_rate": 0.0001998533985577692, "loss": 2.3271, "step": 14650 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019985329849572228, "loss": 2.175, "step": 14655 }, { "epoch": 0.03, "grad_norm": 2.125, "learning_rate": 0.00019985319839956368, "loss": 2.0384, "step": 14660 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.0001998530982692934, "loss": 1.9489, "step": 14665 }, { "epoch": 0.03, "grad_norm": 1.5, "learning_rate": 0.0001998529981049115, "loss": 2.2261, "step": 14670 }, { "epoch": 0.03, "grad_norm": 1.7421875, "learning_rate": 0.00019985289790641798, "loss": 2.2533, "step": 14675 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019985279767381292, "loss": 2.3039, "step": 14680 }, { "epoch": 0.03, "grad_norm": 2.25, "learning_rate": 0.00019985269740709636, "loss": 2.1517, "step": 14685 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.0001998525971062683, "loss": 2.1799, "step": 14690 }, { "epoch": 0.03, "grad_norm": 2.03125, "learning_rate": 0.00019985249677132875, "loss": 2.173, "step": 14695 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.0001998523964022778, "loss": 2.2912, "step": 14700 }, { "epoch": 0.03, "grad_norm": 1.453125, "learning_rate": 0.00019985229599911545, "loss": 2.2397, "step": 14705 }, { "epoch": 0.03, "grad_norm": 1.796875, "learning_rate": 0.00019985219556184177, "loss": 2.2647, "step": 14710 }, { "epoch": 0.03, "grad_norm": 1.984375, "learning_rate": 0.00019985209509045673, "loss": 2.2206, "step": 14715 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.00019985199458496042, "loss": 2.2571, "step": 14720 }, { "epoch": 0.03, "grad_norm": 1.9765625, "learning_rate": 0.00019985189404535288, "loss": 2.2028, "step": 14725 }, { "epoch": 0.03, "grad_norm": 1.46875, "learning_rate": 0.00019985179347163407, "loss": 2.1674, "step": 14730 }, { "epoch": 0.03, "grad_norm": 1.8359375, "learning_rate": 0.0001998516928638041, "loss": 2.253, "step": 14735 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019985159222186302, "loss": 2.3053, "step": 14740 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019985149154581078, "loss": 2.2135, "step": 14745 }, { "epoch": 0.03, "grad_norm": 2.15625, "learning_rate": 0.00019985139083564745, "loss": 2.1514, "step": 14750 }, { "epoch": 0.03, "grad_norm": 1.828125, "learning_rate": 0.00019985129009137312, "loss": 2.2525, "step": 14755 }, { "epoch": 0.03, "grad_norm": 1.7734375, "learning_rate": 0.00019985118931298775, "loss": 2.2338, "step": 14760 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019985108850049138, "loss": 2.1571, "step": 14765 }, { "epoch": 0.03, "grad_norm": 1.59375, "learning_rate": 0.00019985098765388414, "loss": 2.1545, "step": 14770 }, { "epoch": 0.03, "grad_norm": 1.65625, "learning_rate": 0.00019985088677316592, "loss": 2.1424, "step": 14775 }, { "epoch": 0.03, "grad_norm": 1.515625, "learning_rate": 0.00019985078585833685, "loss": 2.1075, "step": 14780 }, { "epoch": 0.03, "grad_norm": 1.875, "learning_rate": 0.00019985068490939694, "loss": 2.1994, "step": 14785 }, { "epoch": 0.03, "grad_norm": 1.8671875, "learning_rate": 0.00019985058392634625, "loss": 2.136, "step": 14790 }, { "epoch": 0.03, "grad_norm": 2.078125, "learning_rate": 0.0001998504829091848, "loss": 2.23, "step": 14795 }, { "epoch": 0.03, "grad_norm": 1.59375, "learning_rate": 0.00019985038185791254, "loss": 2.2089, "step": 14800 }, { "epoch": 0.03, "grad_norm": 1.6875, "learning_rate": 0.00019985028077252964, "loss": 2.1721, "step": 14805 }, { "epoch": 0.03, "grad_norm": 1.734375, "learning_rate": 0.00019985017965303605, "loss": 2.1247, "step": 14810 }, { "epoch": 0.03, "grad_norm": 2.09375, "learning_rate": 0.00019985007849943181, "loss": 2.2187, "step": 14815 }, { "epoch": 0.03, "grad_norm": 1.8046875, "learning_rate": 0.00019984997731171703, "loss": 2.067, "step": 14820 }, { "epoch": 0.03, "grad_norm": 1.71875, "learning_rate": 0.00019984987608989162, "loss": 2.3186, "step": 14825 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.0001998497748339557, "loss": 2.3734, "step": 14830 }, { "epoch": 0.03, "grad_norm": 1.8828125, "learning_rate": 0.0001998496735439093, "loss": 2.3659, "step": 14835 }, { "epoch": 0.03, "grad_norm": 1.7109375, "learning_rate": 0.00019984957221975247, "loss": 2.2927, "step": 14840 }, { "epoch": 0.03, "grad_norm": 1.96875, "learning_rate": 0.00019984947086148518, "loss": 2.2257, "step": 14845 }, { "epoch": 0.03, "grad_norm": 1.921875, "learning_rate": 0.00019984936946910748, "loss": 2.1576, "step": 14850 }, { "epoch": 0.03, "grad_norm": 3.015625, "learning_rate": 0.00019984926804261948, "loss": 2.214, "step": 14855 }, { "epoch": 0.03, "grad_norm": 1.9296875, "learning_rate": 0.00019984916658202112, "loss": 2.3267, "step": 14860 }, { "epoch": 0.03, "grad_norm": 1.9453125, "learning_rate": 0.00019984906508731248, "loss": 2.1428, "step": 14865 }, { "epoch": 0.03, "grad_norm": 2.140625, "learning_rate": 0.00019984896355849358, "loss": 2.4209, "step": 14870 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019984886199556447, "loss": 2.2712, "step": 14875 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.0001998487603985252, "loss": 2.2486, "step": 14880 }, { "epoch": 0.04, "grad_norm": 1.765625, "learning_rate": 0.00019984865876737575, "loss": 2.0162, "step": 14885 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.0001998485571021162, "loss": 2.1606, "step": 14890 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019984845540274657, "loss": 2.2384, "step": 14895 }, { "epoch": 0.04, "grad_norm": 1.546875, "learning_rate": 0.00019984835366926694, "loss": 2.1368, "step": 14900 }, { "epoch": 0.04, "grad_norm": 1.9375, "learning_rate": 0.00019984825190167725, "loss": 2.1784, "step": 14905 }, { "epoch": 0.04, "grad_norm": 2.234375, "learning_rate": 0.0001998481500999776, "loss": 2.2732, "step": 14910 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019984804826416797, "loss": 2.0728, "step": 14915 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019984794639424852, "loss": 2.0836, "step": 14920 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019984784449021914, "loss": 2.2114, "step": 14925 }, { "epoch": 0.04, "grad_norm": 1.5234375, "learning_rate": 0.00019984774255207995, "loss": 2.1934, "step": 14930 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019984764057983095, "loss": 2.2501, "step": 14935 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019984753857347218, "loss": 2.3275, "step": 14940 }, { "epoch": 0.04, "grad_norm": 1.5625, "learning_rate": 0.0001998474365330037, "loss": 2.2348, "step": 14945 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.00019984733445842549, "loss": 2.1849, "step": 14950 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.00019984723234973766, "loss": 2.2038, "step": 14955 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.00019984713020694016, "loss": 2.237, "step": 14960 }, { "epoch": 0.04, "grad_norm": 1.953125, "learning_rate": 0.00019984702803003311, "loss": 2.0994, "step": 14965 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.0001998469258190165, "loss": 2.0233, "step": 14970 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019984682357389034, "loss": 2.2024, "step": 14975 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019984672129465473, "loss": 2.1844, "step": 14980 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019984661898130965, "loss": 2.2899, "step": 14985 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019984651663385513, "loss": 2.1024, "step": 14990 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001998464142522913, "loss": 2.2347, "step": 14995 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019984631183661805, "loss": 2.1287, "step": 15000 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.00019984620938683553, "loss": 2.1392, "step": 15005 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019984610690294372, "loss": 2.0241, "step": 15010 }, { "epoch": 0.04, "grad_norm": 1.46875, "learning_rate": 0.00019984600438494268, "loss": 2.1572, "step": 15015 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019984590183283243, "loss": 2.2885, "step": 15020 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.000199845799246613, "loss": 2.1181, "step": 15025 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.00019984569662628442, "loss": 2.2444, "step": 15030 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019984559397184676, "loss": 2.1224, "step": 15035 }, { "epoch": 0.04, "grad_norm": 1.5, "learning_rate": 0.00019984549128330005, "loss": 2.1581, "step": 15040 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.0001998453885606443, "loss": 2.1116, "step": 15045 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.00019984528580387954, "loss": 2.2336, "step": 15050 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019984518301300583, "loss": 2.4287, "step": 15055 }, { "epoch": 0.04, "grad_norm": 1.4375, "learning_rate": 0.00019984508018802317, "loss": 2.2486, "step": 15060 }, { "epoch": 0.04, "grad_norm": 2.40625, "learning_rate": 0.00019984497732893167, "loss": 2.1969, "step": 15065 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.00019984487443573125, "loss": 2.1454, "step": 15070 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019984477150842206, "loss": 2.1242, "step": 15075 }, { "epoch": 0.04, "grad_norm": 1.578125, "learning_rate": 0.00019984466854700407, "loss": 2.2323, "step": 15080 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.00019984456555147736, "loss": 2.1914, "step": 15085 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.0001998444625218419, "loss": 2.1267, "step": 15090 }, { "epoch": 0.04, "grad_norm": 1.6640625, "learning_rate": 0.00019984435945809775, "loss": 2.0176, "step": 15095 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.000199844256360245, "loss": 2.4675, "step": 15100 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019984415322828357, "loss": 2.2767, "step": 15105 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019984405006221362, "loss": 2.1623, "step": 15110 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019984394686203513, "loss": 2.1994, "step": 15115 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019984384362774815, "loss": 2.1833, "step": 15120 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019984374035935266, "loss": 2.0933, "step": 15125 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.0001998436370568488, "loss": 2.1621, "step": 15130 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001998435337202365, "loss": 2.3191, "step": 15135 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019984343034951582, "loss": 2.1798, "step": 15140 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.00019984332694468685, "loss": 2.3821, "step": 15145 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019984322350574956, "loss": 2.2141, "step": 15150 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.00019984312003270405, "loss": 2.2793, "step": 15155 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019984301652555027, "loss": 2.075, "step": 15160 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019984291298428834, "loss": 2.1978, "step": 15165 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019984280940891824, "loss": 2.2364, "step": 15170 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019984270579944004, "loss": 2.2775, "step": 15175 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019984260215585376, "loss": 2.23, "step": 15180 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.00019984249847815945, "loss": 2.234, "step": 15185 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.0001998423947663571, "loss": 2.1499, "step": 15190 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.0001998422910204468, "loss": 2.2745, "step": 15195 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019984218724042855, "loss": 2.2351, "step": 15200 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019984208342630242, "loss": 2.1869, "step": 15205 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.0001998419795780684, "loss": 2.305, "step": 15210 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019984187569572657, "loss": 2.3366, "step": 15215 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019984177177927693, "loss": 2.2218, "step": 15220 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019984166782871952, "loss": 1.9519, "step": 15225 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.00019984156384405443, "loss": 2.404, "step": 15230 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019984145982528158, "loss": 2.1575, "step": 15235 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019984135577240112, "loss": 2.2073, "step": 15240 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019984125168541304, "loss": 2.0848, "step": 15245 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019984114756431737, "loss": 2.2718, "step": 15250 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019984104340911417, "loss": 2.1799, "step": 15255 }, { "epoch": 0.04, "grad_norm": 1.5390625, "learning_rate": 0.00019984093921980345, "loss": 2.116, "step": 15260 }, { "epoch": 0.04, "grad_norm": 2.203125, "learning_rate": 0.00019984083499638525, "loss": 2.0849, "step": 15265 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019984073073885962, "loss": 2.1981, "step": 15270 }, { "epoch": 0.04, "grad_norm": 2.234375, "learning_rate": 0.00019984062644722657, "loss": 2.0889, "step": 15275 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.00019984052212148617, "loss": 2.2735, "step": 15280 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019984041776163842, "loss": 2.1576, "step": 15285 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019984031336768338, "loss": 2.2416, "step": 15290 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019984020893962104, "loss": 2.2052, "step": 15295 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019984010447745155, "loss": 2.3374, "step": 15300 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019983999998117482, "loss": 2.3089, "step": 15305 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019983989545079096, "loss": 2.2062, "step": 15310 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019983979088629994, "loss": 2.225, "step": 15315 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.0001998396862877019, "loss": 2.2973, "step": 15320 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019983958165499676, "loss": 2.1803, "step": 15325 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019983947698818462, "loss": 2.0945, "step": 15330 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.0001998393722872655, "loss": 2.2723, "step": 15335 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019983926755223945, "loss": 2.3565, "step": 15340 }, { "epoch": 0.04, "grad_norm": 1.5625, "learning_rate": 0.0001998391627831065, "loss": 2.1069, "step": 15345 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019983905797986667, "loss": 2.2029, "step": 15350 }, { "epoch": 0.04, "grad_norm": 1.453125, "learning_rate": 0.00019983895314252002, "loss": 2.1582, "step": 15355 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019983884827106657, "loss": 2.1682, "step": 15360 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.00019983874336550634, "loss": 2.1357, "step": 15365 }, { "epoch": 0.04, "grad_norm": 1.3359375, "learning_rate": 0.0001998386384258394, "loss": 2.2901, "step": 15370 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019983853345206578, "loss": 2.1104, "step": 15375 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.00019983842844418546, "loss": 2.0706, "step": 15380 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019983832340219858, "loss": 2.2137, "step": 15385 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019983821832610509, "loss": 1.9661, "step": 15390 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019983811321590506, "loss": 2.1063, "step": 15395 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019983800807159852, "loss": 2.2571, "step": 15400 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.0001998379028931855, "loss": 2.2555, "step": 15405 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019983779768066606, "loss": 2.4518, "step": 15410 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.0001998376924340402, "loss": 2.2936, "step": 15415 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019983758715330798, "loss": 2.1986, "step": 15420 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019983748183846942, "loss": 2.2151, "step": 15425 }, { "epoch": 0.04, "grad_norm": 1.5234375, "learning_rate": 0.00019983737648952458, "loss": 2.0655, "step": 15430 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019983727110647344, "loss": 2.3353, "step": 15435 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019983716568931615, "loss": 2.203, "step": 15440 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019983706023805263, "loss": 2.2245, "step": 15445 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019983695475268295, "loss": 2.3882, "step": 15450 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019983684923320717, "loss": 2.1629, "step": 15455 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.00019983674367962532, "loss": 2.2636, "step": 15460 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019983663809193742, "loss": 2.0746, "step": 15465 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.0001998365324701435, "loss": 2.3102, "step": 15470 }, { "epoch": 0.04, "grad_norm": 2.265625, "learning_rate": 0.00019983642681424365, "loss": 2.2046, "step": 15475 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019983632112423783, "loss": 2.2982, "step": 15480 }, { "epoch": 0.04, "grad_norm": 2.125, "learning_rate": 0.00019983621540012612, "loss": 2.2791, "step": 15485 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019983610964190854, "loss": 2.412, "step": 15490 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019983600384958515, "loss": 2.1972, "step": 15495 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019983589802315596, "loss": 2.4288, "step": 15500 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019983579216262102, "loss": 2.2428, "step": 15505 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019983568626798036, "loss": 2.0222, "step": 15510 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.00019983558033923404, "loss": 2.2826, "step": 15515 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019983547437638205, "loss": 2.2854, "step": 15520 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019983536837942447, "loss": 2.1677, "step": 15525 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019983526234836128, "loss": 2.3512, "step": 15530 }, { "epoch": 0.04, "grad_norm": 1.953125, "learning_rate": 0.0001998351562831926, "loss": 2.1564, "step": 15535 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.0001998350501839184, "loss": 2.158, "step": 15540 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019983494405053875, "loss": 2.2087, "step": 15545 }, { "epoch": 0.04, "grad_norm": 2.125, "learning_rate": 0.00019983483788305363, "loss": 2.2038, "step": 15550 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019983473168146316, "loss": 2.2311, "step": 15555 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019983462544576733, "loss": 2.0916, "step": 15560 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019983451917596616, "loss": 2.0922, "step": 15565 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019983441287205973, "loss": 2.4126, "step": 15570 }, { "epoch": 0.04, "grad_norm": 2.9375, "learning_rate": 0.00019983430653404803, "loss": 2.1403, "step": 15575 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019983420016193116, "loss": 2.2181, "step": 15580 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019983409375570909, "loss": 2.2675, "step": 15585 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.00019983398731538189, "loss": 2.0128, "step": 15590 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.0001998338808409496, "loss": 2.2871, "step": 15595 }, { "epoch": 0.04, "grad_norm": 2.203125, "learning_rate": 0.00019983377433241222, "loss": 2.2228, "step": 15600 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.0001998336677897698, "loss": 2.1463, "step": 15605 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019983356121302243, "loss": 2.3306, "step": 15610 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.0001998334546021701, "loss": 2.1934, "step": 15615 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019983334795721283, "loss": 2.2216, "step": 15620 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019983324127815068, "loss": 1.9794, "step": 15625 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.0001998331345649837, "loss": 2.2034, "step": 15630 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.0001998330278177119, "loss": 2.2131, "step": 15635 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.0001998329210363353, "loss": 2.2806, "step": 15640 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.000199832814220854, "loss": 2.1124, "step": 15645 }, { "epoch": 0.04, "grad_norm": 2.203125, "learning_rate": 0.000199832707371268, "loss": 2.1778, "step": 15650 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019983260048757733, "loss": 2.11, "step": 15655 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019983249356978204, "loss": 2.4421, "step": 15660 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019983238661788213, "loss": 2.2421, "step": 15665 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.0001998322796318777, "loss": 2.3452, "step": 15670 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019983217261176873, "loss": 2.324, "step": 15675 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.0001998320655575553, "loss": 2.0912, "step": 15680 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.0001998319584692374, "loss": 2.1236, "step": 15685 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019983185134681513, "loss": 2.0869, "step": 15690 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019983174419028847, "loss": 2.2812, "step": 15695 }, { "epoch": 0.04, "grad_norm": 1.6640625, "learning_rate": 0.00019983163699965745, "loss": 2.3075, "step": 15700 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019983152977492218, "loss": 2.1255, "step": 15705 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.00019983142251608262, "loss": 2.2325, "step": 15710 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019983131522313883, "loss": 2.1629, "step": 15715 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019983120789609084, "loss": 2.0507, "step": 15720 }, { "epoch": 0.04, "grad_norm": 2.359375, "learning_rate": 0.00019983110053493875, "loss": 2.3249, "step": 15725 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.0001998309931396825, "loss": 2.074, "step": 15730 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.0001998308857103222, "loss": 2.4163, "step": 15735 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019983077824685784, "loss": 2.2929, "step": 15740 }, { "epoch": 0.04, "grad_norm": 1.5546875, "learning_rate": 0.0001998306707492895, "loss": 2.27, "step": 15745 }, { "epoch": 0.04, "grad_norm": 2.90625, "learning_rate": 0.00019983056321761715, "loss": 2.3792, "step": 15750 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019983045565184087, "loss": 2.3697, "step": 15755 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019983034805196075, "loss": 2.1729, "step": 15760 }, { "epoch": 0.04, "grad_norm": 1.5546875, "learning_rate": 0.00019983024041797672, "loss": 2.4077, "step": 15765 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.0001998301327498889, "loss": 2.1154, "step": 15770 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001998300250476973, "loss": 2.1843, "step": 15775 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.00019982991731140194, "loss": 2.0135, "step": 15780 }, { "epoch": 0.04, "grad_norm": 2.71875, "learning_rate": 0.00019982980954100286, "loss": 2.2872, "step": 15785 }, { "epoch": 0.04, "grad_norm": 1.5, "learning_rate": 0.0001998297017365001, "loss": 2.0585, "step": 15790 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019982959389789375, "loss": 2.2562, "step": 15795 }, { "epoch": 0.04, "grad_norm": 1.5625, "learning_rate": 0.00019982948602518378, "loss": 2.2171, "step": 15800 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019982937811837022, "loss": 2.2086, "step": 15805 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019982927017745316, "loss": 2.1346, "step": 15810 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.00019982916220243258, "loss": 2.0718, "step": 15815 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.0001998290541933086, "loss": 2.3827, "step": 15820 }, { "epoch": 0.04, "grad_norm": 1.765625, "learning_rate": 0.00019982894615008117, "loss": 2.1801, "step": 15825 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019982883807275034, "loss": 2.2003, "step": 15830 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.0001998287299613162, "loss": 2.1616, "step": 15835 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019982862181577878, "loss": 1.921, "step": 15840 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019982851363613803, "loss": 2.4469, "step": 15845 }, { "epoch": 0.04, "grad_norm": 1.953125, "learning_rate": 0.0001998284054223941, "loss": 2.1787, "step": 15850 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019982829717454697, "loss": 2.2181, "step": 15855 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019982818889259664, "loss": 2.1647, "step": 15860 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019982808057654323, "loss": 2.3147, "step": 15865 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019982797222638671, "loss": 2.0918, "step": 15870 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.0001998278638421272, "loss": 2.121, "step": 15875 }, { "epoch": 0.04, "grad_norm": 2.34375, "learning_rate": 0.0001998277554237646, "loss": 2.2085, "step": 15880 }, { "epoch": 0.04, "grad_norm": 2.203125, "learning_rate": 0.0001998276469712991, "loss": 2.1884, "step": 15885 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019982753848473062, "loss": 2.1727, "step": 15890 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019982742996405928, "loss": 2.0585, "step": 15895 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019982732140928503, "loss": 2.0203, "step": 15900 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.000199827212820408, "loss": 2.2823, "step": 15905 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.0001998271041974282, "loss": 2.1525, "step": 15910 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.00019982699554034562, "loss": 2.1411, "step": 15915 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.0001998268868491603, "loss": 2.2208, "step": 15920 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019982677812387234, "loss": 2.2788, "step": 15925 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019982666936448174, "loss": 2.2611, "step": 15930 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.0001998265605709885, "loss": 2.3244, "step": 15935 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.00019982645174339275, "loss": 2.0947, "step": 15940 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019982634288169445, "loss": 2.3524, "step": 15945 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019982623398589366, "loss": 2.1372, "step": 15950 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019982612505599042, "loss": 2.195, "step": 15955 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.0001998260160919848, "loss": 2.2037, "step": 15960 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019982590709387673, "loss": 2.4058, "step": 15965 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019982579806166636, "loss": 2.3035, "step": 15970 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.0001998256889953537, "loss": 2.2529, "step": 15975 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019982557989493878, "loss": 2.3092, "step": 15980 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.0001998254707604216, "loss": 2.0788, "step": 15985 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019982536159180225, "loss": 2.2031, "step": 15990 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.00019982525238908074, "loss": 2.276, "step": 15995 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.0001998251431522571, "loss": 2.1822, "step": 16000 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.0001998250338813314, "loss": 2.2769, "step": 16005 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.00019982492457630365, "loss": 2.0976, "step": 16010 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.0001998248152371739, "loss": 2.1241, "step": 16015 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019982470586394217, "loss": 1.94, "step": 16020 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.00019982459645660853, "loss": 2.5068, "step": 16025 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019982448701517298, "loss": 2.1413, "step": 16030 }, { "epoch": 0.04, "grad_norm": 1.5078125, "learning_rate": 0.00019982437753963557, "loss": 2.0551, "step": 16035 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019982426802999637, "loss": 2.0584, "step": 16040 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019982415848625537, "loss": 2.3287, "step": 16045 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.00019982404890841261, "loss": 2.0878, "step": 16050 }, { "epoch": 0.04, "grad_norm": 1.546875, "learning_rate": 0.00019982393929646818, "loss": 2.1652, "step": 16055 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019982382965042205, "loss": 2.1647, "step": 16060 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.0001998237199702743, "loss": 2.1459, "step": 16065 }, { "epoch": 0.04, "grad_norm": 1.765625, "learning_rate": 0.00019982361025602495, "loss": 2.2645, "step": 16070 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.00019982350050767406, "loss": 2.0704, "step": 16075 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019982339072522167, "loss": 2.1735, "step": 16080 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.0001998232809086678, "loss": 2.109, "step": 16085 }, { "epoch": 0.04, "grad_norm": 1.6640625, "learning_rate": 0.00019982317105801241, "loss": 2.1948, "step": 16090 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.0001998230611732557, "loss": 1.9919, "step": 16095 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.0001998229512543976, "loss": 2.0379, "step": 16100 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.00019982284130143815, "loss": 2.096, "step": 16105 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.0001998227313143774, "loss": 2.3217, "step": 16110 }, { "epoch": 0.04, "grad_norm": 1.53125, "learning_rate": 0.00019982262129321544, "loss": 2.3147, "step": 16115 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.00019982251123795223, "loss": 2.1382, "step": 16120 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019982240114858784, "loss": 2.3274, "step": 16125 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019982229102512232, "loss": 2.3004, "step": 16130 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019982218086755569, "loss": 2.2006, "step": 16135 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.000199822070675888, "loss": 2.3587, "step": 16140 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.00019982196045011924, "loss": 2.2088, "step": 16145 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.00019982185019024954, "loss": 2.2469, "step": 16150 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019982173989627883, "loss": 2.2316, "step": 16155 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019982162956820726, "loss": 2.2564, "step": 16160 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.00019982151920603478, "loss": 2.2332, "step": 16165 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019982140880976144, "loss": 2.0774, "step": 16170 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019982129837938732, "loss": 2.3805, "step": 16175 }, { "epoch": 0.04, "grad_norm": 2.625, "learning_rate": 0.00019982118791491245, "loss": 2.2211, "step": 16180 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019982107741633683, "loss": 2.2048, "step": 16185 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019982096688366053, "loss": 2.4611, "step": 16190 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019982085631688358, "loss": 2.3249, "step": 16195 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019982074571600603, "loss": 2.2888, "step": 16200 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.00019982063508102785, "loss": 2.1883, "step": 16205 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.0001998205244119492, "loss": 2.2309, "step": 16210 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.00019982041370876998, "loss": 2.304, "step": 16215 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019982030297149034, "loss": 2.3007, "step": 16220 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019982019220011026, "loss": 2.2359, "step": 16225 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.0001998200813946298, "loss": 2.0183, "step": 16230 }, { "epoch": 0.04, "grad_norm": 2.390625, "learning_rate": 0.000199819970555049, "loss": 2.1387, "step": 16235 }, { "epoch": 0.04, "grad_norm": 2.09375, "learning_rate": 0.00019981985968136785, "loss": 2.2269, "step": 16240 }, { "epoch": 0.04, "grad_norm": 2.265625, "learning_rate": 0.00019981974877358643, "loss": 2.2152, "step": 16245 }, { "epoch": 0.04, "grad_norm": 2.25, "learning_rate": 0.0001998196378317048, "loss": 2.2704, "step": 16250 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019981952685572298, "loss": 2.124, "step": 16255 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019981941584564097, "loss": 2.3103, "step": 16260 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019981930480145884, "loss": 2.1438, "step": 16265 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019981919372317667, "loss": 2.1483, "step": 16270 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.0001998190826107944, "loss": 2.3403, "step": 16275 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019981897146431211, "loss": 2.2922, "step": 16280 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.0001998188602837299, "loss": 2.0378, "step": 16285 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019981874906904775, "loss": 2.1083, "step": 16290 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.0001998186378202657, "loss": 2.1133, "step": 16295 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019981852653738375, "loss": 2.2572, "step": 16300 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019981841522040202, "loss": 2.3003, "step": 16305 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.0001998183038693205, "loss": 2.3768, "step": 16310 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019981819248413924, "loss": 2.0623, "step": 16315 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019981808106485827, "loss": 2.3118, "step": 16320 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019981796961147764, "loss": 2.3352, "step": 16325 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019981785812399737, "loss": 2.1786, "step": 16330 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019981774660241753, "loss": 2.0774, "step": 16335 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019981763504673815, "loss": 2.1859, "step": 16340 }, { "epoch": 0.04, "grad_norm": 2.234375, "learning_rate": 0.0001998175234569592, "loss": 2.1395, "step": 16345 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.00019981741183308082, "loss": 2.0622, "step": 16350 }, { "epoch": 0.04, "grad_norm": 2.265625, "learning_rate": 0.00019981730017510298, "loss": 2.0565, "step": 16355 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019981718848302576, "loss": 2.0633, "step": 16360 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019981707675684914, "loss": 1.9741, "step": 16365 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019981696499657322, "loss": 2.2876, "step": 16370 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019981685320219806, "loss": 2.077, "step": 16375 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019981674137372357, "loss": 2.283, "step": 16380 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019981662951114993, "loss": 2.0654, "step": 16385 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.00019981651761447712, "loss": 2.3534, "step": 16390 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019981640568370512, "loss": 2.1886, "step": 16395 }, { "epoch": 0.04, "grad_norm": 1.5546875, "learning_rate": 0.00019981629371883407, "loss": 2.2522, "step": 16400 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019981618171986397, "loss": 2.2233, "step": 16405 }, { "epoch": 0.04, "grad_norm": 2.390625, "learning_rate": 0.00019981606968679484, "loss": 2.221, "step": 16410 }, { "epoch": 0.04, "grad_norm": 1.4921875, "learning_rate": 0.0001998159576196267, "loss": 2.1054, "step": 16415 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019981584551835967, "loss": 2.1145, "step": 16420 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.0001998157333829937, "loss": 2.0702, "step": 16425 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.00019981562121352888, "loss": 2.1861, "step": 16430 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019981550900996525, "loss": 2.2927, "step": 16435 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019981539677230278, "loss": 2.3089, "step": 16440 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.0001998152845005416, "loss": 2.2725, "step": 16445 }, { "epoch": 0.04, "grad_norm": 1.953125, "learning_rate": 0.00019981517219468171, "loss": 2.228, "step": 16450 }, { "epoch": 0.04, "grad_norm": 2.171875, "learning_rate": 0.00019981505985472312, "loss": 2.3035, "step": 16455 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019981494748066593, "loss": 2.1416, "step": 16460 }, { "epoch": 0.04, "grad_norm": 1.5546875, "learning_rate": 0.00019981483507251015, "loss": 1.9785, "step": 16465 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019981472263025577, "loss": 2.2622, "step": 16470 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019981461015390292, "loss": 2.1345, "step": 16475 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019981449764345155, "loss": 2.2773, "step": 16480 }, { "epoch": 0.04, "grad_norm": 1.9375, "learning_rate": 0.00019981438509890172, "loss": 2.2187, "step": 16485 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019981427252025352, "loss": 2.0813, "step": 16490 }, { "epoch": 0.04, "grad_norm": 1.40625, "learning_rate": 0.00019981415990750695, "loss": 2.2392, "step": 16495 }, { "epoch": 0.04, "grad_norm": 1.515625, "learning_rate": 0.00019981404726066205, "loss": 2.158, "step": 16500 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.0001998139345797189, "loss": 2.2426, "step": 16505 }, { "epoch": 0.04, "grad_norm": 2.28125, "learning_rate": 0.00019981382186467743, "loss": 2.1346, "step": 16510 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.0001998137091155378, "loss": 2.1198, "step": 16515 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019981359633229997, "loss": 2.212, "step": 16520 }, { "epoch": 0.04, "grad_norm": 2.09375, "learning_rate": 0.000199813483514964, "loss": 2.2306, "step": 16525 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019981337066352995, "loss": 2.3999, "step": 16530 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019981325777799783, "loss": 2.0932, "step": 16535 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.0001998131448583677, "loss": 2.3221, "step": 16540 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.00019981303190463957, "loss": 2.2617, "step": 16545 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.00019981291891681355, "loss": 2.1882, "step": 16550 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.0001998128058948896, "loss": 2.138, "step": 16555 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019981269283886777, "loss": 2.3515, "step": 16560 }, { "epoch": 0.04, "grad_norm": 1.53125, "learning_rate": 0.00019981257974874812, "loss": 2.383, "step": 16565 }, { "epoch": 0.04, "grad_norm": 1.578125, "learning_rate": 0.00019981246662453069, "loss": 2.2429, "step": 16570 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.0001998123534662155, "loss": 2.0887, "step": 16575 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019981224027380262, "loss": 2.1031, "step": 16580 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.00019981212704729204, "loss": 2.2061, "step": 16585 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019981201378668385, "loss": 2.1839, "step": 16590 }, { "epoch": 0.04, "grad_norm": 1.421875, "learning_rate": 0.00019981190049197804, "loss": 2.0302, "step": 16595 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.0001998117871631747, "loss": 2.1716, "step": 16600 }, { "epoch": 0.04, "grad_norm": 1.5546875, "learning_rate": 0.00019981167380027384, "loss": 2.247, "step": 16605 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019981156040327548, "loss": 2.1623, "step": 16610 }, { "epoch": 0.04, "grad_norm": 1.40625, "learning_rate": 0.00019981144697217972, "loss": 2.0423, "step": 16615 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.00019981133350698653, "loss": 2.4098, "step": 16620 }, { "epoch": 0.04, "grad_norm": 2.40625, "learning_rate": 0.000199811220007696, "loss": 2.0005, "step": 16625 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.00019981110647430812, "loss": 2.101, "step": 16630 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019981099290682298, "loss": 2.0037, "step": 16635 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019981087930524057, "loss": 2.3298, "step": 16640 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019981076566956096, "loss": 2.2934, "step": 16645 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019981065199978419, "loss": 2.2265, "step": 16650 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.0001998105382959103, "loss": 2.2067, "step": 16655 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019981042455793931, "loss": 2.2625, "step": 16660 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019981031078587127, "loss": 2.1414, "step": 16665 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.0001998101969797062, "loss": 2.439, "step": 16670 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.0001998100831394442, "loss": 2.2335, "step": 16675 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019980996926508522, "loss": 2.308, "step": 16680 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019980985535662937, "loss": 2.3321, "step": 16685 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019980974141407666, "loss": 2.2157, "step": 16690 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.00019980962743742714, "loss": 1.9713, "step": 16695 }, { "epoch": 0.04, "grad_norm": 2.25, "learning_rate": 0.00019980951342668082, "loss": 2.3191, "step": 16700 }, { "epoch": 0.04, "grad_norm": 1.5625, "learning_rate": 0.00019980939938183778, "loss": 2.1621, "step": 16705 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019980928530289802, "loss": 2.1764, "step": 16710 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.0001998091711898616, "loss": 2.3045, "step": 16715 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019980905704272857, "loss": 2.2806, "step": 16720 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.00019980894286149897, "loss": 2.1484, "step": 16725 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019980882864617278, "loss": 2.2444, "step": 16730 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.0001998087143967501, "loss": 2.2821, "step": 16735 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019980860011323097, "loss": 2.1912, "step": 16740 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019980848579561543, "loss": 2.1487, "step": 16745 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019980837144390346, "loss": 1.9825, "step": 16750 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.00019980825705809516, "loss": 2.2538, "step": 16755 }, { "epoch": 0.04, "grad_norm": 1.546875, "learning_rate": 0.00019980814263819053, "loss": 2.1633, "step": 16760 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019980802818418965, "loss": 2.3028, "step": 16765 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019980791369609253, "loss": 2.1267, "step": 16770 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019980779917389924, "loss": 2.2415, "step": 16775 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019980768461760973, "loss": 2.2739, "step": 16780 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.00019980757002722416, "loss": 2.2258, "step": 16785 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.00019980745540274251, "loss": 2.2312, "step": 16790 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019980734074416482, "loss": 2.1987, "step": 16795 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.00019980722605149112, "loss": 2.1385, "step": 16800 }, { "epoch": 0.04, "grad_norm": 2.59375, "learning_rate": 0.00019980711132472147, "loss": 2.1801, "step": 16805 }, { "epoch": 0.04, "grad_norm": 1.53125, "learning_rate": 0.0001998069965638559, "loss": 2.2495, "step": 16810 }, { "epoch": 0.04, "grad_norm": 2.09375, "learning_rate": 0.00019980688176889446, "loss": 2.2853, "step": 16815 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019980676693983715, "loss": 2.2777, "step": 16820 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019980665207668406, "loss": 2.401, "step": 16825 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.0001998065371794352, "loss": 2.1542, "step": 16830 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019980642224809062, "loss": 2.0412, "step": 16835 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019980630728265038, "loss": 2.2023, "step": 16840 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019980619228311447, "loss": 2.3547, "step": 16845 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019980607724948295, "loss": 2.2184, "step": 16850 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019980596218175588, "loss": 2.1428, "step": 16855 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019980584707993327, "loss": 2.1146, "step": 16860 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.00019980573194401514, "loss": 2.1965, "step": 16865 }, { "epoch": 0.04, "grad_norm": 1.4765625, "learning_rate": 0.00019980561677400164, "loss": 2.3299, "step": 16870 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019980550156989266, "loss": 2.2403, "step": 16875 }, { "epoch": 0.04, "grad_norm": 2.484375, "learning_rate": 0.00019980538633168835, "loss": 2.6245, "step": 16880 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019980527105938872, "loss": 2.2993, "step": 16885 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019980515575299376, "loss": 2.3072, "step": 16890 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.0001998050404125036, "loss": 2.2042, "step": 16895 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019980492503791816, "loss": 2.2276, "step": 16900 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.0001998048096292376, "loss": 2.1734, "step": 16905 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019980469418646188, "loss": 2.1983, "step": 16910 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019980457870959109, "loss": 2.1458, "step": 16915 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019980446319862522, "loss": 2.2777, "step": 16920 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019980434765356434, "loss": 2.279, "step": 16925 }, { "epoch": 0.04, "grad_norm": 1.484375, "learning_rate": 0.0001998042320744085, "loss": 2.1858, "step": 16930 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019980411646115773, "loss": 2.1185, "step": 16935 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019980400081381206, "loss": 2.3146, "step": 16940 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.0001998038851323715, "loss": 2.2121, "step": 16945 }, { "epoch": 0.04, "grad_norm": 1.546875, "learning_rate": 0.00019980376941683615, "loss": 2.2951, "step": 16950 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019980365366720603, "loss": 2.2871, "step": 16955 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019980353788348116, "loss": 2.2157, "step": 16960 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019980342206566158, "loss": 2.3124, "step": 16965 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019980330621374736, "loss": 2.0785, "step": 16970 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.0001998031903277385, "loss": 2.0366, "step": 16975 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.0001998030744076351, "loss": 1.9419, "step": 16980 }, { "epoch": 0.04, "grad_norm": 1.390625, "learning_rate": 0.00019980295845343715, "loss": 2.0723, "step": 16985 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019980284246514467, "loss": 2.0367, "step": 16990 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019980272644275774, "loss": 2.2319, "step": 16995 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.0001998026103862764, "loss": 2.2769, "step": 17000 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019980249429570065, "loss": 2.1904, "step": 17005 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019980237817103058, "loss": 2.3255, "step": 17010 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019980226201226622, "loss": 2.3231, "step": 17015 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.00019980214581940758, "loss": 2.2641, "step": 17020 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019980202959245472, "loss": 2.0299, "step": 17025 }, { "epoch": 0.04, "grad_norm": 2.171875, "learning_rate": 0.00019980191333140768, "loss": 2.24, "step": 17030 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.0001998017970362665, "loss": 2.2914, "step": 17035 }, { "epoch": 0.04, "grad_norm": 1.5390625, "learning_rate": 0.0001998016807070312, "loss": 2.2259, "step": 17040 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019980156434370186, "loss": 2.2034, "step": 17045 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019980144794627845, "loss": 2.0972, "step": 17050 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.0001998013315147611, "loss": 2.2293, "step": 17055 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019980121504914979, "loss": 2.0959, "step": 17060 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019980109854944459, "loss": 2.0822, "step": 17065 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.0001998009820156455, "loss": 2.1869, "step": 17070 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.0001998008654477526, "loss": 2.256, "step": 17075 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019980074884576592, "loss": 2.1288, "step": 17080 }, { "epoch": 0.04, "grad_norm": 2.65625, "learning_rate": 0.00019980063220968549, "loss": 2.1226, "step": 17085 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019980051553951134, "loss": 2.4202, "step": 17090 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019980039883524354, "loss": 2.3752, "step": 17095 }, { "epoch": 0.04, "grad_norm": 2.09375, "learning_rate": 0.00019980028209688207, "loss": 2.3014, "step": 17100 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019980016532442706, "loss": 2.339, "step": 17105 }, { "epoch": 0.04, "grad_norm": 2.1875, "learning_rate": 0.0001998000485178785, "loss": 2.1857, "step": 17110 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.0001997999316772364, "loss": 2.2769, "step": 17115 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019979981480250088, "loss": 2.1394, "step": 17120 }, { "epoch": 0.04, "grad_norm": 2.1875, "learning_rate": 0.0001997996978936719, "loss": 2.2673, "step": 17125 }, { "epoch": 0.04, "grad_norm": 1.4375, "learning_rate": 0.00019979958095074957, "loss": 2.1673, "step": 17130 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019979946397373385, "loss": 2.2371, "step": 17135 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019979934696262485, "loss": 2.321, "step": 17140 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.0001997992299174226, "loss": 2.1268, "step": 17145 }, { "epoch": 0.04, "grad_norm": 3.609375, "learning_rate": 0.0001997991128381271, "loss": 2.2604, "step": 17150 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.0001997989957247384, "loss": 2.1662, "step": 17155 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019979887857725657, "loss": 2.268, "step": 17160 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.0001997987613956816, "loss": 2.3302, "step": 17165 }, { "epoch": 0.04, "grad_norm": 2.453125, "learning_rate": 0.0001997986441800136, "loss": 2.2435, "step": 17170 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019979852693025255, "loss": 2.3918, "step": 17175 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019979840964639856, "loss": 2.2003, "step": 17180 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019979829232845157, "loss": 2.1889, "step": 17185 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.0001997981749764117, "loss": 2.2706, "step": 17190 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019979805759027895, "loss": 2.2029, "step": 17195 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019979794017005337, "loss": 2.2547, "step": 17200 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019979782271573504, "loss": 2.0847, "step": 17205 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019979770522732392, "loss": 2.3982, "step": 17210 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019979758770482012, "loss": 2.2676, "step": 17215 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019979747014822364, "loss": 2.3235, "step": 17220 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019979735255753456, "loss": 2.3038, "step": 17225 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019979723493275287, "loss": 2.0275, "step": 17230 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019979711727387864, "loss": 2.1785, "step": 17235 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.0001997969995809119, "loss": 2.1369, "step": 17240 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019979688185385271, "loss": 2.1093, "step": 17245 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019979676409270108, "loss": 2.1137, "step": 17250 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019979664629745706, "loss": 2.1283, "step": 17255 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.00019979652846812074, "loss": 2.3118, "step": 17260 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.00019979641060469206, "loss": 2.1478, "step": 17265 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019979629270717116, "loss": 2.2298, "step": 17270 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019979617477555798, "loss": 2.2515, "step": 17275 }, { "epoch": 0.04, "grad_norm": 1.4765625, "learning_rate": 0.00019979605680985266, "loss": 2.1232, "step": 17280 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.0001997959388100552, "loss": 2.2662, "step": 17285 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019979582077616563, "loss": 2.3499, "step": 17290 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.000199795702708184, "loss": 2.2977, "step": 17295 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019979558460611035, "loss": 2.1968, "step": 17300 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.0001997954664699447, "loss": 2.1644, "step": 17305 }, { "epoch": 0.04, "grad_norm": 1.4375, "learning_rate": 0.00019979534829968714, "loss": 2.306, "step": 17310 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019979523009533762, "loss": 2.2919, "step": 17315 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.0001997951118568963, "loss": 2.2042, "step": 17320 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019979499358436313, "loss": 2.2196, "step": 17325 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.0001997948752777382, "loss": 2.04, "step": 17330 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.0001997947569370215, "loss": 2.2259, "step": 17335 }, { "epoch": 0.04, "grad_norm": 2.171875, "learning_rate": 0.00019979463856221314, "loss": 2.2431, "step": 17340 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.00019979452015331307, "loss": 2.1832, "step": 17345 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019979440171032144, "loss": 2.1389, "step": 17350 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.0001997942832332382, "loss": 2.3957, "step": 17355 }, { "epoch": 0.04, "grad_norm": 2.265625, "learning_rate": 0.00019979416472206345, "loss": 2.238, "step": 17360 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.0001997940461767972, "loss": 2.1855, "step": 17365 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019979392759743945, "loss": 2.1218, "step": 17370 }, { "epoch": 0.04, "grad_norm": 1.5, "learning_rate": 0.00019979380898399031, "loss": 2.4993, "step": 17375 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019979369033644982, "loss": 2.243, "step": 17380 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019979357165481797, "loss": 2.1984, "step": 17385 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.0001997934529390948, "loss": 2.3074, "step": 17390 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019979333418928042, "loss": 2.1995, "step": 17395 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.0001997932154053748, "loss": 2.4247, "step": 17400 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019979309658737805, "loss": 2.0778, "step": 17405 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.0001997929777352901, "loss": 2.1841, "step": 17410 }, { "epoch": 0.04, "grad_norm": 1.4765625, "learning_rate": 0.0001997928588491111, "loss": 2.2237, "step": 17415 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.00019979273992884106, "loss": 2.1569, "step": 17420 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019979262097448, "loss": 2.1021, "step": 17425 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019979250198602798, "loss": 2.23, "step": 17430 }, { "epoch": 0.04, "grad_norm": 1.546875, "learning_rate": 0.000199792382963485, "loss": 2.1904, "step": 17435 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.00019979226390685114, "loss": 2.2971, "step": 17440 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019979214481612644, "loss": 2.4196, "step": 17445 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019979202569131096, "loss": 2.1615, "step": 17450 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019979190653240467, "loss": 2.1782, "step": 17455 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001997917873394077, "loss": 2.1581, "step": 17460 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019979166811232002, "loss": 2.2542, "step": 17465 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.0001997915488511417, "loss": 2.3914, "step": 17470 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019979142955587275, "loss": 2.1175, "step": 17475 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019979131022651329, "loss": 2.2496, "step": 17480 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019979119086306328, "loss": 2.2206, "step": 17485 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.0001997910714655228, "loss": 1.8955, "step": 17490 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019979095203389186, "loss": 2.2938, "step": 17495 }, { "epoch": 0.04, "grad_norm": 5.84375, "learning_rate": 0.00019979083256817053, "loss": 2.3215, "step": 17500 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019979071306835887, "loss": 2.2768, "step": 17505 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019979059353445684, "loss": 2.3023, "step": 17510 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019979047396646458, "loss": 2.008, "step": 17515 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019979035436438206, "loss": 2.1521, "step": 17520 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019979023472820932, "loss": 2.4578, "step": 17525 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019979011505794647, "loss": 2.3585, "step": 17530 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019978999535359351, "loss": 2.3059, "step": 17535 }, { "epoch": 0.04, "grad_norm": 2.25, "learning_rate": 0.00019978987561515042, "loss": 2.0884, "step": 17540 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019978975584261736, "loss": 2.2847, "step": 17545 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.0001997896360359943, "loss": 2.1384, "step": 17550 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019978951619528128, "loss": 2.1532, "step": 17555 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019978939632047836, "loss": 2.2899, "step": 17560 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019978927641158556, "loss": 2.2545, "step": 17565 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019978915646860293, "loss": 2.2103, "step": 17570 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019978903649153052, "loss": 2.1966, "step": 17575 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.00019978891648036838, "loss": 2.2793, "step": 17580 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.0001997887964351165, "loss": 2.1784, "step": 17585 }, { "epoch": 0.04, "grad_norm": 2.21875, "learning_rate": 0.000199788676355775, "loss": 2.2864, "step": 17590 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019978855624234386, "loss": 2.0747, "step": 17595 }, { "epoch": 0.04, "grad_norm": 1.5, "learning_rate": 0.00019978843609482313, "loss": 2.283, "step": 17600 }, { "epoch": 0.04, "grad_norm": 2.171875, "learning_rate": 0.00019978831591321289, "loss": 2.0843, "step": 17605 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019978819569751313, "loss": 2.1044, "step": 17610 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.0001997880754477239, "loss": 2.1266, "step": 17615 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.00019978795516384525, "loss": 2.3046, "step": 17620 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019978783484587726, "loss": 2.3325, "step": 17625 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019978771449381992, "loss": 2.2544, "step": 17630 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019978759410767326, "loss": 2.2471, "step": 17635 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.0001997874736874374, "loss": 2.1782, "step": 17640 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.0001997873532331123, "loss": 2.2562, "step": 17645 }, { "epoch": 0.04, "grad_norm": 2.4375, "learning_rate": 0.00019978723274469802, "loss": 2.1445, "step": 17650 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019978711222219463, "loss": 2.1079, "step": 17655 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019978699166560215, "loss": 2.0704, "step": 17660 }, { "epoch": 0.04, "grad_norm": 2.125, "learning_rate": 0.0001997868710749206, "loss": 2.1343, "step": 17665 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019978675045015008, "loss": 2.1825, "step": 17670 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019978662979129057, "loss": 2.3435, "step": 17675 }, { "epoch": 0.04, "grad_norm": 1.453125, "learning_rate": 0.00019978650909834215, "loss": 2.083, "step": 17680 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019978638837130484, "loss": 2.1493, "step": 17685 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.0001997862676101787, "loss": 2.1531, "step": 17690 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019978614681496378, "loss": 2.2159, "step": 17695 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019978602598566008, "loss": 2.2152, "step": 17700 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019978590512226765, "loss": 2.3999, "step": 17705 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.00019978578422478658, "loss": 2.2738, "step": 17710 }, { "epoch": 0.04, "grad_norm": 1.578125, "learning_rate": 0.00019978566329321686, "loss": 2.2592, "step": 17715 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019978554232755852, "loss": 2.3361, "step": 17720 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.00019978542132781168, "loss": 2.2634, "step": 17725 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019978530029397628, "loss": 2.2314, "step": 17730 }, { "epoch": 0.04, "grad_norm": 1.765625, "learning_rate": 0.00019978517922605244, "loss": 2.2812, "step": 17735 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019978505812404018, "loss": 2.1594, "step": 17740 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.00019978493698793952, "loss": 2.4027, "step": 17745 }, { "epoch": 0.04, "grad_norm": 1.9375, "learning_rate": 0.00019978481581775054, "loss": 2.2778, "step": 17750 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001997846946134732, "loss": 2.2002, "step": 17755 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.00019978457337510764, "loss": 2.284, "step": 17760 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019978445210265387, "loss": 2.2216, "step": 17765 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.0001997843307961119, "loss": 2.1164, "step": 17770 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019978420945548182, "loss": 2.114, "step": 17775 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.0001997840880807636, "loss": 2.1131, "step": 17780 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019978396667195736, "loss": 2.2229, "step": 17785 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.0001997838452290631, "loss": 2.2516, "step": 17790 }, { "epoch": 0.04, "grad_norm": 1.5390625, "learning_rate": 0.00019978372375208084, "loss": 2.0912, "step": 17795 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.0001997836022410107, "loss": 2.0779, "step": 17800 }, { "epoch": 0.04, "grad_norm": 1.578125, "learning_rate": 0.00019978348069585265, "loss": 2.2632, "step": 17805 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019978335911660674, "loss": 2.1167, "step": 17810 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019978323750327305, "loss": 2.3101, "step": 17815 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019978311585585159, "loss": 2.1039, "step": 17820 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.0001997829941743424, "loss": 2.2369, "step": 17825 }, { "epoch": 0.04, "grad_norm": 1.9375, "learning_rate": 0.00019978287245874551, "loss": 2.296, "step": 17830 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019978275070906102, "loss": 2.3086, "step": 17835 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.0001997826289252889, "loss": 2.2281, "step": 17840 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019978250710742926, "loss": 2.1042, "step": 17845 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019978238525548205, "loss": 2.2563, "step": 17850 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019978226336944742, "loss": 2.167, "step": 17855 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.00019978214144932534, "loss": 2.1608, "step": 17860 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019978201949511588, "loss": 2.2756, "step": 17865 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019978189750681903, "loss": 2.1937, "step": 17870 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019978177548443492, "loss": 2.0867, "step": 17875 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.00019978165342796354, "loss": 2.2086, "step": 17880 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.00019978153133740493, "loss": 2.4258, "step": 17885 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019978140921275914, "loss": 2.1606, "step": 17890 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019978128705402622, "loss": 2.2519, "step": 17895 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.0001997811648612062, "loss": 2.2818, "step": 17900 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019978104263429912, "loss": 2.2702, "step": 17905 }, { "epoch": 0.04, "grad_norm": 1.6640625, "learning_rate": 0.00019978092037330504, "loss": 2.3108, "step": 17910 }, { "epoch": 0.04, "grad_norm": 1.359375, "learning_rate": 0.00019978079807822394, "loss": 2.2433, "step": 17915 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019978067574905596, "loss": 2.2977, "step": 17920 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.0001997805533858011, "loss": 2.1017, "step": 17925 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.00019978043098845935, "loss": 2.1581, "step": 17930 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.0001997803085570308, "loss": 2.2069, "step": 17935 }, { "epoch": 0.04, "grad_norm": 2.359375, "learning_rate": 0.0001997801860915155, "loss": 2.1646, "step": 17940 }, { "epoch": 0.04, "grad_norm": 2.40625, "learning_rate": 0.0001997800635919135, "loss": 2.1822, "step": 17945 }, { "epoch": 0.04, "grad_norm": 1.4921875, "learning_rate": 0.00019977994105822478, "loss": 2.1681, "step": 17950 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.00019977981849044943, "loss": 2.184, "step": 17955 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.0001997796958885875, "loss": 2.0344, "step": 17960 }, { "epoch": 0.04, "grad_norm": 2.125, "learning_rate": 0.00019977957325263904, "loss": 1.9432, "step": 17965 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.00019977945058260402, "loss": 2.2914, "step": 17970 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019977932787848256, "loss": 2.3538, "step": 17975 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019977920514027467, "loss": 2.2269, "step": 17980 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019977908236798038, "loss": 2.3883, "step": 17985 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019977895956159976, "loss": 2.2355, "step": 17990 }, { "epoch": 0.04, "grad_norm": 1.53125, "learning_rate": 0.0001997788367211328, "loss": 2.1467, "step": 17995 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.00019977871384657962, "loss": 2.33, "step": 18000 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.00019977859093794022, "loss": 2.1548, "step": 18005 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019977846799521464, "loss": 2.1626, "step": 18010 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.0001997783450184029, "loss": 2.213, "step": 18015 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.0001997782220075051, "loss": 2.0688, "step": 18020 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019977809896252124, "loss": 2.1689, "step": 18025 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019977797588345138, "loss": 2.0591, "step": 18030 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019977785277029554, "loss": 2.2491, "step": 18035 }, { "epoch": 0.04, "grad_norm": 2.234375, "learning_rate": 0.00019977772962305376, "loss": 2.0727, "step": 18040 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019977760644172615, "loss": 2.2797, "step": 18045 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019977748322631266, "loss": 2.1538, "step": 18050 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019977735997681338, "loss": 2.1465, "step": 18055 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019977723669322835, "loss": 2.1111, "step": 18060 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.0001997771133755576, "loss": 2.3526, "step": 18065 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019977699002380117, "loss": 2.1816, "step": 18070 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019977686663795913, "loss": 2.1538, "step": 18075 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019977674321803148, "loss": 2.1482, "step": 18080 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.00019977661976401832, "loss": 2.1311, "step": 18085 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019977649627591964, "loss": 2.319, "step": 18090 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.0001997763727537355, "loss": 2.1454, "step": 18095 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019977624919746592, "loss": 2.2765, "step": 18100 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019977612560711098, "loss": 2.2535, "step": 18105 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.0001997760019826707, "loss": 2.3637, "step": 18110 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019977587832414515, "loss": 2.2629, "step": 18115 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.00019977575463153433, "loss": 2.0462, "step": 18120 }, { "epoch": 0.04, "grad_norm": 2.0, "learning_rate": 0.00019977563090483832, "loss": 2.0845, "step": 18125 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.0001997755071440571, "loss": 2.1553, "step": 18130 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019977538334919081, "loss": 2.187, "step": 18135 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019977525952023944, "loss": 2.1642, "step": 18140 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.000199775135657203, "loss": 2.1588, "step": 18145 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.0001997750117600816, "loss": 2.0459, "step": 18150 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.0001997748878288752, "loss": 2.1457, "step": 18155 }, { "epoch": 0.04, "grad_norm": 1.953125, "learning_rate": 0.00019977476386358388, "loss": 2.2451, "step": 18160 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.00019977463986420774, "loss": 2.0993, "step": 18165 }, { "epoch": 0.04, "grad_norm": 2.703125, "learning_rate": 0.00019977451583074674, "loss": 2.2618, "step": 18170 }, { "epoch": 0.04, "grad_norm": 2.265625, "learning_rate": 0.00019977439176320098, "loss": 2.175, "step": 18175 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019977426766157048, "loss": 2.0902, "step": 18180 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.0001997741435258553, "loss": 2.1914, "step": 18185 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.0001997740193560554, "loss": 2.1537, "step": 18190 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.0001997738951521709, "loss": 2.48, "step": 18195 }, { "epoch": 0.04, "grad_norm": 1.4296875, "learning_rate": 0.00019977377091420187, "loss": 2.2218, "step": 18200 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.0001997736466421483, "loss": 2.193, "step": 18205 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.0001997735223360102, "loss": 2.2744, "step": 18210 }, { "epoch": 0.04, "grad_norm": 1.5390625, "learning_rate": 0.00019977339799578768, "loss": 2.0999, "step": 18215 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.0001997732736214808, "loss": 2.1292, "step": 18220 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019977314921308952, "loss": 2.2137, "step": 18225 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.0001997730247706139, "loss": 2.1664, "step": 18230 }, { "epoch": 0.04, "grad_norm": 1.5546875, "learning_rate": 0.00019977290029405404, "loss": 2.0869, "step": 18235 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019977277578340995, "loss": 2.3067, "step": 18240 }, { "epoch": 0.04, "grad_norm": 1.515625, "learning_rate": 0.00019977265123868166, "loss": 2.0676, "step": 18245 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.0001997725266598692, "loss": 2.1972, "step": 18250 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019977240204697267, "loss": 2.3132, "step": 18255 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019977227739999208, "loss": 2.0338, "step": 18260 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019977215271892745, "loss": 2.317, "step": 18265 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019977202800377885, "loss": 2.2835, "step": 18270 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.0001997719032545463, "loss": 2.0905, "step": 18275 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019977177847122987, "loss": 2.1934, "step": 18280 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.0001997716536538296, "loss": 2.3644, "step": 18285 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019977152880234554, "loss": 2.2837, "step": 18290 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.0001997714039167777, "loss": 2.1096, "step": 18295 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.0001997712789971261, "loss": 2.266, "step": 18300 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.00019977115404339087, "loss": 2.1191, "step": 18305 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019977102905557198, "loss": 2.3248, "step": 18310 }, { "epoch": 0.04, "grad_norm": 1.546875, "learning_rate": 0.0001997709040336695, "loss": 2.2278, "step": 18315 }, { "epoch": 0.04, "grad_norm": 1.5078125, "learning_rate": 0.00019977077897768348, "loss": 2.2575, "step": 18320 }, { "epoch": 0.04, "grad_norm": 1.921875, "learning_rate": 0.00019977065388761397, "loss": 2.2427, "step": 18325 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019977052876346096, "loss": 2.1542, "step": 18330 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019977040360522454, "loss": 2.2979, "step": 18335 }, { "epoch": 0.04, "grad_norm": 1.484375, "learning_rate": 0.00019977027841290473, "loss": 2.2484, "step": 18340 }, { "epoch": 0.04, "grad_norm": 2.078125, "learning_rate": 0.00019977015318650162, "loss": 2.3344, "step": 18345 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.00019977002792601517, "loss": 2.4188, "step": 18350 }, { "epoch": 0.04, "grad_norm": 2.1875, "learning_rate": 0.0001997699026314455, "loss": 2.1625, "step": 18355 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019976977730279264, "loss": 2.1912, "step": 18360 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.00019976965194005658, "loss": 2.2741, "step": 18365 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.0001997695265432374, "loss": 2.2322, "step": 18370 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019976940111233514, "loss": 2.1414, "step": 18375 }, { "epoch": 0.04, "grad_norm": 2.1875, "learning_rate": 0.00019976927564734984, "loss": 2.2263, "step": 18380 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019976915014828153, "loss": 2.2076, "step": 18385 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.0001997690246151303, "loss": 2.2452, "step": 18390 }, { "epoch": 0.04, "grad_norm": 1.828125, "learning_rate": 0.00019976889904789618, "loss": 2.0882, "step": 18395 }, { "epoch": 0.04, "grad_norm": 1.8515625, "learning_rate": 0.00019976877344657918, "loss": 2.3623, "step": 18400 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.0001997686478111793, "loss": 2.1744, "step": 18405 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019976852214169673, "loss": 2.1276, "step": 18410 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.0001997683964381314, "loss": 2.2367, "step": 18415 }, { "epoch": 0.04, "grad_norm": 1.4375, "learning_rate": 0.00019976827070048334, "loss": 2.2558, "step": 18420 }, { "epoch": 0.04, "grad_norm": 1.7578125, "learning_rate": 0.00019976814492875264, "loss": 2.2769, "step": 18425 }, { "epoch": 0.04, "grad_norm": 1.765625, "learning_rate": 0.00019976801912293933, "loss": 2.0875, "step": 18430 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019976789328304348, "loss": 2.3738, "step": 18435 }, { "epoch": 0.04, "grad_norm": 1.671875, "learning_rate": 0.0001997677674090651, "loss": 2.3863, "step": 18440 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019976764150100423, "loss": 2.2251, "step": 18445 }, { "epoch": 0.04, "grad_norm": 2.3125, "learning_rate": 0.00019976751555886093, "loss": 2.1244, "step": 18450 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019976738958263522, "loss": 2.1557, "step": 18455 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.0001997672635723272, "loss": 2.2513, "step": 18460 }, { "epoch": 0.04, "grad_norm": 1.9453125, "learning_rate": 0.00019976713752793682, "loss": 2.0827, "step": 18465 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.0001997670114494642, "loss": 2.4128, "step": 18470 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019976688533690937, "loss": 2.1615, "step": 18475 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.00019976675919027237, "loss": 2.3186, "step": 18480 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019976663300955323, "loss": 2.0903, "step": 18485 }, { "epoch": 0.04, "grad_norm": 1.84375, "learning_rate": 0.00019976650679475198, "loss": 2.1987, "step": 18490 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019976638054586873, "loss": 2.116, "step": 18495 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.00019976625426290344, "loss": 2.2335, "step": 18500 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019976612794585615, "loss": 2.1745, "step": 18505 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.000199766001594727, "loss": 2.1898, "step": 18510 }, { "epoch": 0.04, "grad_norm": 1.796875, "learning_rate": 0.00019976587520951594, "loss": 2.2413, "step": 18515 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019976574879022307, "loss": 2.089, "step": 18520 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.0001997656223368484, "loss": 2.2074, "step": 18525 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.000199765495849392, "loss": 2.1401, "step": 18530 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019976536932785389, "loss": 2.176, "step": 18535 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.0001997652427722341, "loss": 2.0437, "step": 18540 }, { "epoch": 0.04, "grad_norm": 2.21875, "learning_rate": 0.00019976511618253272, "loss": 2.1555, "step": 18545 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019976498955874973, "loss": 2.1645, "step": 18550 }, { "epoch": 0.04, "grad_norm": 1.6484375, "learning_rate": 0.00019976486290088525, "loss": 2.1639, "step": 18555 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019976473620893925, "loss": 2.2348, "step": 18560 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019976460948291185, "loss": 2.3282, "step": 18565 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.000199764482722803, "loss": 2.0864, "step": 18570 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019976435592861282, "loss": 2.3048, "step": 18575 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019976422910034133, "loss": 2.286, "step": 18580 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.0001997641022379886, "loss": 2.4102, "step": 18585 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.00019976397534155458, "loss": 2.343, "step": 18590 }, { "epoch": 0.04, "grad_norm": 1.171875, "learning_rate": 0.0001997638484110394, "loss": 1.8021, "step": 18595 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019976372144644308, "loss": 2.1424, "step": 18600 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.0001997635944477657, "loss": 2.1307, "step": 18605 }, { "epoch": 0.04, "grad_norm": 1.6328125, "learning_rate": 0.00019976346741500722, "loss": 2.167, "step": 18610 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019976334034816774, "loss": 1.9858, "step": 18615 }, { "epoch": 0.04, "grad_norm": 1.9921875, "learning_rate": 0.0001997632132472473, "loss": 2.2381, "step": 18620 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019976308611224593, "loss": 2.193, "step": 18625 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.0001997629589431637, "loss": 2.159, "step": 18630 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.0001997628317400006, "loss": 2.4221, "step": 18635 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019976270450275675, "loss": 2.1832, "step": 18640 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019976257723143212, "loss": 2.1123, "step": 18645 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001997624499260268, "loss": 2.3364, "step": 18650 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019976232258654082, "loss": 2.3264, "step": 18655 }, { "epoch": 0.04, "grad_norm": 1.8359375, "learning_rate": 0.0001997621952129742, "loss": 2.2988, "step": 18660 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019976206780532703, "loss": 2.3102, "step": 18665 }, { "epoch": 0.04, "grad_norm": 1.59375, "learning_rate": 0.00019976194036359929, "loss": 2.1268, "step": 18670 }, { "epoch": 0.04, "grad_norm": 2.03125, "learning_rate": 0.0001997618128877911, "loss": 2.2181, "step": 18675 }, { "epoch": 0.04, "grad_norm": 1.6796875, "learning_rate": 0.00019976168537790248, "loss": 2.1324, "step": 18680 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.0001997615578339334, "loss": 2.1153, "step": 18685 }, { "epoch": 0.04, "grad_norm": 1.6640625, "learning_rate": 0.000199761430255884, "loss": 2.3087, "step": 18690 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019976130264375428, "loss": 2.0605, "step": 18695 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.0001997611749975443, "loss": 2.1032, "step": 18700 }, { "epoch": 0.04, "grad_norm": 1.96875, "learning_rate": 0.00019976104731725408, "loss": 2.2113, "step": 18705 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.0001997609196028837, "loss": 2.1819, "step": 18710 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.00019976079185443315, "loss": 2.2264, "step": 18715 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019976066407190253, "loss": 2.2149, "step": 18720 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019976053625529183, "loss": 2.2915, "step": 18725 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019976040840460115, "loss": 2.1434, "step": 18730 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019976028051983048, "loss": 2.182, "step": 18735 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.0001997601526009799, "loss": 2.2152, "step": 18740 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.00019976002464804944, "loss": 2.3434, "step": 18745 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.00019975989666103913, "loss": 2.1783, "step": 18750 }, { "epoch": 0.04, "grad_norm": 1.5234375, "learning_rate": 0.00019975976863994906, "loss": 2.2754, "step": 18755 }, { "epoch": 0.04, "grad_norm": 1.5625, "learning_rate": 0.00019975964058477924, "loss": 2.1202, "step": 18760 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.00019975951249552972, "loss": 2.3414, "step": 18765 }, { "epoch": 0.04, "grad_norm": 1.6875, "learning_rate": 0.00019975938437220053, "loss": 2.1915, "step": 18770 }, { "epoch": 0.04, "grad_norm": 1.65625, "learning_rate": 0.00019975925621479172, "loss": 2.138, "step": 18775 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019975912802330334, "loss": 2.0817, "step": 18780 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019975899979773543, "loss": 2.0469, "step": 18785 }, { "epoch": 0.04, "grad_norm": 1.8125, "learning_rate": 0.00019975887153808806, "loss": 2.0853, "step": 18790 }, { "epoch": 0.04, "grad_norm": 2.203125, "learning_rate": 0.00019975874324436123, "loss": 2.2863, "step": 18795 }, { "epoch": 0.04, "grad_norm": 1.7109375, "learning_rate": 0.000199758614916555, "loss": 2.0243, "step": 18800 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.00019975848655466943, "loss": 2.1727, "step": 18805 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.00019975835815870454, "loss": 2.1839, "step": 18810 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.0001997582297286604, "loss": 2.1243, "step": 18815 }, { "epoch": 0.04, "grad_norm": 1.9296875, "learning_rate": 0.00019975810126453703, "loss": 2.2195, "step": 18820 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.0001997579727663345, "loss": 2.2523, "step": 18825 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019975784423405282, "loss": 2.2566, "step": 18830 }, { "epoch": 0.04, "grad_norm": 1.46875, "learning_rate": 0.00019975771566769205, "loss": 2.1343, "step": 18835 }, { "epoch": 0.04, "grad_norm": 1.5234375, "learning_rate": 0.00019975758706725224, "loss": 2.2067, "step": 18840 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019975745843273343, "loss": 2.1547, "step": 18845 }, { "epoch": 0.04, "grad_norm": 2.109375, "learning_rate": 0.00019975732976413565, "loss": 2.2353, "step": 18850 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.000199757201061459, "loss": 2.1307, "step": 18855 }, { "epoch": 0.04, "grad_norm": 1.8046875, "learning_rate": 0.00019975707232470344, "loss": 1.9911, "step": 18860 }, { "epoch": 0.04, "grad_norm": 1.984375, "learning_rate": 0.00019975694355386906, "loss": 2.4119, "step": 18865 }, { "epoch": 0.04, "grad_norm": 1.609375, "learning_rate": 0.0001997568147489559, "loss": 2.1336, "step": 18870 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019975668590996402, "loss": 2.0773, "step": 18875 }, { "epoch": 0.04, "grad_norm": 2.015625, "learning_rate": 0.00019975655703689343, "loss": 2.2761, "step": 18880 }, { "epoch": 0.04, "grad_norm": 1.953125, "learning_rate": 0.0001997564281297442, "loss": 2.0251, "step": 18885 }, { "epoch": 0.04, "grad_norm": 1.890625, "learning_rate": 0.00019975629918851637, "loss": 2.295, "step": 18890 }, { "epoch": 0.04, "grad_norm": 2.5625, "learning_rate": 0.00019975617021320997, "loss": 2.3453, "step": 18895 }, { "epoch": 0.04, "grad_norm": 1.75, "learning_rate": 0.00019975604120382504, "loss": 2.1712, "step": 18900 }, { "epoch": 0.04, "grad_norm": 1.5078125, "learning_rate": 0.00019975591216036165, "loss": 2.1717, "step": 18905 }, { "epoch": 0.04, "grad_norm": 1.7265625, "learning_rate": 0.00019975578308281984, "loss": 2.1701, "step": 18910 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019975565397119965, "loss": 2.1731, "step": 18915 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.0001997555248255011, "loss": 2.2065, "step": 18920 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.00019975539564572422, "loss": 2.2112, "step": 18925 }, { "epoch": 0.04, "grad_norm": 1.7890625, "learning_rate": 0.00019975526643186914, "loss": 2.1545, "step": 18930 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019975513718393587, "loss": 2.1612, "step": 18935 }, { "epoch": 0.04, "grad_norm": 1.5, "learning_rate": 0.0001997550079019244, "loss": 2.1452, "step": 18940 }, { "epoch": 0.04, "grad_norm": 2.0625, "learning_rate": 0.0001997548785858348, "loss": 2.3038, "step": 18945 }, { "epoch": 0.04, "grad_norm": 2.296875, "learning_rate": 0.00019975474923566715, "loss": 2.0656, "step": 18950 }, { "epoch": 0.04, "grad_norm": 1.5859375, "learning_rate": 0.00019975461985142142, "loss": 2.2562, "step": 18955 }, { "epoch": 0.04, "grad_norm": 2.140625, "learning_rate": 0.00019975449043309776, "loss": 2.2803, "step": 18960 }, { "epoch": 0.04, "grad_norm": 2.15625, "learning_rate": 0.00019975436098069614, "loss": 2.2874, "step": 18965 }, { "epoch": 0.04, "grad_norm": 1.8203125, "learning_rate": 0.0001997542314942166, "loss": 2.2894, "step": 18970 }, { "epoch": 0.04, "grad_norm": 1.7734375, "learning_rate": 0.00019975410197365924, "loss": 2.0527, "step": 18975 }, { "epoch": 0.04, "grad_norm": 2.890625, "learning_rate": 0.00019975397241902404, "loss": 2.1162, "step": 18980 }, { "epoch": 0.04, "grad_norm": 1.9609375, "learning_rate": 0.0001997538428303111, "loss": 2.0684, "step": 18985 }, { "epoch": 0.04, "grad_norm": 1.859375, "learning_rate": 0.0001997537132075204, "loss": 2.1643, "step": 18990 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.00019975358355065205, "loss": 2.1498, "step": 18995 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019975345385970608, "loss": 2.149, "step": 19000 }, { "epoch": 0.04, "grad_norm": 1.5625, "learning_rate": 0.0001997533241346825, "loss": 2.2373, "step": 19005 }, { "epoch": 0.04, "grad_norm": 1.8828125, "learning_rate": 0.0001997531943755814, "loss": 2.217, "step": 19010 }, { "epoch": 0.04, "grad_norm": 1.8671875, "learning_rate": 0.00019975306458240276, "loss": 2.2639, "step": 19015 }, { "epoch": 0.04, "grad_norm": 1.6953125, "learning_rate": 0.0001997529347551467, "loss": 2.2818, "step": 19020 }, { "epoch": 0.04, "grad_norm": 2.453125, "learning_rate": 0.0001997528048938132, "loss": 2.2553, "step": 19025 }, { "epoch": 0.04, "grad_norm": 1.6015625, "learning_rate": 0.00019975267499840237, "loss": 2.3518, "step": 19030 }, { "epoch": 0.04, "grad_norm": 1.640625, "learning_rate": 0.00019975254506891417, "loss": 2.4552, "step": 19035 }, { "epoch": 0.04, "grad_norm": 1.8984375, "learning_rate": 0.00019975241510534873, "loss": 2.3995, "step": 19040 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.00019975228510770606, "loss": 2.1914, "step": 19045 }, { "epoch": 0.04, "grad_norm": 2.046875, "learning_rate": 0.00019975215507598616, "loss": 2.1568, "step": 19050 }, { "epoch": 0.04, "grad_norm": 1.9765625, "learning_rate": 0.00019975202501018913, "loss": 2.1536, "step": 19055 }, { "epoch": 0.04, "grad_norm": 1.9140625, "learning_rate": 0.000199751894910315, "loss": 2.1704, "step": 19060 }, { "epoch": 0.04, "grad_norm": 1.625, "learning_rate": 0.0001997517647763638, "loss": 2.3188, "step": 19065 }, { "epoch": 0.04, "grad_norm": 1.7421875, "learning_rate": 0.00019975163460833563, "loss": 2.2849, "step": 19070 }, { "epoch": 0.04, "grad_norm": 1.734375, "learning_rate": 0.00019975150440623045, "loss": 2.3457, "step": 19075 }, { "epoch": 0.04, "grad_norm": 1.90625, "learning_rate": 0.0001997513741700484, "loss": 2.1677, "step": 19080 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.0001997512438997894, "loss": 2.1749, "step": 19085 }, { "epoch": 0.04, "grad_norm": 1.703125, "learning_rate": 0.00019975111359545362, "loss": 2.2778, "step": 19090 }, { "epoch": 0.04, "grad_norm": 1.71875, "learning_rate": 0.00019975098325704106, "loss": 2.1394, "step": 19095 }, { "epoch": 0.04, "grad_norm": 1.5703125, "learning_rate": 0.0001997508528845517, "loss": 2.309, "step": 19100 }, { "epoch": 0.04, "grad_norm": 1.875, "learning_rate": 0.00019975072247798567, "loss": 2.2348, "step": 19105 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019975059203734298, "loss": 2.2952, "step": 19110 }, { "epoch": 0.04, "grad_norm": 1.6171875, "learning_rate": 0.00019975046156262368, "loss": 2.2978, "step": 19115 }, { "epoch": 0.04, "grad_norm": 1.78125, "learning_rate": 0.00019975033105382783, "loss": 2.2815, "step": 19120 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019975020051095544, "loss": 2.108, "step": 19125 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019975006993400656, "loss": 2.079, "step": 19130 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019974993932298128, "loss": 1.99, "step": 19135 }, { "epoch": 0.05, "grad_norm": 1.953125, "learning_rate": 0.00019974980867787958, "loss": 2.2834, "step": 19140 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.00019974967799870155, "loss": 2.2435, "step": 19145 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019974954728544723, "loss": 2.0586, "step": 19150 }, { "epoch": 0.05, "grad_norm": 2.8125, "learning_rate": 0.00019974941653811662, "loss": 2.2709, "step": 19155 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019974928575670987, "loss": 2.2702, "step": 19160 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019974915494122688, "loss": 2.1404, "step": 19165 }, { "epoch": 0.05, "grad_norm": 1.5234375, "learning_rate": 0.0001997490240916678, "loss": 2.2825, "step": 19170 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019974889320803266, "loss": 2.1142, "step": 19175 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019974876229032147, "loss": 2.3446, "step": 19180 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.0001997486313385343, "loss": 2.2607, "step": 19185 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019974850035267116, "loss": 2.3628, "step": 19190 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019974836933273218, "loss": 2.1569, "step": 19195 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019974823827871729, "loss": 2.3965, "step": 19200 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019974810719062665, "loss": 2.2399, "step": 19205 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.0001997479760684602, "loss": 2.4276, "step": 19210 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019974784491221805, "loss": 2.2206, "step": 19215 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019974771372190025, "loss": 2.2463, "step": 19220 }, { "epoch": 0.05, "grad_norm": 1.546875, "learning_rate": 0.00019974758249750678, "loss": 2.1499, "step": 19225 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019974745123903776, "loss": 1.9797, "step": 19230 }, { "epoch": 0.05, "grad_norm": 1.7265625, "learning_rate": 0.00019974731994649316, "loss": 2.1679, "step": 19235 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019974718861987312, "loss": 2.0322, "step": 19240 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.0001997470572591776, "loss": 2.2645, "step": 19245 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019974692586440668, "loss": 2.4731, "step": 19250 }, { "epoch": 0.05, "grad_norm": 1.515625, "learning_rate": 0.00019974679443556038, "loss": 2.2289, "step": 19255 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019974666297263883, "loss": 2.2909, "step": 19260 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019974653147564195, "loss": 2.1652, "step": 19265 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019974639994456986, "loss": 2.2282, "step": 19270 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.0001997462683794226, "loss": 2.1584, "step": 19275 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019974613678020023, "loss": 2.2016, "step": 19280 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019974600514690273, "loss": 2.0429, "step": 19285 }, { "epoch": 0.05, "grad_norm": 1.5859375, "learning_rate": 0.0001997458734795302, "loss": 2.2333, "step": 19290 }, { "epoch": 0.05, "grad_norm": 1.9140625, "learning_rate": 0.00019974574177808268, "loss": 2.404, "step": 19295 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.00019974561004256017, "loss": 2.2574, "step": 19300 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019974547827296277, "loss": 2.2337, "step": 19305 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019974534646929052, "loss": 2.2101, "step": 19310 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.00019974521463154345, "loss": 2.2402, "step": 19315 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019974508275972158, "loss": 2.1577, "step": 19320 }, { "epoch": 0.05, "grad_norm": 2.4375, "learning_rate": 0.000199744950853825, "loss": 2.1695, "step": 19325 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019974481891385374, "loss": 1.9833, "step": 19330 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019974468693980784, "loss": 2.2632, "step": 19335 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019974455493168734, "loss": 2.3129, "step": 19340 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.0001997444228894923, "loss": 2.2173, "step": 19345 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.00019974429081322272, "loss": 2.2226, "step": 19350 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019974415870287874, "loss": 2.132, "step": 19355 }, { "epoch": 0.05, "grad_norm": 2.328125, "learning_rate": 0.00019974402655846028, "loss": 2.2504, "step": 19360 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019974389437996752, "loss": 2.2958, "step": 19365 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.0001997437621674004, "loss": 2.139, "step": 19370 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.000199743629920759, "loss": 2.2259, "step": 19375 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.00019974349764004335, "loss": 2.287, "step": 19380 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019974336532525358, "loss": 2.1391, "step": 19385 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019974323297638962, "loss": 2.2178, "step": 19390 }, { "epoch": 0.05, "grad_norm": 1.515625, "learning_rate": 0.00019974310059345154, "loss": 2.171, "step": 19395 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019974296817643945, "loss": 2.1734, "step": 19400 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019974283572535333, "loss": 2.1677, "step": 19405 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019974270324019325, "loss": 2.2191, "step": 19410 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019974257072095925, "loss": 1.9789, "step": 19415 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019974243816765143, "loss": 2.1698, "step": 19420 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001997423055802697, "loss": 2.2227, "step": 19425 }, { "epoch": 0.05, "grad_norm": 1.5234375, "learning_rate": 0.00019974217295881425, "loss": 2.0926, "step": 19430 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019974204030328503, "loss": 2.3066, "step": 19435 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019974190761368215, "loss": 2.1748, "step": 19440 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.0001997417748900056, "loss": 2.2546, "step": 19445 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019974164213225546, "loss": 2.2057, "step": 19450 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019974150934043178, "loss": 2.2255, "step": 19455 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019974137651453458, "loss": 2.2858, "step": 19460 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019974124365456392, "loss": 2.1011, "step": 19465 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001997411107605198, "loss": 2.2118, "step": 19470 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019974097783240238, "loss": 2.214, "step": 19475 }, { "epoch": 0.05, "grad_norm": 2.28125, "learning_rate": 0.00019974084487021161, "loss": 2.1426, "step": 19480 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019974071187394755, "loss": 2.1899, "step": 19485 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019974057884361023, "loss": 2.2654, "step": 19490 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019974044577919978, "loss": 2.1889, "step": 19495 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019974031268071613, "loss": 2.2205, "step": 19500 }, { "epoch": 0.05, "grad_norm": 2.4375, "learning_rate": 0.0001997401795481594, "loss": 2.164, "step": 19505 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.0001997400463815296, "loss": 2.2139, "step": 19510 }, { "epoch": 0.05, "grad_norm": 1.5390625, "learning_rate": 0.00019973991318082682, "loss": 2.2033, "step": 19515 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019973977994605107, "loss": 2.2097, "step": 19520 }, { "epoch": 0.05, "grad_norm": 1.953125, "learning_rate": 0.00019973964667720237, "loss": 2.1821, "step": 19525 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019973951337428082, "loss": 2.2112, "step": 19530 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019973938003728644, "loss": 2.0757, "step": 19535 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019973924666621932, "loss": 2.0541, "step": 19540 }, { "epoch": 0.05, "grad_norm": 1.6171875, "learning_rate": 0.00019973911326107938, "loss": 2.3025, "step": 19545 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019973897982186682, "loss": 2.3345, "step": 19550 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.0001997388463485816, "loss": 2.1995, "step": 19555 }, { "epoch": 0.05, "grad_norm": 1.4375, "learning_rate": 0.00019973871284122377, "loss": 2.2039, "step": 19560 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.0001997385792997934, "loss": 2.096, "step": 19565 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.0001997384457242905, "loss": 2.1882, "step": 19570 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.00019973831211471515, "loss": 2.2257, "step": 19575 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019973817847106738, "loss": 2.1115, "step": 19580 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019973804479334726, "loss": 2.3632, "step": 19585 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.0001997379110815548, "loss": 1.9378, "step": 19590 }, { "epoch": 0.05, "grad_norm": 2.203125, "learning_rate": 0.00019973777733569005, "loss": 2.3762, "step": 19595 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019973764355575308, "loss": 2.1979, "step": 19600 }, { "epoch": 0.05, "grad_norm": 1.5703125, "learning_rate": 0.00019973750974174392, "loss": 2.2705, "step": 19605 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019973737589366262, "loss": 2.1793, "step": 19610 }, { "epoch": 0.05, "grad_norm": 2.078125, "learning_rate": 0.0001997372420115092, "loss": 2.1254, "step": 19615 }, { "epoch": 0.05, "grad_norm": 2.09375, "learning_rate": 0.00019973710809528377, "loss": 2.1368, "step": 19620 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.0001997369741449863, "loss": 2.2015, "step": 19625 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.0001997368401606169, "loss": 2.1785, "step": 19630 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019973670614217558, "loss": 2.1371, "step": 19635 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.00019973657208966237, "loss": 2.095, "step": 19640 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019973643800307738, "loss": 2.2011, "step": 19645 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019973630388242058, "loss": 2.214, "step": 19650 }, { "epoch": 0.05, "grad_norm": 1.8671875, "learning_rate": 0.00019973616972769205, "loss": 2.1669, "step": 19655 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019973603553889184, "loss": 2.2697, "step": 19660 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019973590131602, "loss": 2.1803, "step": 19665 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.00019973576705907656, "loss": 1.9375, "step": 19670 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019973563276806157, "loss": 2.3405, "step": 19675 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.00019973549844297508, "loss": 2.0801, "step": 19680 }, { "epoch": 0.05, "grad_norm": 1.625, "learning_rate": 0.00019973536408381714, "loss": 2.1372, "step": 19685 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.0001997352296905878, "loss": 2.1539, "step": 19690 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.0001997350952632871, "loss": 2.31, "step": 19695 }, { "epoch": 0.05, "grad_norm": 2.09375, "learning_rate": 0.00019973496080191505, "loss": 2.1885, "step": 19700 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019973482630647173, "loss": 2.459, "step": 19705 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.0001997346917769572, "loss": 2.3044, "step": 19710 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001997345572133715, "loss": 2.2615, "step": 19715 }, { "epoch": 0.05, "grad_norm": 2.0625, "learning_rate": 0.00019973442261571466, "loss": 2.1668, "step": 19720 }, { "epoch": 0.05, "grad_norm": 1.953125, "learning_rate": 0.00019973428798398675, "loss": 2.0846, "step": 19725 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.00019973415331818776, "loss": 2.2187, "step": 19730 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.0001997340186183178, "loss": 2.0222, "step": 19735 }, { "epoch": 0.05, "grad_norm": 2.109375, "learning_rate": 0.00019973388388437688, "loss": 2.1813, "step": 19740 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019973374911636506, "loss": 2.1138, "step": 19745 }, { "epoch": 0.05, "grad_norm": 1.4453125, "learning_rate": 0.00019973361431428237, "loss": 2.1967, "step": 19750 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.0001997334794781289, "loss": 2.2554, "step": 19755 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019973334460790464, "loss": 2.1583, "step": 19760 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019973320970360968, "loss": 2.2117, "step": 19765 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019973307476524402, "loss": 2.3041, "step": 19770 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019973293979280775, "loss": 2.2124, "step": 19775 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.0001997328047863009, "loss": 2.2604, "step": 19780 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.0001997326697457235, "loss": 2.2862, "step": 19785 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.00019973253467107563, "loss": 2.2183, "step": 19790 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.0001997323995623573, "loss": 2.2573, "step": 19795 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.0001997322644195686, "loss": 2.252, "step": 19800 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019973212924270954, "loss": 2.2952, "step": 19805 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019973199403178015, "loss": 2.3692, "step": 19810 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019973185878678054, "loss": 2.2144, "step": 19815 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.0001997317235077107, "loss": 2.2536, "step": 19820 }, { "epoch": 0.05, "grad_norm": 1.3671875, "learning_rate": 0.00019973158819457066, "loss": 2.1296, "step": 19825 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019973145284736058, "loss": 2.3987, "step": 19830 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019973131746608036, "loss": 2.2986, "step": 19835 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019973118205073012, "loss": 2.1908, "step": 19840 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019973104660130994, "loss": 2.0941, "step": 19845 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.0001997309111178198, "loss": 2.2655, "step": 19850 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019973077560025976, "loss": 2.4038, "step": 19855 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.0001997306400486299, "loss": 2.1189, "step": 19860 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.00019973050446293024, "loss": 2.3861, "step": 19865 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019973036884316083, "loss": 2.3349, "step": 19870 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019973023318932168, "loss": 2.1639, "step": 19875 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.0001997300975014129, "loss": 2.3667, "step": 19880 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019972996177943455, "loss": 2.1427, "step": 19885 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.0001997298260233866, "loss": 2.2492, "step": 19890 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019972969023326914, "loss": 2.1387, "step": 19895 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001997295544090822, "loss": 2.1611, "step": 19900 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019972941855082583, "loss": 2.0918, "step": 19905 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001997292826585001, "loss": 2.0342, "step": 19910 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019972914673210503, "loss": 2.2519, "step": 19915 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019972901077164068, "loss": 2.2724, "step": 19920 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.0001997288747771071, "loss": 2.0798, "step": 19925 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.0001997287387485043, "loss": 2.2168, "step": 19930 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019972860268583238, "loss": 2.203, "step": 19935 }, { "epoch": 0.05, "grad_norm": 2.421875, "learning_rate": 0.00019972846658909135, "loss": 2.2578, "step": 19940 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.00019972833045828127, "loss": 2.1652, "step": 19945 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.00019972819429340218, "loss": 2.3049, "step": 19950 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019972805809445412, "loss": 2.0743, "step": 19955 }, { "epoch": 0.05, "grad_norm": 1.484375, "learning_rate": 0.00019972792186143717, "loss": 2.2342, "step": 19960 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019972778559435134, "loss": 2.2233, "step": 19965 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019972764929319668, "loss": 2.2235, "step": 19970 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019972751295797324, "loss": 2.3664, "step": 19975 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.0001997273765886811, "loss": 2.2177, "step": 19980 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019972724018532028, "loss": 2.405, "step": 19985 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.0001997271037478908, "loss": 2.279, "step": 19990 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.00019972696727639275, "loss": 2.142, "step": 19995 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019972683077082616, "loss": 2.2444, "step": 20000 }, { "epoch": 0.05, "grad_norm": 2.109375, "learning_rate": 0.0001997266942311911, "loss": 2.3739, "step": 20005 }, { "epoch": 0.05, "grad_norm": 1.546875, "learning_rate": 0.00019972655765748755, "loss": 2.0027, "step": 20010 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.0001997264210497156, "loss": 2.205, "step": 20015 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019972628440787532, "loss": 2.0941, "step": 20020 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.00019972614773196675, "loss": 2.2176, "step": 20025 }, { "epoch": 0.05, "grad_norm": 1.484375, "learning_rate": 0.00019972601102198986, "loss": 2.1797, "step": 20030 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019972587427794482, "loss": 2.0902, "step": 20035 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019972573749983157, "loss": 2.2732, "step": 20040 }, { "epoch": 0.05, "grad_norm": 2.328125, "learning_rate": 0.00019972560068765022, "loss": 2.2282, "step": 20045 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019972546384140077, "loss": 2.0767, "step": 20050 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.0001997253269610833, "loss": 2.0064, "step": 20055 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019972519004669788, "loss": 2.1779, "step": 20060 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019972505309824449, "loss": 2.2024, "step": 20065 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019972491611572324, "loss": 2.392, "step": 20070 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019972477909913414, "loss": 2.2566, "step": 20075 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019972464204847724, "loss": 2.2532, "step": 20080 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019972450496375262, "loss": 2.1856, "step": 20085 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.00019972436784496028, "loss": 2.1957, "step": 20090 }, { "epoch": 0.05, "grad_norm": 1.7265625, "learning_rate": 0.0001997242306921003, "loss": 2.2246, "step": 20095 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019972409350517268, "loss": 2.2711, "step": 20100 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.0001997239562841775, "loss": 2.1716, "step": 20105 }, { "epoch": 0.05, "grad_norm": 2.34375, "learning_rate": 0.00019972381902911486, "loss": 2.3382, "step": 20110 }, { "epoch": 0.05, "grad_norm": 2.1875, "learning_rate": 0.0001997236817399847, "loss": 2.0338, "step": 20115 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.0001997235444167872, "loss": 2.1238, "step": 20120 }, { "epoch": 0.05, "grad_norm": 1.46875, "learning_rate": 0.00019972340705952225, "loss": 2.1335, "step": 20125 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019972326966819, "loss": 2.4208, "step": 20130 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.00019972313224279046, "loss": 2.2031, "step": 20135 }, { "epoch": 0.05, "grad_norm": 1.46875, "learning_rate": 0.00019972299478332372, "loss": 2.1613, "step": 20140 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.0001997228572897898, "loss": 2.3256, "step": 20145 }, { "epoch": 0.05, "grad_norm": 1.5546875, "learning_rate": 0.0001997227197621887, "loss": 2.2738, "step": 20150 }, { "epoch": 0.05, "grad_norm": 1.4375, "learning_rate": 0.00019972258220052054, "loss": 2.2359, "step": 20155 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.0001997224446047853, "loss": 2.1787, "step": 20160 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019972230697498314, "loss": 2.3483, "step": 20165 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.00019972216931111398, "loss": 2.1522, "step": 20170 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019972203161317792, "loss": 2.2796, "step": 20175 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.000199721893881175, "loss": 2.1089, "step": 20180 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.0001997217561151053, "loss": 2.1506, "step": 20185 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019972161831496884, "loss": 2.147, "step": 20190 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019972148048076562, "loss": 2.1297, "step": 20195 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019972134261249575, "loss": 1.9267, "step": 20200 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019972120471015929, "loss": 2.2557, "step": 20205 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019972106677375624, "loss": 2.227, "step": 20210 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.0001997209288032867, "loss": 2.288, "step": 20215 }, { "epoch": 0.05, "grad_norm": 1.5703125, "learning_rate": 0.0001997207907987506, "loss": 2.167, "step": 20220 }, { "epoch": 0.05, "grad_norm": 1.5703125, "learning_rate": 0.00019972065276014815, "loss": 2.0868, "step": 20225 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.0001997205146874793, "loss": 2.3552, "step": 20230 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019972037658074406, "loss": 2.0381, "step": 20235 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.0001997202384399426, "loss": 2.2675, "step": 20240 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019972010026507485, "loss": 2.2343, "step": 20245 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019971996205614092, "loss": 2.3456, "step": 20250 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019971982381314084, "loss": 2.1584, "step": 20255 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.0001997196855360747, "loss": 2.115, "step": 20260 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019971954722494247, "loss": 2.5052, "step": 20265 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.00019971940887974422, "loss": 2.3283, "step": 20270 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019971927050048004, "loss": 2.37, "step": 20275 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019971913208714992, "loss": 2.1776, "step": 20280 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019971899363975396, "loss": 2.127, "step": 20285 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019971885515829218, "loss": 2.2529, "step": 20290 }, { "epoch": 0.05, "grad_norm": 1.7265625, "learning_rate": 0.00019971871664276462, "loss": 2.1234, "step": 20295 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019971857809317133, "loss": 2.2238, "step": 20300 }, { "epoch": 0.05, "grad_norm": 1.5234375, "learning_rate": 0.00019971843950951238, "loss": 2.2812, "step": 20305 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019971830089178782, "loss": 2.1951, "step": 20310 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.00019971816223999763, "loss": 2.1358, "step": 20315 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019971802355414194, "loss": 2.1948, "step": 20320 }, { "epoch": 0.05, "grad_norm": 1.6171875, "learning_rate": 0.00019971788483422078, "loss": 2.1396, "step": 20325 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019971774608023416, "loss": 2.1684, "step": 20330 }, { "epoch": 0.05, "grad_norm": 1.453125, "learning_rate": 0.00019971760729218217, "loss": 2.2094, "step": 20335 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.0001997174684700648, "loss": 2.4088, "step": 20340 }, { "epoch": 0.05, "grad_norm": 2.390625, "learning_rate": 0.00019971732961388217, "loss": 2.2972, "step": 20345 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019971719072363429, "loss": 2.0864, "step": 20350 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.0001997170517993212, "loss": 2.2701, "step": 20355 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.00019971691284094293, "loss": 2.2085, "step": 20360 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.0001997167738484996, "loss": 2.3235, "step": 20365 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019971663482199118, "loss": 2.2775, "step": 20370 }, { "epoch": 0.05, "grad_norm": 2.28125, "learning_rate": 0.00019971649576141774, "loss": 2.2093, "step": 20375 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019971635666677937, "loss": 2.1032, "step": 20380 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019971621753807608, "loss": 2.3552, "step": 20385 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.0001997160783753079, "loss": 2.1658, "step": 20390 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019971593917847492, "loss": 2.2637, "step": 20395 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019971579994757717, "loss": 2.1934, "step": 20400 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.00019971566068261468, "loss": 2.0475, "step": 20405 }, { "epoch": 0.05, "grad_norm": 1.5390625, "learning_rate": 0.0001997155213835875, "loss": 2.1011, "step": 20410 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.0001997153820504957, "loss": 2.2363, "step": 20415 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019971524268333935, "loss": 2.0982, "step": 20420 }, { "epoch": 0.05, "grad_norm": 3.109375, "learning_rate": 0.00019971510328211842, "loss": 2.3072, "step": 20425 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019971496384683303, "loss": 2.1787, "step": 20430 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019971482437748317, "loss": 2.0913, "step": 20435 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019971468487406892, "loss": 2.1659, "step": 20440 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019971454533659034, "loss": 2.232, "step": 20445 }, { "epoch": 0.05, "grad_norm": 1.421875, "learning_rate": 0.00019971440576504746, "loss": 1.9981, "step": 20450 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019971426615944036, "loss": 2.2604, "step": 20455 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.000199714126519769, "loss": 2.2216, "step": 20460 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019971398684603354, "loss": 2.1108, "step": 20465 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019971384713823395, "loss": 2.1932, "step": 20470 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019971370739637028, "loss": 2.1469, "step": 20475 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019971356762044264, "loss": 2.1554, "step": 20480 }, { "epoch": 0.05, "grad_norm": 1.625, "learning_rate": 0.000199713427810451, "loss": 2.2867, "step": 20485 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019971328796639548, "loss": 2.125, "step": 20490 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.0001997131480882761, "loss": 2.4218, "step": 20495 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019971300817609288, "loss": 2.363, "step": 20500 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019971286822984585, "loss": 2.3791, "step": 20505 }, { "epoch": 0.05, "grad_norm": 1.546875, "learning_rate": 0.00019971272824953514, "loss": 2.4493, "step": 20510 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019971258823516075, "loss": 2.2766, "step": 20515 }, { "epoch": 0.05, "grad_norm": 2.09375, "learning_rate": 0.0001997124481867227, "loss": 2.314, "step": 20520 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.0001997123081042211, "loss": 2.1409, "step": 20525 }, { "epoch": 0.05, "grad_norm": 2.28125, "learning_rate": 0.000199712167987656, "loss": 1.9834, "step": 20530 }, { "epoch": 0.05, "grad_norm": 1.953125, "learning_rate": 0.00019971202783702734, "loss": 2.0035, "step": 20535 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019971188765233527, "loss": 2.0001, "step": 20540 }, { "epoch": 0.05, "grad_norm": 1.625, "learning_rate": 0.00019971174743357983, "loss": 2.0301, "step": 20545 }, { "epoch": 0.05, "grad_norm": 2.09375, "learning_rate": 0.00019971160718076103, "loss": 2.1447, "step": 20550 }, { "epoch": 0.05, "grad_norm": 1.4453125, "learning_rate": 0.00019971146689387896, "loss": 2.0242, "step": 20555 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019971132657293364, "loss": 2.2254, "step": 20560 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.0001997111862179251, "loss": 2.4716, "step": 20565 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019971104582885342, "loss": 1.991, "step": 20570 }, { "epoch": 0.05, "grad_norm": 1.5546875, "learning_rate": 0.00019971090540571863, "loss": 2.193, "step": 20575 }, { "epoch": 0.05, "grad_norm": 1.9140625, "learning_rate": 0.0001997107649485208, "loss": 2.1103, "step": 20580 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.00019971062445725996, "loss": 2.3867, "step": 20585 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.0001997104839319362, "loss": 2.2704, "step": 20590 }, { "epoch": 0.05, "grad_norm": 2.28125, "learning_rate": 0.00019971034337254947, "loss": 2.245, "step": 20595 }, { "epoch": 0.05, "grad_norm": 3.515625, "learning_rate": 0.0001997102027790999, "loss": 2.1755, "step": 20600 }, { "epoch": 0.05, "grad_norm": 2.3125, "learning_rate": 0.00019971006215158753, "loss": 2.1321, "step": 20605 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019970992149001237, "loss": 2.345, "step": 20610 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.0001997097807943745, "loss": 2.2903, "step": 20615 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.000199709640064674, "loss": 2.2361, "step": 20620 }, { "epoch": 0.05, "grad_norm": 3.25, "learning_rate": 0.00019970949930091082, "loss": 2.2617, "step": 20625 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.0001997093585030851, "loss": 2.2957, "step": 20630 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019970921767119686, "loss": 2.2154, "step": 20635 }, { "epoch": 0.05, "grad_norm": 1.625, "learning_rate": 0.00019970907680524612, "loss": 2.2104, "step": 20640 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019970893590523297, "loss": 2.1337, "step": 20645 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019970879497115742, "loss": 2.1986, "step": 20650 }, { "epoch": 0.05, "grad_norm": 2.265625, "learning_rate": 0.00019970865400301954, "loss": 2.1897, "step": 20655 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001997085130008194, "loss": 2.0737, "step": 20660 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.000199708371964557, "loss": 2.147, "step": 20665 }, { "epoch": 0.05, "grad_norm": 1.5078125, "learning_rate": 0.0001997082308942324, "loss": 2.1887, "step": 20670 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.0001997080897898457, "loss": 2.2655, "step": 20675 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.0001997079486513969, "loss": 2.2049, "step": 20680 }, { "epoch": 0.05, "grad_norm": 1.4140625, "learning_rate": 0.00019970780747888603, "loss": 2.4257, "step": 20685 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019970766627231318, "loss": 2.0779, "step": 20690 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019970752503167835, "loss": 2.2187, "step": 20695 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019970738375698165, "loss": 2.1907, "step": 20700 }, { "epoch": 0.05, "grad_norm": 2.3125, "learning_rate": 0.00019970724244822312, "loss": 2.1714, "step": 20705 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019970710110540275, "loss": 2.3014, "step": 20710 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019970695972852068, "loss": 2.285, "step": 20715 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019970681831757686, "loss": 2.2353, "step": 20720 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019970667687257138, "loss": 2.2824, "step": 20725 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.0001997065353935043, "loss": 2.0746, "step": 20730 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019970639388037568, "loss": 2.0745, "step": 20735 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019970625233318553, "loss": 2.2338, "step": 20740 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.0001997061107519339, "loss": 2.1781, "step": 20745 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019970596913662086, "loss": 2.1961, "step": 20750 }, { "epoch": 0.05, "grad_norm": 1.5546875, "learning_rate": 0.00019970582748724648, "loss": 2.2956, "step": 20755 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019970568580381074, "loss": 2.3627, "step": 20760 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019970554408631376, "loss": 2.2778, "step": 20765 }, { "epoch": 0.05, "grad_norm": 2.578125, "learning_rate": 0.00019970540233475557, "loss": 2.2115, "step": 20770 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.0001997052605491362, "loss": 2.2535, "step": 20775 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019970511872945568, "loss": 2.1425, "step": 20780 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.00019970497687571407, "loss": 2.2321, "step": 20785 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019970483498791145, "loss": 2.1875, "step": 20790 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001997046930660479, "loss": 2.1221, "step": 20795 }, { "epoch": 0.05, "grad_norm": 2.0625, "learning_rate": 0.00019970455111012337, "loss": 2.2027, "step": 20800 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019970440912013795, "loss": 1.9617, "step": 20805 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.0001997042670960917, "loss": 2.1749, "step": 20810 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019970412503798467, "loss": 2.2751, "step": 20815 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001997039829458169, "loss": 2.206, "step": 20820 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.00019970384081958846, "loss": 1.9908, "step": 20825 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019970369865929935, "loss": 2.2005, "step": 20830 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.0001997035564649497, "loss": 2.292, "step": 20835 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019970341423653943, "loss": 2.3347, "step": 20840 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019970327197406872, "loss": 2.1944, "step": 20845 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019970312967753758, "loss": 2.1781, "step": 20850 }, { "epoch": 0.05, "grad_norm": 2.203125, "learning_rate": 0.000199702987346946, "loss": 2.3193, "step": 20855 }, { "epoch": 0.05, "grad_norm": 1.484375, "learning_rate": 0.00019970284498229408, "loss": 2.2366, "step": 20860 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019970270258358187, "loss": 1.9914, "step": 20865 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.0001997025601508094, "loss": 2.2364, "step": 20870 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019970241768397674, "loss": 2.0979, "step": 20875 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019970227518308387, "loss": 2.1861, "step": 20880 }, { "epoch": 0.05, "grad_norm": 1.453125, "learning_rate": 0.00019970213264813098, "loss": 2.2955, "step": 20885 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.000199701990079118, "loss": 2.1907, "step": 20890 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019970184747604498, "loss": 2.2108, "step": 20895 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019970170483891205, "loss": 2.1951, "step": 20900 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019970156216771919, "loss": 2.2595, "step": 20905 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.00019970141946246646, "loss": 2.4632, "step": 20910 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.0001997012767231539, "loss": 2.2632, "step": 20915 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.0001997011339497816, "loss": 2.1072, "step": 20920 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.0001997009911423496, "loss": 2.1249, "step": 20925 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019970084830085792, "loss": 2.0161, "step": 20930 }, { "epoch": 0.05, "grad_norm": 1.9140625, "learning_rate": 0.0001997007054253066, "loss": 2.1818, "step": 20935 }, { "epoch": 0.05, "grad_norm": 2.203125, "learning_rate": 0.00019970056251569573, "loss": 2.336, "step": 20940 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019970041957202535, "loss": 2.3468, "step": 20945 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019970027659429546, "loss": 2.0072, "step": 20950 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.0001997001335825062, "loss": 2.4195, "step": 20955 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019969999053665754, "loss": 2.2141, "step": 20960 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019969984745674954, "loss": 2.1991, "step": 20965 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019969970434278227, "loss": 2.1106, "step": 20970 }, { "epoch": 0.05, "grad_norm": 1.6171875, "learning_rate": 0.0001996995611947558, "loss": 2.2492, "step": 20975 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019969941801267012, "loss": 2.0409, "step": 20980 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019969927479652534, "loss": 2.2067, "step": 20985 }, { "epoch": 0.05, "grad_norm": 2.09375, "learning_rate": 0.00019969913154632146, "loss": 2.1117, "step": 20990 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019969898826205853, "loss": 2.1533, "step": 20995 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019969884494373664, "loss": 2.0745, "step": 21000 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.00019969870159135584, "loss": 2.2816, "step": 21005 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019969855820491613, "loss": 2.2129, "step": 21010 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019969841478441759, "loss": 2.0893, "step": 21015 }, { "epoch": 0.05, "grad_norm": 2.0625, "learning_rate": 0.00019969827132986024, "loss": 2.2428, "step": 21020 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019969812784124418, "loss": 2.2991, "step": 21025 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019969798431856942, "loss": 2.2717, "step": 21030 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019969784076183603, "loss": 2.2997, "step": 21035 }, { "epoch": 0.05, "grad_norm": 2.46875, "learning_rate": 0.00019969769717104402, "loss": 2.1307, "step": 21040 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.0001996975535461935, "loss": 2.3393, "step": 21045 }, { "epoch": 0.05, "grad_norm": 1.5390625, "learning_rate": 0.0001996974098872845, "loss": 2.1903, "step": 21050 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.000199697266194317, "loss": 2.1354, "step": 21055 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019969712246729117, "loss": 2.0158, "step": 21060 }, { "epoch": 0.05, "grad_norm": 2.359375, "learning_rate": 0.00019969697870620697, "loss": 2.358, "step": 21065 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.00019969683491106446, "loss": 2.1755, "step": 21070 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.0001996966910818637, "loss": 2.1413, "step": 21075 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019969654721860476, "loss": 2.1655, "step": 21080 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019969640332128768, "loss": 2.1995, "step": 21085 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.0001996962593899125, "loss": 2.0598, "step": 21090 }, { "epoch": 0.05, "grad_norm": 1.484375, "learning_rate": 0.00019969611542447924, "loss": 2.1917, "step": 21095 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019969597142498798, "loss": 2.2137, "step": 21100 }, { "epoch": 0.05, "grad_norm": 1.5234375, "learning_rate": 0.0001996958273914388, "loss": 2.1605, "step": 21105 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019969568332383171, "loss": 2.2635, "step": 21110 }, { "epoch": 0.05, "grad_norm": 4.1875, "learning_rate": 0.00019969553922216676, "loss": 2.2358, "step": 21115 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019969539508644398, "loss": 2.1165, "step": 21120 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019969525091666348, "loss": 2.2932, "step": 21125 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019969510671282524, "loss": 2.2189, "step": 21130 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019969496247492942, "loss": 2.1554, "step": 21135 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.0001996948182029759, "loss": 2.3697, "step": 21140 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019969467389696486, "loss": 2.3651, "step": 21145 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.00019969452955689634, "loss": 2.2497, "step": 21150 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.0001996943851827703, "loss": 2.2738, "step": 21155 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.0001996942407745869, "loss": 2.3706, "step": 21160 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019969409633234611, "loss": 2.2715, "step": 21165 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019969395185604803, "loss": 2.1691, "step": 21170 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.0001996938073456927, "loss": 2.1725, "step": 21175 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019969366280128012, "loss": 2.2442, "step": 21180 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.0001996935182228104, "loss": 2.382, "step": 21185 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.00019969337361028355, "loss": 2.0721, "step": 21190 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019969322896369964, "loss": 2.0805, "step": 21195 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.0001996930842830587, "loss": 2.2286, "step": 21200 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.00019969293956836084, "loss": 2.3139, "step": 21205 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.00019969279481960603, "loss": 2.275, "step": 21210 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.00019969265003679437, "loss": 2.2335, "step": 21215 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019969250521992588, "loss": 2.2781, "step": 21220 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019969236036900063, "loss": 2.0663, "step": 21225 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019969221548401866, "loss": 2.2328, "step": 21230 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019969207056498003, "loss": 2.28, "step": 21235 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.00019969192561188477, "loss": 2.2328, "step": 21240 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019969178062473293, "loss": 2.1749, "step": 21245 }, { "epoch": 0.05, "grad_norm": 1.9140625, "learning_rate": 0.00019969163560352456, "loss": 1.9456, "step": 21250 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019969149054825975, "loss": 2.212, "step": 21255 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.0001996913454589385, "loss": 2.3169, "step": 21260 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019969120033556088, "loss": 2.1777, "step": 21265 }, { "epoch": 0.05, "grad_norm": 1.375, "learning_rate": 0.00019969105517812696, "loss": 2.3339, "step": 21270 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019969090998663675, "loss": 2.2302, "step": 21275 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019969076476109032, "loss": 2.1909, "step": 21280 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.00019969061950148774, "loss": 2.2714, "step": 21285 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.000199690474207829, "loss": 2.0499, "step": 21290 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019969032888011418, "loss": 2.3315, "step": 21295 }, { "epoch": 0.05, "grad_norm": 2.265625, "learning_rate": 0.00019969018351834335, "loss": 2.2493, "step": 21300 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019969003812251654, "loss": 2.44, "step": 21305 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019968989269263384, "loss": 2.119, "step": 21310 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019968974722869525, "loss": 2.2898, "step": 21315 }, { "epoch": 0.05, "grad_norm": 1.546875, "learning_rate": 0.0001996896017307008, "loss": 2.1203, "step": 21320 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.0001996894561986506, "loss": 2.2482, "step": 21325 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.0001996893106325447, "loss": 2.2386, "step": 21330 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019968916503238307, "loss": 2.195, "step": 21335 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019968901939816585, "loss": 2.1793, "step": 21340 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019968887372989302, "loss": 2.3011, "step": 21345 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.0001996887280275647, "loss": 2.1033, "step": 21350 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019968858229118088, "loss": 2.2433, "step": 21355 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019968843652074162, "loss": 2.1029, "step": 21360 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.000199688290716247, "loss": 2.0879, "step": 21365 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.00019968814487769705, "loss": 2.1954, "step": 21370 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.00019968799900509185, "loss": 2.1172, "step": 21375 }, { "epoch": 0.05, "grad_norm": 1.6171875, "learning_rate": 0.00019968785309843138, "loss": 2.2179, "step": 21380 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019968770715771575, "loss": 2.1603, "step": 21385 }, { "epoch": 0.05, "grad_norm": 2.078125, "learning_rate": 0.00019968756118294502, "loss": 2.2651, "step": 21390 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.00019968741517411917, "loss": 2.2861, "step": 21395 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019968726913123828, "loss": 2.2129, "step": 21400 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019968712305430244, "loss": 2.2964, "step": 21405 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019968697694331167, "loss": 2.1574, "step": 21410 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.000199686830798266, "loss": 2.2414, "step": 21415 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.0001996866846191655, "loss": 2.0649, "step": 21420 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019968653840601026, "loss": 2.2751, "step": 21425 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019968639215880025, "loss": 2.17, "step": 21430 }, { "epoch": 0.05, "grad_norm": 2.546875, "learning_rate": 0.00019968624587753556, "loss": 2.0569, "step": 21435 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.00019968609956221627, "loss": 1.9929, "step": 21440 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019968595321284238, "loss": 2.2845, "step": 21445 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019968580682941397, "loss": 2.3229, "step": 21450 }, { "epoch": 0.05, "grad_norm": 1.5703125, "learning_rate": 0.0001996856604119311, "loss": 2.2204, "step": 21455 }, { "epoch": 0.05, "grad_norm": 3.765625, "learning_rate": 0.00019968551396039376, "loss": 2.108, "step": 21460 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.00019968536747480206, "loss": 2.18, "step": 21465 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.00019968522095515604, "loss": 2.2788, "step": 21470 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.0001996850744014557, "loss": 2.233, "step": 21475 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.0001996849278137012, "loss": 2.4211, "step": 21480 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019968478119189247, "loss": 2.1934, "step": 21485 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019968463453602962, "loss": 2.0698, "step": 21490 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019968448784611268, "loss": 2.2547, "step": 21495 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.0001996843411221417, "loss": 2.2148, "step": 21500 }, { "epoch": 0.05, "grad_norm": 1.5, "learning_rate": 0.00019968419436411678, "loss": 2.0833, "step": 21505 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019968404757203794, "loss": 2.3541, "step": 21510 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.0001996839007459052, "loss": 2.246, "step": 21515 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.00019968375388571861, "loss": 2.0251, "step": 21520 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019968360699147827, "loss": 2.0994, "step": 21525 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.0001996834600631842, "loss": 2.3067, "step": 21530 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019968331310083644, "loss": 2.3034, "step": 21535 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019968316610443506, "loss": 2.1777, "step": 21540 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.0001996830190739801, "loss": 2.2728, "step": 21545 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.00019968287200947162, "loss": 2.2151, "step": 21550 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019968272491090966, "loss": 2.1214, "step": 21555 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.00019968257777829427, "loss": 2.2098, "step": 21560 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019968243061162548, "loss": 2.3178, "step": 21565 }, { "epoch": 0.05, "grad_norm": 1.3203125, "learning_rate": 0.0001996822834109034, "loss": 2.2558, "step": 21570 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019968213617612804, "loss": 2.2403, "step": 21575 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019968198890729942, "loss": 2.3507, "step": 21580 }, { "epoch": 0.05, "grad_norm": 1.546875, "learning_rate": 0.00019968184160441767, "loss": 2.142, "step": 21585 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019968169426748278, "loss": 2.2167, "step": 21590 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.0001996815468964948, "loss": 2.0909, "step": 21595 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.0001996813994914538, "loss": 2.1553, "step": 21600 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.00019968125205235986, "loss": 2.0413, "step": 21605 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019968110457921297, "loss": 2.1545, "step": 21610 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.0001996809570720132, "loss": 2.2874, "step": 21615 }, { "epoch": 0.05, "grad_norm": 2.328125, "learning_rate": 0.0001996808095307606, "loss": 2.288, "step": 21620 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019968066195545525, "loss": 2.1709, "step": 21625 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.0001996805143460972, "loss": 2.2438, "step": 21630 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019968036670268643, "loss": 2.2731, "step": 21635 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019968021902522305, "loss": 2.1977, "step": 21640 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019968007131370712, "loss": 2.0353, "step": 21645 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019967992356813863, "loss": 2.2197, "step": 21650 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.0001996797757885177, "loss": 1.9615, "step": 21655 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019967962797484435, "loss": 2.274, "step": 21660 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019967948012711864, "loss": 2.2515, "step": 21665 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.0001996793322453406, "loss": 2.0158, "step": 21670 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019967918432951028, "loss": 2.2048, "step": 21675 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019967903637962774, "loss": 2.1597, "step": 21680 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001996788883956931, "loss": 1.98, "step": 21685 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019967874037770625, "loss": 2.1698, "step": 21690 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.0001996785923256674, "loss": 2.2725, "step": 21695 }, { "epoch": 0.05, "grad_norm": 1.3203125, "learning_rate": 0.0001996784442395765, "loss": 2.0743, "step": 21700 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.00019967829611943367, "loss": 2.2154, "step": 21705 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.0001996781479652389, "loss": 2.049, "step": 21710 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019967799977699225, "loss": 2.311, "step": 21715 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.0001996778515546938, "loss": 2.2783, "step": 21720 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019967770329834363, "loss": 2.1547, "step": 21725 }, { "epoch": 0.05, "grad_norm": 1.484375, "learning_rate": 0.00019967755500794172, "loss": 2.2157, "step": 21730 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019967740668348812, "loss": 2.2293, "step": 21735 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019967725832498295, "loss": 2.2419, "step": 21740 }, { "epoch": 0.05, "grad_norm": 1.6015625, "learning_rate": 0.00019967710993242622, "loss": 2.357, "step": 21745 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019967696150581795, "loss": 2.2504, "step": 21750 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019967681304515825, "loss": 2.2993, "step": 21755 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019967666455044715, "loss": 2.1252, "step": 21760 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.00019967651602168464, "loss": 2.1119, "step": 21765 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019967636745887087, "loss": 2.2522, "step": 21770 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019967621886200583, "loss": 2.2572, "step": 21775 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019967607023108958, "loss": 2.277, "step": 21780 }, { "epoch": 0.05, "grad_norm": 1.4375, "learning_rate": 0.0001996759215661222, "loss": 2.1187, "step": 21785 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019967577286710372, "loss": 2.1708, "step": 21790 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019967562413403416, "loss": 2.283, "step": 21795 }, { "epoch": 0.05, "grad_norm": 1.8671875, "learning_rate": 0.00019967547536691363, "loss": 2.1675, "step": 21800 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019967532656574213, "loss": 2.1438, "step": 21805 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019967517773051974, "loss": 2.1684, "step": 21810 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.00019967502886124647, "loss": 2.2619, "step": 21815 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019967487995792245, "loss": 2.0654, "step": 21820 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019967473102054764, "loss": 2.2296, "step": 21825 }, { "epoch": 0.05, "grad_norm": 1.3828125, "learning_rate": 0.00019967458204912217, "loss": 2.0986, "step": 21830 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019967443304364605, "loss": 2.2034, "step": 21835 }, { "epoch": 0.05, "grad_norm": 1.8671875, "learning_rate": 0.00019967428400411932, "loss": 2.4809, "step": 21840 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019967413493054205, "loss": 2.2284, "step": 21845 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.0001996739858229143, "loss": 2.3357, "step": 21850 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.0001996738366812361, "loss": 2.2568, "step": 21855 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001996736875055075, "loss": 2.1968, "step": 21860 }, { "epoch": 0.05, "grad_norm": 1.46875, "learning_rate": 0.00019967353829572856, "loss": 2.164, "step": 21865 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019967338905189935, "loss": 2.1225, "step": 21870 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.0001996732397740199, "loss": 2.0469, "step": 21875 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019967309046209025, "loss": 2.3864, "step": 21880 }, { "epoch": 0.05, "grad_norm": 1.5546875, "learning_rate": 0.0001996729411161105, "loss": 2.2208, "step": 21885 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.0001996727917360806, "loss": 2.0987, "step": 21890 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019967264232200073, "loss": 2.1621, "step": 21895 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019967249287387086, "loss": 2.1249, "step": 21900 }, { "epoch": 0.05, "grad_norm": 2.109375, "learning_rate": 0.00019967234339169105, "loss": 2.2523, "step": 21905 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019967219387546136, "loss": 2.0926, "step": 21910 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019967204432518184, "loss": 2.0568, "step": 21915 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019967189474085254, "loss": 2.2946, "step": 21920 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019967174512247352, "loss": 2.1673, "step": 21925 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.0001996715954700448, "loss": 1.9296, "step": 21930 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019967144578356648, "loss": 2.1592, "step": 21935 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.0001996712960630386, "loss": 2.3013, "step": 21940 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.0001996711463084612, "loss": 2.211, "step": 21945 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.0001996709965198343, "loss": 2.1743, "step": 21950 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019967084669715801, "loss": 2.1968, "step": 21955 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019967069684043233, "loss": 1.9937, "step": 21960 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019967054694965732, "loss": 2.2593, "step": 21965 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019967039702483306, "loss": 2.1604, "step": 21970 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.0001996702470659596, "loss": 2.1058, "step": 21975 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.0001996700970730369, "loss": 2.236, "step": 21980 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001996699470460652, "loss": 2.0594, "step": 21985 }, { "epoch": 0.05, "grad_norm": 2.328125, "learning_rate": 0.00019966979698504437, "loss": 2.2936, "step": 21990 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019966964688997453, "loss": 2.1974, "step": 21995 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019966949676085576, "loss": 2.113, "step": 22000 }, { "epoch": 0.05, "grad_norm": 2.296875, "learning_rate": 0.00019966934659768805, "loss": 2.288, "step": 22005 }, { "epoch": 0.05, "grad_norm": 1.5859375, "learning_rate": 0.0001996691964004715, "loss": 2.1716, "step": 22010 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019966904616920616, "loss": 2.3936, "step": 22015 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019966889590389208, "loss": 2.2496, "step": 22020 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019966874560452925, "loss": 2.3541, "step": 22025 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019966859527111778, "loss": 2.2226, "step": 22030 }, { "epoch": 0.05, "grad_norm": 2.359375, "learning_rate": 0.0001996684449036577, "loss": 2.2757, "step": 22035 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.0001996682945021491, "loss": 2.238, "step": 22040 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.000199668144066592, "loss": 2.248, "step": 22045 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019966799359698645, "loss": 2.3461, "step": 22050 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019966784309333248, "loss": 2.3275, "step": 22055 }, { "epoch": 0.05, "grad_norm": 2.34375, "learning_rate": 0.00019966769255563022, "loss": 2.3527, "step": 22060 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019966754198387962, "loss": 2.1489, "step": 22065 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.0001996673913780808, "loss": 2.2961, "step": 22070 }, { "epoch": 0.05, "grad_norm": 1.7265625, "learning_rate": 0.0001996672407382338, "loss": 1.9795, "step": 22075 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.00019966709006433866, "loss": 2.0491, "step": 22080 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.0001996669393563954, "loss": 2.2203, "step": 22085 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019966678861440415, "loss": 2.1467, "step": 22090 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019966663783836493, "loss": 2.1039, "step": 22095 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019966648702827771, "loss": 2.222, "step": 22100 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019966633618414265, "loss": 2.1726, "step": 22105 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019966618530595977, "loss": 2.102, "step": 22110 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.0001996660343937291, "loss": 2.2384, "step": 22115 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019966588344745074, "loss": 2.261, "step": 22120 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019966573246712468, "loss": 2.2666, "step": 22125 }, { "epoch": 0.05, "grad_norm": 2.578125, "learning_rate": 0.00019966558145275099, "loss": 2.3007, "step": 22130 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.0001996654304043297, "loss": 2.1623, "step": 22135 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019966527932186096, "loss": 2.1143, "step": 22140 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019966512820534474, "loss": 2.3919, "step": 22145 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.00019966497705478105, "loss": 2.3609, "step": 22150 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019966482587017005, "loss": 2.2293, "step": 22155 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019966467465151172, "loss": 2.0728, "step": 22160 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.00019966452339880615, "loss": 2.2408, "step": 22165 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019966437211205333, "loss": 2.2385, "step": 22170 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019966422079125337, "loss": 2.2911, "step": 22175 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.0001996640694364063, "loss": 2.3711, "step": 22180 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.0001996639180475122, "loss": 2.1631, "step": 22185 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019966376662457108, "loss": 2.2313, "step": 22190 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019966361516758304, "loss": 2.1976, "step": 22195 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019966346367654805, "loss": 2.3681, "step": 22200 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.00019966331215146626, "loss": 2.2522, "step": 22205 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019966316059233762, "loss": 2.0703, "step": 22210 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019966300899916227, "loss": 2.2032, "step": 22215 }, { "epoch": 0.05, "grad_norm": 1.4140625, "learning_rate": 0.00019966285737194024, "loss": 2.234, "step": 22220 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.00019966270571067157, "loss": 2.4088, "step": 22225 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001996625540153563, "loss": 2.1882, "step": 22230 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019966240228599447, "loss": 2.1607, "step": 22235 }, { "epoch": 0.05, "grad_norm": 1.578125, "learning_rate": 0.0001996622505225862, "loss": 2.1755, "step": 22240 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019966209872513145, "loss": 2.1342, "step": 22245 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019966194689363036, "loss": 2.111, "step": 22250 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019966179502808293, "loss": 2.0874, "step": 22255 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019966164312848922, "loss": 2.1082, "step": 22260 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.0001996614911948493, "loss": 2.2437, "step": 22265 }, { "epoch": 0.05, "grad_norm": 1.5234375, "learning_rate": 0.00019966133922716322, "loss": 2.3084, "step": 22270 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.000199661187225431, "loss": 2.2546, "step": 22275 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019966103518965266, "loss": 2.2453, "step": 22280 }, { "epoch": 0.05, "grad_norm": 1.5390625, "learning_rate": 0.00019966088311982837, "loss": 2.1143, "step": 22285 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019966073101595808, "loss": 2.2758, "step": 22290 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.0001996605788780419, "loss": 2.0584, "step": 22295 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.00019966042670607986, "loss": 2.1798, "step": 22300 }, { "epoch": 0.05, "grad_norm": 1.6171875, "learning_rate": 0.000199660274500072, "loss": 2.3406, "step": 22305 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.00019966012226001838, "loss": 2.1829, "step": 22310 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019965996998591904, "loss": 2.1429, "step": 22315 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019965981767777408, "loss": 2.2875, "step": 22320 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.0001996596653355835, "loss": 2.1772, "step": 22325 }, { "epoch": 0.05, "grad_norm": 1.953125, "learning_rate": 0.00019965951295934737, "loss": 2.0801, "step": 22330 }, { "epoch": 0.05, "grad_norm": 2.390625, "learning_rate": 0.00019965936054906576, "loss": 2.1781, "step": 22335 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.0001996592081047387, "loss": 2.1686, "step": 22340 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019965905562636623, "loss": 2.1053, "step": 22345 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019965890311394842, "loss": 2.3763, "step": 22350 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019965875056748533, "loss": 2.0849, "step": 22355 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019965859798697698, "loss": 2.1769, "step": 22360 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.0001996584453724235, "loss": 2.1083, "step": 22365 }, { "epoch": 0.05, "grad_norm": 1.6484375, "learning_rate": 0.00019965829272382485, "loss": 2.1701, "step": 22370 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019965814004118112, "loss": 2.1866, "step": 22375 }, { "epoch": 0.05, "grad_norm": 1.984375, "learning_rate": 0.00019965798732449238, "loss": 2.17, "step": 22380 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019965783457375866, "loss": 1.9904, "step": 22385 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019965768178898, "loss": 2.2899, "step": 22390 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019965752897015647, "loss": 2.3538, "step": 22395 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019965737611728814, "loss": 2.2291, "step": 22400 }, { "epoch": 0.05, "grad_norm": 2.40625, "learning_rate": 0.00019965722323037502, "loss": 2.3124, "step": 22405 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.0001996570703094172, "loss": 2.2234, "step": 22410 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.00019965691735441472, "loss": 2.2767, "step": 22415 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001996567643653676, "loss": 2.4321, "step": 22420 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019965661134227594, "loss": 2.1889, "step": 22425 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.0001996564582851398, "loss": 2.1085, "step": 22430 }, { "epoch": 0.05, "grad_norm": 2.171875, "learning_rate": 0.0001996563051939592, "loss": 2.1836, "step": 22435 }, { "epoch": 0.05, "grad_norm": 1.25, "learning_rate": 0.00019965615206873417, "loss": 2.1917, "step": 22440 }, { "epoch": 0.05, "grad_norm": 2.171875, "learning_rate": 0.0001996559989094648, "loss": 2.3086, "step": 22445 }, { "epoch": 0.05, "grad_norm": 1.9296875, "learning_rate": 0.00019965584571615113, "loss": 2.3811, "step": 22450 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019965569248879322, "loss": 2.1829, "step": 22455 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019965553922739114, "loss": 2.2179, "step": 22460 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.0001996553859319449, "loss": 2.1944, "step": 22465 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019965523260245458, "loss": 2.2475, "step": 22470 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019965507923892022, "loss": 2.2732, "step": 22475 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019965492584134186, "loss": 2.2739, "step": 22480 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.0001996547724097196, "loss": 2.1461, "step": 22485 }, { "epoch": 0.05, "grad_norm": 2.234375, "learning_rate": 0.00019965461894405346, "loss": 2.2675, "step": 22490 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019965446544434348, "loss": 2.1994, "step": 22495 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019965431191058973, "loss": 2.1582, "step": 22500 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019965415834279226, "loss": 2.1716, "step": 22505 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.00019965400474095116, "loss": 2.2854, "step": 22510 }, { "epoch": 0.05, "grad_norm": 2.0625, "learning_rate": 0.00019965385110506638, "loss": 2.2848, "step": 22515 }, { "epoch": 0.05, "grad_norm": 2.375, "learning_rate": 0.0001996536974351381, "loss": 2.0081, "step": 22520 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.00019965354373116626, "loss": 2.2697, "step": 22525 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019965338999315097, "loss": 2.2929, "step": 22530 }, { "epoch": 0.05, "grad_norm": 2.203125, "learning_rate": 0.0001996532362210923, "loss": 2.222, "step": 22535 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019965308241499025, "loss": 2.3165, "step": 22540 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.0001996529285748449, "loss": 2.3643, "step": 22545 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019965277470065633, "loss": 2.1102, "step": 22550 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019965262079242452, "loss": 2.2074, "step": 22555 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.0001996524668501496, "loss": 2.2529, "step": 22560 }, { "epoch": 0.05, "grad_norm": 1.828125, "learning_rate": 0.00019965231287383158, "loss": 2.2477, "step": 22565 }, { "epoch": 0.05, "grad_norm": 1.8046875, "learning_rate": 0.00019965215886347056, "loss": 2.0216, "step": 22570 }, { "epoch": 0.05, "grad_norm": 2.078125, "learning_rate": 0.0001996520048190665, "loss": 2.1338, "step": 22575 }, { "epoch": 0.05, "grad_norm": 1.8671875, "learning_rate": 0.00019965185074061954, "loss": 2.2432, "step": 22580 }, { "epoch": 0.05, "grad_norm": 1.9140625, "learning_rate": 0.0001996516966281297, "loss": 2.186, "step": 22585 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019965154248159701, "loss": 2.2236, "step": 22590 }, { "epoch": 0.05, "grad_norm": 1.6171875, "learning_rate": 0.00019965138830102153, "loss": 2.1539, "step": 22595 }, { "epoch": 0.05, "grad_norm": 2.234375, "learning_rate": 0.0001996512340864034, "loss": 2.0963, "step": 22600 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.00019965107983774253, "loss": 2.1378, "step": 22605 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.00019965092555503908, "loss": 2.2511, "step": 22610 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019965077123829307, "loss": 2.1817, "step": 22615 }, { "epoch": 0.05, "grad_norm": 2.25, "learning_rate": 0.00019965061688750454, "loss": 2.2924, "step": 22620 }, { "epoch": 0.05, "grad_norm": 1.671875, "learning_rate": 0.00019965046250267356, "loss": 2.1886, "step": 22625 }, { "epoch": 0.05, "grad_norm": 1.890625, "learning_rate": 0.00019965030808380017, "loss": 2.2468, "step": 22630 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.0001996501536308844, "loss": 2.3184, "step": 22635 }, { "epoch": 0.05, "grad_norm": 1.4609375, "learning_rate": 0.00019964999914392638, "loss": 2.0538, "step": 22640 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019964984462292606, "loss": 2.1758, "step": 22645 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.0001996496900678836, "loss": 2.134, "step": 22650 }, { "epoch": 0.05, "grad_norm": 1.640625, "learning_rate": 0.00019964953547879896, "loss": 2.0579, "step": 22655 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019964938085567223, "loss": 2.0462, "step": 22660 }, { "epoch": 0.05, "grad_norm": 1.7265625, "learning_rate": 0.0001996492261985035, "loss": 2.2022, "step": 22665 }, { "epoch": 0.05, "grad_norm": 1.53125, "learning_rate": 0.00019964907150729275, "loss": 2.1347, "step": 22670 }, { "epoch": 0.05, "grad_norm": 1.59375, "learning_rate": 0.00019964891678204006, "loss": 2.1786, "step": 22675 }, { "epoch": 0.05, "grad_norm": 1.4765625, "learning_rate": 0.00019964876202274555, "loss": 2.1932, "step": 22680 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.00019964860722940917, "loss": 2.2308, "step": 22685 }, { "epoch": 0.05, "grad_norm": 1.5546875, "learning_rate": 0.00019964845240203106, "loss": 2.1191, "step": 22690 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019964829754061118, "loss": 2.1622, "step": 22695 }, { "epoch": 0.05, "grad_norm": 1.921875, "learning_rate": 0.00019964814264514965, "loss": 2.4249, "step": 22700 }, { "epoch": 0.05, "grad_norm": 1.5859375, "learning_rate": 0.00019964798771564653, "loss": 2.0742, "step": 22705 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019964783275210182, "loss": 2.2864, "step": 22710 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019964767775451563, "loss": 2.3401, "step": 22715 }, { "epoch": 0.05, "grad_norm": 2.375, "learning_rate": 0.00019964752272288796, "loss": 2.1719, "step": 22720 }, { "epoch": 0.05, "grad_norm": 1.5625, "learning_rate": 0.0001996473676572189, "loss": 2.1731, "step": 22725 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019964721255750852, "loss": 2.1512, "step": 22730 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.0001996470574237568, "loss": 2.0849, "step": 22735 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019964690225596386, "loss": 2.3202, "step": 22740 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019964674705412973, "loss": 2.1453, "step": 22745 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019964659181825447, "loss": 2.3386, "step": 22750 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019964643654833813, "loss": 2.2388, "step": 22755 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.00019964628124438075, "loss": 2.1084, "step": 22760 }, { "epoch": 0.05, "grad_norm": 1.6953125, "learning_rate": 0.0001996461259063824, "loss": 2.0005, "step": 22765 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.0001996459705343431, "loss": 2.2309, "step": 22770 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019964581512826296, "loss": 2.2817, "step": 22775 }, { "epoch": 0.05, "grad_norm": 1.5703125, "learning_rate": 0.00019964565968814198, "loss": 2.2856, "step": 22780 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019964550421398025, "loss": 2.1606, "step": 22785 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.0001996453487057778, "loss": 2.1712, "step": 22790 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.0001996451931635347, "loss": 2.1498, "step": 22795 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.000199645037587251, "loss": 2.1905, "step": 22800 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019964488197692673, "loss": 2.2166, "step": 22805 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019964472633256198, "loss": 2.2407, "step": 22810 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019964457065415676, "loss": 2.3648, "step": 22815 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.00019964441494171116, "loss": 2.2287, "step": 22820 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.00019964425919522522, "loss": 2.0913, "step": 22825 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019964410341469902, "loss": 2.3787, "step": 22830 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019964394760013256, "loss": 2.2612, "step": 22835 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.0001996437917515259, "loss": 2.1708, "step": 22840 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019964363586887914, "loss": 2.2549, "step": 22845 }, { "epoch": 0.05, "grad_norm": 1.8125, "learning_rate": 0.00019964347995219233, "loss": 2.1565, "step": 22850 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019964332400146547, "loss": 2.3015, "step": 22855 }, { "epoch": 0.05, "grad_norm": 1.4453125, "learning_rate": 0.00019964316801669864, "loss": 2.0949, "step": 22860 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.0001996430119978919, "loss": 2.2374, "step": 22865 }, { "epoch": 0.05, "grad_norm": 1.8984375, "learning_rate": 0.0001996428559450453, "loss": 2.41, "step": 22870 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.0001996426998581589, "loss": 2.1446, "step": 22875 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019964254373723277, "loss": 2.0625, "step": 22880 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.0001996423875822669, "loss": 2.0749, "step": 22885 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019964223139326141, "loss": 2.3227, "step": 22890 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.0001996420751702163, "loss": 2.1548, "step": 22895 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.0001996419189131317, "loss": 2.3543, "step": 22900 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.00019964176262200756, "loss": 2.2548, "step": 22905 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.000199641606296844, "loss": 2.1744, "step": 22910 }, { "epoch": 0.05, "grad_norm": 2.09375, "learning_rate": 0.00019964144993764107, "loss": 2.2631, "step": 22915 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.0001996412935443988, "loss": 2.224, "step": 22920 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019964113711711727, "loss": 2.0245, "step": 22925 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019964098065579654, "loss": 2.1349, "step": 22930 }, { "epoch": 0.05, "grad_norm": 2.109375, "learning_rate": 0.00019964082416043663, "loss": 2.1008, "step": 22935 }, { "epoch": 0.05, "grad_norm": 1.7421875, "learning_rate": 0.00019964066763103756, "loss": 2.217, "step": 22940 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.0001996405110675995, "loss": 1.9658, "step": 22945 }, { "epoch": 0.05, "grad_norm": 2.078125, "learning_rate": 0.00019964035447012237, "loss": 2.1461, "step": 22950 }, { "epoch": 0.05, "grad_norm": 2.21875, "learning_rate": 0.00019964019783860632, "loss": 2.1996, "step": 22955 }, { "epoch": 0.05, "grad_norm": 1.5234375, "learning_rate": 0.00019964004117305138, "loss": 2.087, "step": 22960 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.00019963988447345757, "loss": 2.1586, "step": 22965 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.000199639727739825, "loss": 2.1375, "step": 22970 }, { "epoch": 0.05, "grad_norm": 2.4375, "learning_rate": 0.0001996395709721537, "loss": 2.0376, "step": 22975 }, { "epoch": 0.05, "grad_norm": 1.5546875, "learning_rate": 0.00019963941417044367, "loss": 2.1542, "step": 22980 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019963925733469505, "loss": 2.2342, "step": 22985 }, { "epoch": 0.05, "grad_norm": 1.8671875, "learning_rate": 0.0001996391004649078, "loss": 2.0049, "step": 22990 }, { "epoch": 0.05, "grad_norm": 3.3125, "learning_rate": 0.00019963894356108208, "loss": 2.2758, "step": 22995 }, { "epoch": 0.05, "grad_norm": 2.046875, "learning_rate": 0.0001996387866232179, "loss": 2.2602, "step": 23000 }, { "epoch": 0.05, "grad_norm": 2.109375, "learning_rate": 0.00019963862965131525, "loss": 2.3461, "step": 23005 }, { "epoch": 0.05, "grad_norm": 2.140625, "learning_rate": 0.00019963847264537426, "loss": 2.1293, "step": 23010 }, { "epoch": 0.05, "grad_norm": 1.2421875, "learning_rate": 0.00019963831560539496, "loss": 2.0404, "step": 23015 }, { "epoch": 0.05, "grad_norm": 2.015625, "learning_rate": 0.0001996381585313774, "loss": 2.1067, "step": 23020 }, { "epoch": 0.05, "grad_norm": 2.28125, "learning_rate": 0.00019963800142332164, "loss": 2.1954, "step": 23025 }, { "epoch": 0.05, "grad_norm": 2.03125, "learning_rate": 0.00019963784428122776, "loss": 2.2357, "step": 23030 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019963768710509573, "loss": 2.0931, "step": 23035 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.0001996375298949257, "loss": 2.3001, "step": 23040 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.0001996373726507177, "loss": 2.2044, "step": 23045 }, { "epoch": 0.05, "grad_norm": 1.8359375, "learning_rate": 0.00019963721537247171, "loss": 2.2064, "step": 23050 }, { "epoch": 0.05, "grad_norm": 2.171875, "learning_rate": 0.00019963705806018788, "loss": 2.1534, "step": 23055 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019963690071386623, "loss": 2.2564, "step": 23060 }, { "epoch": 0.05, "grad_norm": 1.484375, "learning_rate": 0.00019963674333350678, "loss": 2.1742, "step": 23065 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019963658591910961, "loss": 2.1231, "step": 23070 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.0001996364284706748, "loss": 2.1034, "step": 23075 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019963627098820236, "loss": 2.1993, "step": 23080 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.00019963611347169238, "loss": 2.0511, "step": 23085 }, { "epoch": 0.05, "grad_norm": 1.4453125, "learning_rate": 0.00019963595592114488, "loss": 2.3098, "step": 23090 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019963579833655995, "loss": 2.3098, "step": 23095 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019963564071793762, "loss": 2.2052, "step": 23100 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019963548306527792, "loss": 2.2354, "step": 23105 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019963532537858094, "loss": 2.2597, "step": 23110 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019963516765784676, "loss": 2.245, "step": 23115 }, { "epoch": 0.05, "grad_norm": 1.6328125, "learning_rate": 0.00019963500990307536, "loss": 2.1536, "step": 23120 }, { "epoch": 0.05, "grad_norm": 1.7734375, "learning_rate": 0.00019963485211426684, "loss": 2.0609, "step": 23125 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019963469429142128, "loss": 2.403, "step": 23130 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019963453643453866, "loss": 2.2604, "step": 23135 }, { "epoch": 0.05, "grad_norm": 1.6640625, "learning_rate": 0.00019963437854361913, "loss": 2.0818, "step": 23140 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019963422061866263, "loss": 2.1115, "step": 23145 }, { "epoch": 0.05, "grad_norm": 1.703125, "learning_rate": 0.0001996340626596693, "loss": 2.2347, "step": 23150 }, { "epoch": 0.05, "grad_norm": 1.7109375, "learning_rate": 0.00019963390466663916, "loss": 2.1949, "step": 23155 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019963374663957228, "loss": 2.2277, "step": 23160 }, { "epoch": 0.05, "grad_norm": 2.25, "learning_rate": 0.0001996335885784687, "loss": 1.9486, "step": 23165 }, { "epoch": 0.05, "grad_norm": 1.875, "learning_rate": 0.00019963343048332849, "loss": 2.3233, "step": 23170 }, { "epoch": 0.05, "grad_norm": 1.96875, "learning_rate": 0.0001996332723541517, "loss": 2.1416, "step": 23175 }, { "epoch": 0.05, "grad_norm": 1.9375, "learning_rate": 0.00019963311419093837, "loss": 2.1951, "step": 23180 }, { "epoch": 0.05, "grad_norm": 1.75, "learning_rate": 0.00019963295599368852, "loss": 2.1807, "step": 23185 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.0001996327977624023, "loss": 2.1911, "step": 23190 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.00019963263949707968, "loss": 2.2139, "step": 23195 }, { "epoch": 0.05, "grad_norm": 1.8203125, "learning_rate": 0.00019963248119772074, "loss": 2.2862, "step": 23200 }, { "epoch": 0.05, "grad_norm": 1.765625, "learning_rate": 0.00019963232286432554, "loss": 2.3934, "step": 23205 }, { "epoch": 0.05, "grad_norm": 1.65625, "learning_rate": 0.00019963216449689415, "loss": 2.2148, "step": 23210 }, { "epoch": 0.05, "grad_norm": 2.0, "learning_rate": 0.00019963200609542662, "loss": 2.2826, "step": 23215 }, { "epoch": 0.05, "grad_norm": 1.859375, "learning_rate": 0.00019963184765992297, "loss": 2.2311, "step": 23220 }, { "epoch": 0.05, "grad_norm": 1.9765625, "learning_rate": 0.00019963168919038323, "loss": 2.2097, "step": 23225 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019963153068680757, "loss": 2.0324, "step": 23230 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019963137214919593, "loss": 2.0933, "step": 23235 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.0001996312135775484, "loss": 2.2596, "step": 23240 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019963105497186506, "loss": 2.0999, "step": 23245 }, { "epoch": 0.05, "grad_norm": 1.796875, "learning_rate": 0.00019963089633214597, "loss": 2.1132, "step": 23250 }, { "epoch": 0.05, "grad_norm": 2.15625, "learning_rate": 0.00019963073765839111, "loss": 2.316, "step": 23255 }, { "epoch": 0.05, "grad_norm": 1.9453125, "learning_rate": 0.0001996305789506006, "loss": 2.1355, "step": 23260 }, { "epoch": 0.05, "grad_norm": 1.546875, "learning_rate": 0.00019963042020877447, "loss": 2.2258, "step": 23265 }, { "epoch": 0.05, "grad_norm": 1.4609375, "learning_rate": 0.0001996302614329128, "loss": 2.2148, "step": 23270 }, { "epoch": 0.05, "grad_norm": 1.71875, "learning_rate": 0.00019963010262301562, "loss": 2.174, "step": 23275 }, { "epoch": 0.05, "grad_norm": 2.265625, "learning_rate": 0.00019962994377908298, "loss": 2.2241, "step": 23280 }, { "epoch": 0.05, "grad_norm": 3.6875, "learning_rate": 0.00019962978490111496, "loss": 2.2255, "step": 23285 }, { "epoch": 0.05, "grad_norm": 1.6875, "learning_rate": 0.0001996296259891116, "loss": 2.174, "step": 23290 }, { "epoch": 0.05, "grad_norm": 1.609375, "learning_rate": 0.00019962946704307294, "loss": 2.2175, "step": 23295 }, { "epoch": 0.05, "grad_norm": 1.78125, "learning_rate": 0.00019962930806299904, "loss": 2.2273, "step": 23300 }, { "epoch": 0.05, "grad_norm": 1.7578125, "learning_rate": 0.00019962914904888995, "loss": 2.2912, "step": 23305 }, { "epoch": 0.05, "grad_norm": 1.9140625, "learning_rate": 0.00019962899000074578, "loss": 2.1775, "step": 23310 }, { "epoch": 0.05, "grad_norm": 1.7890625, "learning_rate": 0.0001996288309185665, "loss": 2.4173, "step": 23315 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019962867180235225, "loss": 2.1606, "step": 23320 }, { "epoch": 0.05, "grad_norm": 1.6796875, "learning_rate": 0.000199628512652103, "loss": 2.1667, "step": 23325 }, { "epoch": 0.05, "grad_norm": 2.125, "learning_rate": 0.00019962835346781885, "loss": 2.1336, "step": 23330 }, { "epoch": 0.05, "grad_norm": 1.84375, "learning_rate": 0.00019962819424949985, "loss": 1.9518, "step": 23335 }, { "epoch": 0.05, "grad_norm": 1.90625, "learning_rate": 0.00019962803499714606, "loss": 2.1419, "step": 23340 }, { "epoch": 0.05, "grad_norm": 1.8828125, "learning_rate": 0.0001996278757107575, "loss": 2.1855, "step": 23345 }, { "epoch": 0.05, "grad_norm": 2.359375, "learning_rate": 0.0001996277163903343, "loss": 2.1874, "step": 23350 }, { "epoch": 0.05, "grad_norm": 1.9921875, "learning_rate": 0.00019962755703587643, "loss": 2.0703, "step": 23355 }, { "epoch": 0.05, "grad_norm": 1.8515625, "learning_rate": 0.00019962739764738398, "loss": 2.2121, "step": 23360 }, { "epoch": 0.05, "grad_norm": 1.734375, "learning_rate": 0.00019962723822485701, "loss": 2.2479, "step": 23365 }, { "epoch": 0.05, "grad_norm": 1.9609375, "learning_rate": 0.0001996270787682956, "loss": 2.0802, "step": 23370 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019962691927769973, "loss": 2.2801, "step": 23375 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.0001996267597530695, "loss": 2.2238, "step": 23380 }, { "epoch": 0.06, "grad_norm": 2.125, "learning_rate": 0.00019962660019440498, "loss": 2.308, "step": 23385 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.0001996264406017062, "loss": 2.2236, "step": 23390 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.0001996262809749732, "loss": 2.1656, "step": 23395 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019962612131420608, "loss": 2.2338, "step": 23400 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.00019962596161940486, "loss": 2.1561, "step": 23405 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019962580189056964, "loss": 2.1493, "step": 23410 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.0001996256421277004, "loss": 2.1613, "step": 23415 }, { "epoch": 0.06, "grad_norm": 1.4765625, "learning_rate": 0.00019962548233079722, "loss": 1.9867, "step": 23420 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.0001996253224998602, "loss": 2.1259, "step": 23425 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019962516263488935, "loss": 2.0963, "step": 23430 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019962500273588475, "loss": 2.2514, "step": 23435 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019962484280284645, "loss": 2.2995, "step": 23440 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019962468283577444, "loss": 2.1363, "step": 23445 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019962452283466887, "loss": 2.1553, "step": 23450 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019962436279952978, "loss": 2.2932, "step": 23455 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019962420273035719, "loss": 2.3933, "step": 23460 }, { "epoch": 0.06, "grad_norm": 1.5703125, "learning_rate": 0.00019962404262715116, "loss": 2.159, "step": 23465 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019962388248991172, "loss": 2.1185, "step": 23470 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.000199623722318639, "loss": 2.1746, "step": 23475 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019962356211333297, "loss": 2.2286, "step": 23480 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.00019962340187399375, "loss": 2.376, "step": 23485 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019962324160062135, "loss": 1.9201, "step": 23490 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019962308129321588, "loss": 2.1811, "step": 23495 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.00019962292095177734, "loss": 2.045, "step": 23500 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.0001996227605763058, "loss": 2.3632, "step": 23505 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.00019962260016680131, "loss": 2.1301, "step": 23510 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019962243972326395, "loss": 2.2454, "step": 23515 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019962227924569376, "loss": 2.2213, "step": 23520 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.00019962211873409075, "loss": 2.1932, "step": 23525 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.00019962195818845507, "loss": 2.2818, "step": 23530 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.0001996217976087867, "loss": 2.2879, "step": 23535 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019962163699508573, "loss": 2.2065, "step": 23540 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019962147634735217, "loss": 2.2943, "step": 23545 }, { "epoch": 0.06, "grad_norm": 1.53125, "learning_rate": 0.00019962131566558612, "loss": 1.9307, "step": 23550 }, { "epoch": 0.06, "grad_norm": 2.4375, "learning_rate": 0.00019962115494978763, "loss": 2.2936, "step": 23555 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019962099419995678, "loss": 2.1805, "step": 23560 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019962083341609353, "loss": 2.3401, "step": 23565 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.00019962067259819802, "loss": 2.2565, "step": 23570 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.0001996205117462703, "loss": 2.1981, "step": 23575 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019962035086031039, "loss": 2.4191, "step": 23580 }, { "epoch": 0.06, "grad_norm": 1.5546875, "learning_rate": 0.00019962018994031837, "loss": 2.225, "step": 23585 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019962002898629426, "loss": 2.3915, "step": 23590 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019961986799823817, "loss": 2.1465, "step": 23595 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.00019961970697615012, "loss": 2.3338, "step": 23600 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019961954592003018, "loss": 2.1538, "step": 23605 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.0001996193848298784, "loss": 2.335, "step": 23610 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001996192237056948, "loss": 2.1664, "step": 23615 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019961906254747949, "loss": 2.2368, "step": 23620 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.0001996189013552325, "loss": 2.1401, "step": 23625 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019961874012895388, "loss": 2.1217, "step": 23630 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019961857886864368, "loss": 2.1189, "step": 23635 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019961841757430197, "loss": 2.1912, "step": 23640 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.0001996182562459288, "loss": 2.2416, "step": 23645 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019961809488352428, "loss": 2.3319, "step": 23650 }, { "epoch": 0.06, "grad_norm": 1.4140625, "learning_rate": 0.00019961793348708838, "loss": 2.2661, "step": 23655 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.00019961777205662116, "loss": 2.2976, "step": 23660 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.0001996176105921227, "loss": 2.0651, "step": 23665 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.0001996174490935931, "loss": 2.3574, "step": 23670 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.00019961728756103236, "loss": 2.2036, "step": 23675 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019961712599444052, "loss": 2.2207, "step": 23680 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.0001996169643938177, "loss": 2.2019, "step": 23685 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.0001996168027591639, "loss": 2.1652, "step": 23690 }, { "epoch": 0.06, "grad_norm": 1.5546875, "learning_rate": 0.00019961664109047916, "loss": 2.1701, "step": 23695 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019961647938776365, "loss": 2.365, "step": 23700 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.0001996163176510173, "loss": 2.1387, "step": 23705 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019961615588024017, "loss": 2.1939, "step": 23710 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.0001996159940754324, "loss": 2.1922, "step": 23715 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.000199615832236594, "loss": 2.307, "step": 23720 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019961567036372503, "loss": 2.1958, "step": 23725 }, { "epoch": 0.06, "grad_norm": 1.625, "learning_rate": 0.00019961550845682552, "loss": 2.0155, "step": 23730 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019961534651589556, "loss": 2.1335, "step": 23735 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019961518454093518, "loss": 2.1921, "step": 23740 }, { "epoch": 0.06, "grad_norm": 2.125, "learning_rate": 0.00019961502253194443, "loss": 2.3541, "step": 23745 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019961486048892342, "loss": 2.1312, "step": 23750 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.00019961469841187212, "loss": 2.2038, "step": 23755 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.00019961453630079067, "loss": 2.2661, "step": 23760 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.00019961437415567907, "loss": 2.1613, "step": 23765 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.0001996142119765374, "loss": 2.1624, "step": 23770 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.0001996140497633657, "loss": 2.364, "step": 23775 }, { "epoch": 0.06, "grad_norm": 1.5546875, "learning_rate": 0.00019961388751616406, "loss": 2.0998, "step": 23780 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019961372523493248, "loss": 2.2304, "step": 23785 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019961356291967107, "loss": 1.898, "step": 23790 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019961340057037986, "loss": 2.2364, "step": 23795 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019961323818705888, "loss": 2.3148, "step": 23800 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019961307576970826, "loss": 2.1348, "step": 23805 }, { "epoch": 0.06, "grad_norm": 2.125, "learning_rate": 0.00019961291331832795, "loss": 2.0293, "step": 23810 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.00019961275083291808, "loss": 2.1894, "step": 23815 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.0001996125883134787, "loss": 2.2188, "step": 23820 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019961242576000982, "loss": 2.2514, "step": 23825 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019961226317251156, "loss": 2.2319, "step": 23830 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019961210055098393, "loss": 2.2919, "step": 23835 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019961193789542702, "loss": 2.2804, "step": 23840 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019961177520584085, "loss": 2.2835, "step": 23845 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0001996116124822255, "loss": 2.0693, "step": 23850 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019961144972458104, "loss": 2.2253, "step": 23855 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.00019961128693290746, "loss": 2.22, "step": 23860 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.00019961112410720486, "loss": 2.0909, "step": 23865 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.0001996109612474733, "loss": 2.3618, "step": 23870 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.00019961079835371286, "loss": 2.061, "step": 23875 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019961063542592352, "loss": 2.3017, "step": 23880 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.0001996104724641054, "loss": 2.3266, "step": 23885 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019961030946825855, "loss": 2.2003, "step": 23890 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019961014643838298, "loss": 2.2332, "step": 23895 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.0001996099833744788, "loss": 2.1816, "step": 23900 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.00019960982027654605, "loss": 2.183, "step": 23905 }, { "epoch": 0.06, "grad_norm": 1.3515625, "learning_rate": 0.00019960965714458474, "loss": 2.1035, "step": 23910 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019960949397859501, "loss": 2.2465, "step": 23915 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019960933077857687, "loss": 2.3844, "step": 23920 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019960916754453034, "loss": 2.1649, "step": 23925 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.00019960900427645552, "loss": 1.9881, "step": 23930 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.00019960884097435248, "loss": 2.1116, "step": 23935 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019960867763822124, "loss": 2.1071, "step": 23940 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019960851426806185, "loss": 2.1943, "step": 23945 }, { "epoch": 0.06, "grad_norm": 1.359375, "learning_rate": 0.0001996083508638744, "loss": 2.3264, "step": 23950 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019960818742565897, "loss": 2.1141, "step": 23955 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019960802395341552, "loss": 2.2331, "step": 23960 }, { "epoch": 0.06, "grad_norm": 2.484375, "learning_rate": 0.0001996078604471442, "loss": 2.1468, "step": 23965 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.000199607696906845, "loss": 2.2319, "step": 23970 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.00019960753333251802, "loss": 2.1355, "step": 23975 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.0001996073697241633, "loss": 2.1306, "step": 23980 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.0001996072060817809, "loss": 2.0349, "step": 23985 }, { "epoch": 0.06, "grad_norm": 1.3359375, "learning_rate": 0.00019960704240537088, "loss": 2.1126, "step": 23990 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019960687869493326, "loss": 2.1706, "step": 23995 }, { "epoch": 0.06, "grad_norm": 1.5078125, "learning_rate": 0.00019960671495046812, "loss": 2.1145, "step": 24000 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019960655117197553, "loss": 2.2525, "step": 24005 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019960638735945556, "loss": 2.2251, "step": 24010 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.00019960622351290823, "loss": 2.1823, "step": 24015 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019960605963233362, "loss": 2.1623, "step": 24020 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019960589571773173, "loss": 2.1726, "step": 24025 }, { "epoch": 0.06, "grad_norm": 2.0, "learning_rate": 0.0001996057317691027, "loss": 2.3334, "step": 24030 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.00019960556778644652, "loss": 2.336, "step": 24035 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.00019960540376976325, "loss": 2.1397, "step": 24040 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019960523971905304, "loss": 2.0825, "step": 24045 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019960507563431584, "loss": 2.0917, "step": 24050 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.0001996049115155517, "loss": 2.3564, "step": 24055 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019960474736276077, "loss": 2.3149, "step": 24060 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019960458317594304, "loss": 2.3497, "step": 24065 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019960441895509857, "loss": 2.3048, "step": 24070 }, { "epoch": 0.06, "grad_norm": 2.453125, "learning_rate": 0.00019960425470022742, "loss": 2.0293, "step": 24075 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019960409041132966, "loss": 2.226, "step": 24080 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019960392608840534, "loss": 2.1191, "step": 24085 }, { "epoch": 0.06, "grad_norm": 1.625, "learning_rate": 0.00019960376173145446, "loss": 2.0765, "step": 24090 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.0001996035973404772, "loss": 2.2384, "step": 24095 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.0001996034329154735, "loss": 2.2153, "step": 24100 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019960326845644346, "loss": 2.164, "step": 24105 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019960310396338718, "loss": 2.1648, "step": 24110 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019960293943630463, "loss": 2.1061, "step": 24115 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019960277487519594, "loss": 2.3103, "step": 24120 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.0001996026102800611, "loss": 2.1802, "step": 24125 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019960244565090025, "loss": 2.2136, "step": 24130 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019960228098771335, "loss": 2.3221, "step": 24135 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.00019960211629050054, "loss": 2.1957, "step": 24140 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019960195155926182, "loss": 2.2612, "step": 24145 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.0001996017867939973, "loss": 2.1187, "step": 24150 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019960162199470695, "loss": 2.1909, "step": 24155 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019960145716139093, "loss": 2.0984, "step": 24160 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.0001996012922940492, "loss": 2.2581, "step": 24165 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.0001996011273926819, "loss": 2.3175, "step": 24170 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019960096245728902, "loss": 2.3701, "step": 24175 }, { "epoch": 0.06, "grad_norm": 1.4609375, "learning_rate": 0.00019960079748787066, "loss": 2.2529, "step": 24180 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019960063248442684, "loss": 2.2153, "step": 24185 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.0001996004674469577, "loss": 2.4146, "step": 24190 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019960030237546316, "loss": 2.1257, "step": 24195 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.00019960013726994338, "loss": 2.2937, "step": 24200 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.0001995999721303984, "loss": 2.0826, "step": 24205 }, { "epoch": 0.06, "grad_norm": 1.625, "learning_rate": 0.00019959980695682825, "loss": 2.2385, "step": 24210 }, { "epoch": 0.06, "grad_norm": 1.28125, "learning_rate": 0.000199599641749233, "loss": 2.1238, "step": 24215 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.0001995994765076127, "loss": 2.2407, "step": 24220 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019959931123196743, "loss": 2.1473, "step": 24225 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.00019959914592229723, "loss": 2.1361, "step": 24230 }, { "epoch": 0.06, "grad_norm": 1.6015625, "learning_rate": 0.00019959898057860214, "loss": 2.155, "step": 24235 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019959881520088222, "loss": 2.1686, "step": 24240 }, { "epoch": 0.06, "grad_norm": 1.5, "learning_rate": 0.00019959864978913756, "loss": 2.3067, "step": 24245 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995984843433682, "loss": 2.3003, "step": 24250 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019959831886357416, "loss": 2.1551, "step": 24255 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019959815334975555, "loss": 2.2216, "step": 24260 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.0001995979878019124, "loss": 2.3198, "step": 24265 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001995978222200448, "loss": 2.2393, "step": 24270 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019959765660415273, "loss": 2.1784, "step": 24275 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.0001995974909542363, "loss": 2.1572, "step": 24280 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.0001995973252702956, "loss": 2.1717, "step": 24285 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019959715955233064, "loss": 2.1972, "step": 24290 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.00019959699380034144, "loss": 2.206, "step": 24295 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019959682801432813, "loss": 2.3035, "step": 24300 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.00019959666219429074, "loss": 2.2164, "step": 24305 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.00019959649634022932, "loss": 2.1404, "step": 24310 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019959633045214394, "loss": 2.2305, "step": 24315 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.00019959616453003463, "loss": 2.2348, "step": 24320 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.00019959599857390147, "loss": 2.2026, "step": 24325 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.0001995958325837445, "loss": 2.3696, "step": 24330 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.0001995956665595638, "loss": 2.3124, "step": 24335 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.00019959550050135944, "loss": 2.1782, "step": 24340 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.0001995953344091314, "loss": 2.0802, "step": 24345 }, { "epoch": 0.06, "grad_norm": 2.28125, "learning_rate": 0.00019959516828287982, "loss": 2.2472, "step": 24350 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019959500212260472, "loss": 1.9773, "step": 24355 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019959483592830614, "loss": 2.1257, "step": 24360 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001995946696999842, "loss": 2.2232, "step": 24365 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019959450343763887, "loss": 2.1259, "step": 24370 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.0001995943371412703, "loss": 2.1657, "step": 24375 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019959417081087842, "loss": 2.3004, "step": 24380 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019959400444646342, "loss": 2.3032, "step": 24385 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019959383804802532, "loss": 2.1819, "step": 24390 }, { "epoch": 0.06, "grad_norm": 1.53125, "learning_rate": 0.00019959367161556416, "loss": 2.064, "step": 24395 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019959350514907995, "loss": 2.1492, "step": 24400 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.0001995933386485728, "loss": 2.1284, "step": 24405 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019959317211404277, "loss": 2.0585, "step": 24410 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019959300554548993, "loss": 1.9982, "step": 24415 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019959283894291428, "loss": 2.3434, "step": 24420 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0001995926723063159, "loss": 2.197, "step": 24425 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.0001995925056356949, "loss": 2.3208, "step": 24430 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019959233893105126, "loss": 2.3114, "step": 24435 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019959217219238512, "loss": 2.2244, "step": 24440 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019959200541969644, "loss": 2.3569, "step": 24445 }, { "epoch": 0.06, "grad_norm": 1.5703125, "learning_rate": 0.0001995918386129853, "loss": 2.2607, "step": 24450 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019959167177225183, "loss": 2.1217, "step": 24455 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019959150489749604, "loss": 2.1045, "step": 24460 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019959133798871795, "loss": 2.2041, "step": 24465 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019959117104591768, "loss": 2.2292, "step": 24470 }, { "epoch": 0.06, "grad_norm": 2.46875, "learning_rate": 0.00019959100406909526, "loss": 2.4468, "step": 24475 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.00019959083705825073, "loss": 2.2045, "step": 24480 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.0001995906700133842, "loss": 2.1206, "step": 24485 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.00019959050293449565, "loss": 2.1416, "step": 24490 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.0001995903358215852, "loss": 2.2399, "step": 24495 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995901686746529, "loss": 2.2232, "step": 24500 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019959000149369875, "loss": 2.2238, "step": 24505 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019958983427872286, "loss": 2.309, "step": 24510 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.0001995896670297253, "loss": 2.3082, "step": 24515 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.00019958949974670608, "loss": 2.2016, "step": 24520 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001995893324296653, "loss": 2.3122, "step": 24525 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019958916507860298, "loss": 2.0466, "step": 24530 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.0001995889976935192, "loss": 2.1688, "step": 24535 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.00019958883027441403, "loss": 2.3411, "step": 24540 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019958866282128747, "loss": 2.2837, "step": 24545 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019958849533413965, "loss": 2.2295, "step": 24550 }, { "epoch": 0.06, "grad_norm": 1.546875, "learning_rate": 0.00019958832781297062, "loss": 2.2181, "step": 24555 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019958816025778035, "loss": 2.1938, "step": 24560 }, { "epoch": 0.06, "grad_norm": 1.5, "learning_rate": 0.000199587992668569, "loss": 2.2698, "step": 24565 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.00019958782504533656, "loss": 2.2449, "step": 24570 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019958765738808314, "loss": 2.0197, "step": 24575 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.00019958748969680875, "loss": 2.3046, "step": 24580 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019958732197151345, "loss": 2.2386, "step": 24585 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019958715421219733, "loss": 2.2467, "step": 24590 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019958698641886046, "loss": 2.1327, "step": 24595 }, { "epoch": 0.06, "grad_norm": 1.3828125, "learning_rate": 0.00019958681859150285, "loss": 2.102, "step": 24600 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.00019958665073012457, "loss": 2.2063, "step": 24605 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019958648283472568, "loss": 2.1762, "step": 24610 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019958631490530625, "loss": 2.1796, "step": 24615 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.00019958614694186633, "loss": 2.068, "step": 24620 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.000199585978944406, "loss": 2.239, "step": 24625 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019958581091292525, "loss": 2.3198, "step": 24630 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.0001995856428474242, "loss": 2.0982, "step": 24635 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.0001995854747479029, "loss": 2.2216, "step": 24640 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019958530661436138, "loss": 2.2633, "step": 24645 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019958513844679971, "loss": 2.1961, "step": 24650 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019958497024521793, "loss": 2.2804, "step": 24655 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019958480200961618, "loss": 2.2467, "step": 24660 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019958463373999442, "loss": 2.4099, "step": 24665 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019958446543635275, "loss": 2.2555, "step": 24670 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.0001995842970986912, "loss": 2.2326, "step": 24675 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.00019958412872700984, "loss": 2.0238, "step": 24680 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019958396032130878, "loss": 2.2827, "step": 24685 }, { "epoch": 0.06, "grad_norm": 2.0, "learning_rate": 0.00019958379188158803, "loss": 2.2112, "step": 24690 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.0001995836234078476, "loss": 2.1712, "step": 24695 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.00019958345490008763, "loss": 2.3682, "step": 24700 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019958328635830816, "loss": 2.2454, "step": 24705 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.0001995831177825092, "loss": 2.1641, "step": 24710 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019958294917269087, "loss": 2.1638, "step": 24715 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.00019958278052885318, "loss": 2.2765, "step": 24720 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019958261185099623, "loss": 2.2802, "step": 24725 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.00019958244313912006, "loss": 2.2033, "step": 24730 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019958227439322468, "loss": 2.0663, "step": 24735 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.0001995821056133102, "loss": 2.2656, "step": 24740 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.00019958193679937668, "loss": 2.174, "step": 24745 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019958176795142416, "loss": 2.3938, "step": 24750 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019958159906945268, "loss": 2.2613, "step": 24755 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019958143015346235, "loss": 2.0662, "step": 24760 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.0001995812612034532, "loss": 2.3136, "step": 24765 }, { "epoch": 0.06, "grad_norm": 1.5546875, "learning_rate": 0.00019958109221942526, "loss": 2.2945, "step": 24770 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.00019958092320137864, "loss": 2.1762, "step": 24775 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019958075414931337, "loss": 2.1437, "step": 24780 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019958058506322948, "loss": 2.3639, "step": 24785 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.00019958041594312707, "loss": 2.1595, "step": 24790 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.0001995802467890062, "loss": 2.1376, "step": 24795 }, { "epoch": 0.06, "grad_norm": 2.359375, "learning_rate": 0.0001995800776008669, "loss": 2.1246, "step": 24800 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019957990837870924, "loss": 2.052, "step": 24805 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.00019957973912253327, "loss": 2.0138, "step": 24810 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019957956983233905, "loss": 2.2312, "step": 24815 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.00019957940050812666, "loss": 2.1496, "step": 24820 }, { "epoch": 0.06, "grad_norm": 2.21875, "learning_rate": 0.00019957923114989612, "loss": 1.9346, "step": 24825 }, { "epoch": 0.06, "grad_norm": 2.21875, "learning_rate": 0.00019957906175764755, "loss": 2.2363, "step": 24830 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.0001995788923313809, "loss": 2.1801, "step": 24835 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019957872287109635, "loss": 2.1608, "step": 24840 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.0001995785533767939, "loss": 2.2877, "step": 24845 }, { "epoch": 0.06, "grad_norm": 2.5, "learning_rate": 0.00019957838384847357, "loss": 2.2934, "step": 24850 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.0001995782142861355, "loss": 2.2293, "step": 24855 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.0001995780446897797, "loss": 1.9735, "step": 24860 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.00019957787505940622, "loss": 2.1035, "step": 24865 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019957770539501512, "loss": 2.1977, "step": 24870 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019957753569660648, "loss": 2.213, "step": 24875 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019957736596418036, "loss": 2.1344, "step": 24880 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995771961977368, "loss": 2.4249, "step": 24885 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019957702639727588, "loss": 2.2985, "step": 24890 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.0001995768565627976, "loss": 2.1365, "step": 24895 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019957668669430212, "loss": 2.0982, "step": 24900 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.0001995765167917894, "loss": 2.1772, "step": 24905 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019957634685525952, "loss": 2.0156, "step": 24910 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.00019957617688471258, "loss": 2.372, "step": 24915 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.0001995760068801486, "loss": 2.1577, "step": 24920 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.00019957583684156765, "loss": 2.3114, "step": 24925 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.0001995756667689698, "loss": 2.2252, "step": 24930 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019957549666235508, "loss": 2.2546, "step": 24935 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.0001995753265217236, "loss": 2.2735, "step": 24940 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.00019957515634707535, "loss": 2.1583, "step": 24945 }, { "epoch": 0.06, "grad_norm": 1.421875, "learning_rate": 0.0001995749861384104, "loss": 2.106, "step": 24950 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019957481589572885, "loss": 2.1629, "step": 24955 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019957464561903075, "loss": 2.1856, "step": 24960 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019957447530831614, "loss": 2.1851, "step": 24965 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019957430496358512, "loss": 2.2869, "step": 24970 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019957413458483766, "loss": 2.1743, "step": 24975 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019957396417207387, "loss": 2.2255, "step": 24980 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.0001995737937252938, "loss": 2.1865, "step": 24985 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019957362324449758, "loss": 2.1745, "step": 24990 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.00019957345272968516, "loss": 2.2696, "step": 24995 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019957328218085665, "loss": 2.1295, "step": 25000 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.0001995731115980121, "loss": 2.1194, "step": 25005 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019957294098115152, "loss": 2.2179, "step": 25010 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.0001995727703302751, "loss": 2.1787, "step": 25015 }, { "epoch": 0.06, "grad_norm": 2.5, "learning_rate": 0.00019957259964538277, "loss": 2.0651, "step": 25020 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019957242892647463, "loss": 2.0775, "step": 25025 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019957225817355075, "loss": 2.0039, "step": 25030 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.0001995720873866112, "loss": 2.2831, "step": 25035 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.000199571916565656, "loss": 2.2271, "step": 25040 }, { "epoch": 0.06, "grad_norm": 2.0, "learning_rate": 0.00019957174571068524, "loss": 2.3782, "step": 25045 }, { "epoch": 0.06, "grad_norm": 2.3125, "learning_rate": 0.00019957157482169895, "loss": 2.3067, "step": 25050 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.0001995714038986972, "loss": 2.2074, "step": 25055 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.00019957123294168008, "loss": 2.2773, "step": 25060 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019957106195064758, "loss": 2.1667, "step": 25065 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019957089092559983, "loss": 2.1739, "step": 25070 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019957071986653687, "loss": 2.2272, "step": 25075 }, { "epoch": 0.06, "grad_norm": 1.3046875, "learning_rate": 0.0001995705487734587, "loss": 2.1934, "step": 25080 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.00019957037764636548, "loss": 2.1077, "step": 25085 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019957020648525718, "loss": 2.078, "step": 25090 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019957003529013392, "loss": 2.162, "step": 25095 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019956986406099569, "loss": 2.2464, "step": 25100 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.0001995696927978426, "loss": 2.2793, "step": 25105 }, { "epoch": 0.06, "grad_norm": 1.4765625, "learning_rate": 0.00019956952150067473, "loss": 2.2306, "step": 25110 }, { "epoch": 0.06, "grad_norm": 2.0, "learning_rate": 0.00019956935016949206, "loss": 2.204, "step": 25115 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019956917880429472, "loss": 2.3126, "step": 25120 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019956900740508276, "loss": 1.871, "step": 25125 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019956883597185615, "loss": 2.1055, "step": 25130 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019956866450461508, "loss": 2.1949, "step": 25135 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019956849300335953, "loss": 2.2122, "step": 25140 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.0001995683214680896, "loss": 2.1348, "step": 25145 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.0001995681498988053, "loss": 2.1808, "step": 25150 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019956797829550675, "loss": 2.2777, "step": 25155 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.00019956780665819396, "loss": 2.1953, "step": 25160 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019956763498686696, "loss": 2.1061, "step": 25165 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.00019956746328152588, "loss": 2.1847, "step": 25170 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019956729154217074, "loss": 1.9644, "step": 25175 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019956711976880162, "loss": 2.3617, "step": 25180 }, { "epoch": 0.06, "grad_norm": 1.6015625, "learning_rate": 0.00019956694796141857, "loss": 2.227, "step": 25185 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019956677612002164, "loss": 2.2426, "step": 25190 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.0001995666042446109, "loss": 2.0938, "step": 25195 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019956643233518638, "loss": 2.1216, "step": 25200 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019956626039174818, "loss": 2.2154, "step": 25205 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019956608841429632, "loss": 2.1497, "step": 25210 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.00019956591640283086, "loss": 2.3392, "step": 25215 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019956574435735192, "loss": 2.0253, "step": 25220 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.0001995655722778595, "loss": 2.2334, "step": 25225 }, { "epoch": 0.06, "grad_norm": 1.5703125, "learning_rate": 0.0001995654001643537, "loss": 2.2815, "step": 25230 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019956522801683452, "loss": 2.2751, "step": 25235 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019956505583530208, "loss": 2.1402, "step": 25240 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.0001995648836197564, "loss": 2.0989, "step": 25245 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.00019956471137019753, "loss": 2.1575, "step": 25250 }, { "epoch": 0.06, "grad_norm": 1.5546875, "learning_rate": 0.00019956453908662555, "loss": 2.2409, "step": 25255 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.00019956436676904057, "loss": 2.2148, "step": 25260 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019956419441744254, "loss": 2.272, "step": 25265 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.0001995640220318316, "loss": 2.2077, "step": 25270 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.00019956384961220778, "loss": 2.1484, "step": 25275 }, { "epoch": 0.06, "grad_norm": 1.625, "learning_rate": 0.00019956367715857112, "loss": 2.16, "step": 25280 }, { "epoch": 0.06, "grad_norm": 2.25, "learning_rate": 0.00019956350467092174, "loss": 2.3609, "step": 25285 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019956333214925963, "loss": 2.0959, "step": 25290 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019956315959358492, "loss": 2.2096, "step": 25295 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019956298700389758, "loss": 2.174, "step": 25300 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019956281438019774, "loss": 2.2176, "step": 25305 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019956264172248545, "loss": 2.2287, "step": 25310 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.00019956246903076073, "loss": 2.3905, "step": 25315 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.0001995622963050237, "loss": 2.0088, "step": 25320 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.00019956212354527435, "loss": 2.0997, "step": 25325 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.0001995619507515128, "loss": 2.1017, "step": 25330 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019956177792373908, "loss": 2.234, "step": 25335 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019956160506195324, "loss": 2.08, "step": 25340 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019956143216615533, "loss": 1.9607, "step": 25345 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019956125923634545, "loss": 2.2638, "step": 25350 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019956108627252364, "loss": 2.1214, "step": 25355 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.00019956091327468994, "loss": 2.0588, "step": 25360 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.00019956074024284445, "loss": 2.0787, "step": 25365 }, { "epoch": 0.06, "grad_norm": 1.6015625, "learning_rate": 0.0001995605671769872, "loss": 2.1115, "step": 25370 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019956039407711823, "loss": 2.2272, "step": 25375 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019956022094323768, "loss": 2.2901, "step": 25380 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019956004777534552, "loss": 2.038, "step": 25385 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019955987457344183, "loss": 2.1225, "step": 25390 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001995597013375267, "loss": 2.244, "step": 25395 }, { "epoch": 0.06, "grad_norm": 1.4921875, "learning_rate": 0.00019955952806760015, "loss": 2.2546, "step": 25400 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.00019955935476366227, "loss": 2.2196, "step": 25405 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.00019955918142571313, "loss": 2.2149, "step": 25410 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.00019955900805375274, "loss": 2.2403, "step": 25415 }, { "epoch": 0.06, "grad_norm": 1.40625, "learning_rate": 0.0001995588346477812, "loss": 2.2188, "step": 25420 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019955866120779854, "loss": 2.18, "step": 25425 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019955848773380486, "loss": 2.0989, "step": 25430 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019955831422580018, "loss": 2.0445, "step": 25435 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019955814068378457, "loss": 2.1628, "step": 25440 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019955796710775808, "loss": 2.151, "step": 25445 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019955779349772082, "loss": 2.0537, "step": 25450 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019955761985367278, "loss": 2.0754, "step": 25455 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.0001995574461756141, "loss": 2.0041, "step": 25460 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019955727246354475, "loss": 2.112, "step": 25465 }, { "epoch": 0.06, "grad_norm": 2.21875, "learning_rate": 0.0001995570987174648, "loss": 2.396, "step": 25470 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019955692493737442, "loss": 2.1869, "step": 25475 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019955675112327354, "loss": 2.2695, "step": 25480 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.00019955657727516225, "loss": 2.1362, "step": 25485 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019955640339304066, "loss": 2.2752, "step": 25490 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.0001995562294769088, "loss": 2.2179, "step": 25495 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019955605552676674, "loss": 2.2292, "step": 25500 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019955588154261448, "loss": 2.1169, "step": 25505 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019955570752445214, "loss": 2.1647, "step": 25510 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019955553347227977, "loss": 2.2486, "step": 25515 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019955535938609743, "loss": 2.304, "step": 25520 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019955518526590518, "loss": 2.1293, "step": 25525 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019955501111170305, "loss": 2.0334, "step": 25530 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.00019955483692349116, "loss": 2.2348, "step": 25535 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.0001995546627012695, "loss": 2.0932, "step": 25540 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019955448844503817, "loss": 2.2232, "step": 25545 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.0001995543141547972, "loss": 2.3639, "step": 25550 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.0001995541398305467, "loss": 2.4881, "step": 25555 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.0001995539654722867, "loss": 2.2088, "step": 25560 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019955379108001724, "loss": 2.2804, "step": 25565 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995536166537384, "loss": 2.2242, "step": 25570 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019955344219345028, "loss": 2.1247, "step": 25575 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019955326769915285, "loss": 2.0636, "step": 25580 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019955309317084622, "loss": 2.1636, "step": 25585 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.0001995529186085305, "loss": 2.2004, "step": 25590 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.00019955274401220563, "loss": 2.291, "step": 25595 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.0001995525693818718, "loss": 2.3074, "step": 25600 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019955239471752896, "loss": 2.0955, "step": 25605 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019955222001917725, "loss": 2.1391, "step": 25610 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019955204528681664, "loss": 2.1667, "step": 25615 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.0001995518705204473, "loss": 2.0701, "step": 25620 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019955169572006922, "loss": 2.2704, "step": 25625 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019955152088568247, "loss": 2.2479, "step": 25630 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019955134601728713, "loss": 2.252, "step": 25635 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019955117111488324, "loss": 2.019, "step": 25640 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019955099617847085, "loss": 2.2616, "step": 25645 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019955082120805005, "loss": 2.1684, "step": 25650 }, { "epoch": 0.06, "grad_norm": 1.625, "learning_rate": 0.00019955064620362089, "loss": 2.1533, "step": 25655 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.0001995504711651834, "loss": 2.3578, "step": 25660 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.00019955029609273768, "loss": 2.2898, "step": 25665 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019955012098628376, "loss": 2.1819, "step": 25670 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019954994584582173, "loss": 2.1238, "step": 25675 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019954977067135162, "loss": 2.1234, "step": 25680 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019954959546287352, "loss": 2.0301, "step": 25685 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019954942022038744, "loss": 2.1483, "step": 25690 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019954924494389352, "loss": 2.2123, "step": 25695 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019954906963339173, "loss": 2.2148, "step": 25700 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.0001995488942888822, "loss": 2.2508, "step": 25705 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019954871891036496, "loss": 1.9905, "step": 25710 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.00019954854349784004, "loss": 2.3665, "step": 25715 }, { "epoch": 0.06, "grad_norm": 2.0, "learning_rate": 0.00019954836805130755, "loss": 2.146, "step": 25720 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019954819257076754, "loss": 2.2615, "step": 25725 }, { "epoch": 0.06, "grad_norm": 1.6015625, "learning_rate": 0.00019954801705622006, "loss": 1.9081, "step": 25730 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019954784150766518, "loss": 2.1384, "step": 25735 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.00019954766592510294, "loss": 2.3699, "step": 25740 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019954749030853342, "loss": 2.3108, "step": 25745 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019954731465795665, "loss": 2.291, "step": 25750 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019954713897337275, "loss": 2.2725, "step": 25755 }, { "epoch": 0.06, "grad_norm": 2.15625, "learning_rate": 0.0001995469632547817, "loss": 2.1816, "step": 25760 }, { "epoch": 0.06, "grad_norm": 1.4609375, "learning_rate": 0.00019954678750218362, "loss": 2.1472, "step": 25765 }, { "epoch": 0.06, "grad_norm": 1.4921875, "learning_rate": 0.00019954661171557854, "loss": 2.0299, "step": 25770 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019954643589496657, "loss": 2.4059, "step": 25775 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.0001995462600403477, "loss": 2.1265, "step": 25780 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019954608415172204, "loss": 2.2635, "step": 25785 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019954590822908962, "loss": 2.2001, "step": 25790 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.00019954573227245052, "loss": 2.166, "step": 25795 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.0001995455562818048, "loss": 2.1799, "step": 25800 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.00019954538025715248, "loss": 2.0833, "step": 25805 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019954520419849367, "loss": 2.1653, "step": 25810 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019954502810582843, "loss": 2.0606, "step": 25815 }, { "epoch": 0.06, "grad_norm": 2.15625, "learning_rate": 0.00019954485197915678, "loss": 2.1277, "step": 25820 }, { "epoch": 0.06, "grad_norm": 2.25, "learning_rate": 0.00019954467581847883, "loss": 2.2189, "step": 25825 }, { "epoch": 0.06, "grad_norm": 2.484375, "learning_rate": 0.0001995444996237946, "loss": 2.2145, "step": 25830 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.00019954432339510417, "loss": 2.1987, "step": 25835 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019954414713240756, "loss": 2.1555, "step": 25840 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019954397083570487, "loss": 2.2442, "step": 25845 }, { "epoch": 0.06, "grad_norm": 2.1875, "learning_rate": 0.0001995437945049962, "loss": 2.1503, "step": 25850 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019954361814028152, "loss": 2.3548, "step": 25855 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019954344174156097, "loss": 2.2612, "step": 25860 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019954326530883455, "loss": 2.2192, "step": 25865 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019954308884210235, "loss": 2.0679, "step": 25870 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019954291234136444, "loss": 1.976, "step": 25875 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019954273580662083, "loss": 2.2375, "step": 25880 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019954255923787165, "loss": 2.3143, "step": 25885 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.0001995423826351169, "loss": 2.25, "step": 25890 }, { "epoch": 0.06, "grad_norm": 1.6015625, "learning_rate": 0.0001995422059983567, "loss": 2.2911, "step": 25895 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.00019954202932759106, "loss": 2.2755, "step": 25900 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019954185262282005, "loss": 2.3447, "step": 25905 }, { "epoch": 0.06, "grad_norm": 1.2265625, "learning_rate": 0.00019954167588404374, "loss": 2.1786, "step": 25910 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019954149911126221, "loss": 2.1702, "step": 25915 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.00019954132230447547, "loss": 2.0255, "step": 25920 }, { "epoch": 0.06, "grad_norm": 2.125, "learning_rate": 0.0001995411454636836, "loss": 2.1302, "step": 25925 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.0001995409685888867, "loss": 2.2373, "step": 25930 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.0001995407916800848, "loss": 2.2578, "step": 25935 }, { "epoch": 0.06, "grad_norm": 2.21875, "learning_rate": 0.00019954061473727793, "loss": 2.1484, "step": 25940 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019954043776046623, "loss": 2.3253, "step": 25945 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019954026074964966, "loss": 2.2129, "step": 25950 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.00019954008370482836, "loss": 2.0926, "step": 25955 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019953990662600237, "loss": 2.2712, "step": 25960 }, { "epoch": 0.06, "grad_norm": 2.453125, "learning_rate": 0.00019953972951317173, "loss": 2.2715, "step": 25965 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.0001995395523663365, "loss": 2.379, "step": 25970 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.00019953937518549678, "loss": 2.1989, "step": 25975 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019953919797065258, "loss": 2.2363, "step": 25980 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019953902072180398, "loss": 2.1536, "step": 25985 }, { "epoch": 0.06, "grad_norm": 1.4609375, "learning_rate": 0.00019953884343895109, "loss": 2.0008, "step": 25990 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.0001995386661220939, "loss": 2.3047, "step": 25995 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.00019953848877123247, "loss": 2.1063, "step": 26000 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.0001995383113863669, "loss": 2.2262, "step": 26005 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019953813396749725, "loss": 2.2619, "step": 26010 }, { "epoch": 0.06, "grad_norm": 1.546875, "learning_rate": 0.00019953795651462357, "loss": 2.3144, "step": 26015 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019953777902774591, "loss": 2.1899, "step": 26020 }, { "epoch": 0.06, "grad_norm": 1.421875, "learning_rate": 0.00019953760150686435, "loss": 2.1999, "step": 26025 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.00019953742395197895, "loss": 2.2005, "step": 26030 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019953724636308972, "loss": 2.2625, "step": 26035 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.0001995370687401968, "loss": 2.2312, "step": 26040 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.00019953689108330021, "loss": 2.0479, "step": 26045 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.0001995367133924, "loss": 2.0686, "step": 26050 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019953653566749622, "loss": 2.2336, "step": 26055 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019953635790858896, "loss": 2.289, "step": 26060 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.0001995361801156783, "loss": 2.2363, "step": 26065 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019953600228876428, "loss": 2.2519, "step": 26070 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.00019953582442784696, "loss": 2.2794, "step": 26075 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019953564653292638, "loss": 2.074, "step": 26080 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995354686040026, "loss": 2.2127, "step": 26085 }, { "epoch": 0.06, "grad_norm": 2.1875, "learning_rate": 0.00019953529064107572, "loss": 2.3528, "step": 26090 }, { "epoch": 0.06, "grad_norm": 1.953125, "learning_rate": 0.00019953511264414578, "loss": 2.1717, "step": 26095 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.00019953493461321285, "loss": 2.1978, "step": 26100 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019953475654827698, "loss": 2.1738, "step": 26105 }, { "epoch": 0.06, "grad_norm": 1.65625, "learning_rate": 0.0001995345784493382, "loss": 2.1353, "step": 26110 }, { "epoch": 0.06, "grad_norm": 1.453125, "learning_rate": 0.00019953440031639664, "loss": 2.1366, "step": 26115 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.0001995342221494523, "loss": 2.3148, "step": 26120 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.00019953404394850528, "loss": 2.2938, "step": 26125 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.0001995338657135556, "loss": 1.9893, "step": 26130 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.0001995336874446034, "loss": 2.3737, "step": 26135 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.00019953350914164863, "loss": 2.2866, "step": 26140 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019953333080469142, "loss": 2.1214, "step": 26145 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019953315243373182, "loss": 2.1876, "step": 26150 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019953297402876993, "loss": 1.9562, "step": 26155 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.00019953279558980572, "loss": 2.2709, "step": 26160 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.00019953261711683933, "loss": 2.3187, "step": 26165 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.0001995324386098708, "loss": 2.2519, "step": 26170 }, { "epoch": 0.06, "grad_norm": 2.25, "learning_rate": 0.00019953226006890016, "loss": 2.1831, "step": 26175 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.0001995320814939275, "loss": 2.4291, "step": 26180 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.0001995319028849529, "loss": 2.1635, "step": 26185 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.00019953172424197634, "loss": 2.1847, "step": 26190 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.000199531545564998, "loss": 2.3687, "step": 26195 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019953136685401784, "loss": 2.1495, "step": 26200 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019953118810903597, "loss": 2.1635, "step": 26205 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019953100933005246, "loss": 2.2419, "step": 26210 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019953083051706735, "loss": 2.2687, "step": 26215 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019953065167008067, "loss": 2.1445, "step": 26220 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019953047278909253, "loss": 2.2177, "step": 26225 }, { "epoch": 0.06, "grad_norm": 1.7421875, "learning_rate": 0.00019953029387410299, "loss": 2.2578, "step": 26230 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019953011492511208, "loss": 2.1041, "step": 26235 }, { "epoch": 0.06, "grad_norm": 2.421875, "learning_rate": 0.0001995299359421199, "loss": 2.1777, "step": 26240 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.00019952975692512647, "loss": 2.2736, "step": 26245 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.00019952957787413187, "loss": 2.2411, "step": 26250 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019952939878913615, "loss": 2.1923, "step": 26255 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.0001995292196701394, "loss": 2.1249, "step": 26260 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019952904051714166, "loss": 2.259, "step": 26265 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019952886133014298, "loss": 2.0862, "step": 26270 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.00019952868210914345, "loss": 2.2368, "step": 26275 }, { "epoch": 0.06, "grad_norm": 1.5703125, "learning_rate": 0.00019952850285414312, "loss": 2.2619, "step": 26280 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019952832356514205, "loss": 2.1809, "step": 26285 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.0001995281442421403, "loss": 2.3358, "step": 26290 }, { "epoch": 0.06, "grad_norm": 2.5625, "learning_rate": 0.00019952796488513792, "loss": 2.2676, "step": 26295 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.000199527785494135, "loss": 2.1131, "step": 26300 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019952760606913156, "loss": 2.1772, "step": 26305 }, { "epoch": 0.06, "grad_norm": 2.828125, "learning_rate": 0.0001995274266101277, "loss": 2.2209, "step": 26310 }, { "epoch": 0.06, "grad_norm": 2.1875, "learning_rate": 0.00019952724711712344, "loss": 2.3078, "step": 26315 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995270675901189, "loss": 2.2123, "step": 26320 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001995268880291141, "loss": 1.9737, "step": 26325 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019952670843410912, "loss": 2.2094, "step": 26330 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019952652880510398, "loss": 2.1675, "step": 26335 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.0001995263491420988, "loss": 2.0065, "step": 26340 }, { "epoch": 0.06, "grad_norm": 1.7890625, "learning_rate": 0.00019952616944509362, "loss": 2.4426, "step": 26345 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019952598971408847, "loss": 2.0852, "step": 26350 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019952580994908345, "loss": 2.2526, "step": 26355 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.00019952563015007861, "loss": 2.169, "step": 26360 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.000199525450317074, "loss": 2.1827, "step": 26365 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.0001995252704500697, "loss": 2.0678, "step": 26370 }, { "epoch": 0.06, "grad_norm": 1.515625, "learning_rate": 0.00019952509054906576, "loss": 2.1619, "step": 26375 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019952491061406223, "loss": 2.2632, "step": 26380 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019952473064505923, "loss": 2.2685, "step": 26385 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019952455064205674, "loss": 2.0918, "step": 26390 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019952437060505486, "loss": 2.2096, "step": 26395 }, { "epoch": 0.06, "grad_norm": 1.484375, "learning_rate": 0.00019952419053405367, "loss": 2.2635, "step": 26400 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.0001995240104290532, "loss": 2.1617, "step": 26405 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019952383029005351, "loss": 2.0867, "step": 26410 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.0001995236501170547, "loss": 2.2206, "step": 26415 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019952346991005678, "loss": 2.3178, "step": 26420 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019952328966905985, "loss": 2.2247, "step": 26425 }, { "epoch": 0.06, "grad_norm": 1.515625, "learning_rate": 0.00019952310939406396, "loss": 2.1223, "step": 26430 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019952292908506918, "loss": 2.1131, "step": 26435 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.00019952274874207552, "loss": 2.125, "step": 26440 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019952256836508315, "loss": 2.1944, "step": 26445 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019952238795409202, "loss": 2.3689, "step": 26450 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019952220750910223, "loss": 2.1631, "step": 26455 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.0001995220270301139, "loss": 2.173, "step": 26460 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.000199521846517127, "loss": 2.241, "step": 26465 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019952166597014164, "loss": 2.1243, "step": 26470 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.00019952148538915788, "loss": 2.2318, "step": 26475 }, { "epoch": 0.06, "grad_norm": 2.1875, "learning_rate": 0.00019952130477417576, "loss": 2.2601, "step": 26480 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019952112412519537, "loss": 2.2532, "step": 26485 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.00019952094344221678, "loss": 2.0158, "step": 26490 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019952076272524, "loss": 2.3783, "step": 26495 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.0001995205819742651, "loss": 2.2575, "step": 26500 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.0001995204011892922, "loss": 2.2558, "step": 26505 }, { "epoch": 0.06, "grad_norm": 2.453125, "learning_rate": 0.00019952022037032134, "loss": 2.323, "step": 26510 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019952003951735254, "loss": 2.1306, "step": 26515 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019951985863038592, "loss": 2.0062, "step": 26520 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019951967770942148, "loss": 2.1249, "step": 26525 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019951949675445932, "loss": 2.312, "step": 26530 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.0001995193157654995, "loss": 2.2617, "step": 26535 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019951913474254207, "loss": 2.0115, "step": 26540 }, { "epoch": 0.06, "grad_norm": 1.84375, "learning_rate": 0.00019951895368558708, "loss": 2.2557, "step": 26545 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.00019951877259463465, "loss": 2.2538, "step": 26550 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0001995185914696848, "loss": 2.3101, "step": 26555 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019951841031073757, "loss": 2.1213, "step": 26560 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019951822911779305, "loss": 2.2301, "step": 26565 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019951804789085132, "loss": 2.2602, "step": 26570 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.0001995178666299124, "loss": 2.2934, "step": 26575 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019951768533497637, "loss": 2.3139, "step": 26580 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.0001995175040060433, "loss": 2.1998, "step": 26585 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.00019951732264311326, "loss": 2.2437, "step": 26590 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.0001995171412461863, "loss": 2.1948, "step": 26595 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019951695981526243, "loss": 2.0434, "step": 26600 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.0001995167783503418, "loss": 2.2088, "step": 26605 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.00019951659685142446, "loss": 2.3853, "step": 26610 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.0001995164153185104, "loss": 2.1586, "step": 26615 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019951623375159973, "loss": 2.2879, "step": 26620 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019951605215069252, "loss": 2.3925, "step": 26625 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.0001995158705157888, "loss": 2.1904, "step": 26630 }, { "epoch": 0.06, "grad_norm": 2.28125, "learning_rate": 0.00019951568884688873, "loss": 2.1648, "step": 26635 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.0001995155071439922, "loss": 2.278, "step": 26640 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.00019951532540709942, "loss": 2.1887, "step": 26645 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019951514363621042, "loss": 2.1094, "step": 26650 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.0001995149618313252, "loss": 2.2689, "step": 26655 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019951477999244387, "loss": 2.2673, "step": 26660 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019951459811956652, "loss": 2.0173, "step": 26665 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019951441621269314, "loss": 2.2508, "step": 26670 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.00019951423427182384, "loss": 2.3253, "step": 26675 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.00019951405229695868, "loss": 2.273, "step": 26680 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.0001995138702880977, "loss": 2.0778, "step": 26685 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.000199513688245241, "loss": 2.2047, "step": 26690 }, { "epoch": 0.06, "grad_norm": 2.125, "learning_rate": 0.0001995135061683886, "loss": 2.2485, "step": 26695 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.0001995133240575406, "loss": 2.1619, "step": 26700 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.00019951314191269702, "loss": 2.0433, "step": 26705 }, { "epoch": 0.06, "grad_norm": 2.875, "learning_rate": 0.00019951295973385797, "loss": 2.0756, "step": 26710 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019951277752102347, "loss": 2.2626, "step": 26715 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019951259527419362, "loss": 2.2612, "step": 26720 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019951241299336845, "loss": 2.0722, "step": 26725 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019951223067854804, "loss": 2.2706, "step": 26730 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019951204832973244, "loss": 2.3041, "step": 26735 }, { "epoch": 0.06, "grad_norm": 1.5859375, "learning_rate": 0.00019951186594692173, "loss": 2.2075, "step": 26740 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019951168353011593, "loss": 2.2159, "step": 26745 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019951150107931515, "loss": 2.1894, "step": 26750 }, { "epoch": 0.06, "grad_norm": 2.125, "learning_rate": 0.00019951131859451945, "loss": 2.2896, "step": 26755 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.0001995111360757289, "loss": 2.2985, "step": 26760 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019951095352294348, "loss": 2.1003, "step": 26765 }, { "epoch": 0.06, "grad_norm": 1.734375, "learning_rate": 0.00019951077093616337, "loss": 2.2127, "step": 26770 }, { "epoch": 0.06, "grad_norm": 1.5078125, "learning_rate": 0.00019951058831538856, "loss": 2.23, "step": 26775 }, { "epoch": 0.06, "grad_norm": 2.375, "learning_rate": 0.0001995104056606191, "loss": 2.1796, "step": 26780 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.0001995102229718551, "loss": 2.1161, "step": 26785 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.00019951004024909664, "loss": 2.2415, "step": 26790 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.0001995098574923437, "loss": 2.2779, "step": 26795 }, { "epoch": 0.06, "grad_norm": 2.046875, "learning_rate": 0.0001995096747015964, "loss": 2.2615, "step": 26800 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.0001995094918768548, "loss": 2.2808, "step": 26805 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019950930901811893, "loss": 2.2942, "step": 26810 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.0001995091261253889, "loss": 2.1116, "step": 26815 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019950894319866472, "loss": 2.2285, "step": 26820 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.0001995087602379465, "loss": 2.253, "step": 26825 }, { "epoch": 0.06, "grad_norm": 2.265625, "learning_rate": 0.0001995085772432343, "loss": 2.1719, "step": 26830 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019950839421452812, "loss": 1.9789, "step": 26835 }, { "epoch": 0.06, "grad_norm": 1.4140625, "learning_rate": 0.0001995082111518281, "loss": 2.1308, "step": 26840 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019950802805513427, "loss": 2.331, "step": 26845 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.0001995078449244467, "loss": 2.1787, "step": 26850 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.00019950766175976542, "loss": 2.2598, "step": 26855 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019950747856109052, "loss": 2.0182, "step": 26860 }, { "epoch": 0.06, "grad_norm": 2.109375, "learning_rate": 0.00019950729532842206, "loss": 1.9529, "step": 26865 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019950711206176013, "loss": 2.185, "step": 26870 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.00019950692876110474, "loss": 2.2027, "step": 26875 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.000199506745426456, "loss": 2.0807, "step": 26880 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.00019950656205781393, "loss": 2.0108, "step": 26885 }, { "epoch": 0.06, "grad_norm": 1.1875, "learning_rate": 0.00019950637865517862, "loss": 1.9774, "step": 26890 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019950619521855013, "loss": 2.0296, "step": 26895 }, { "epoch": 0.06, "grad_norm": 1.671875, "learning_rate": 0.00019950601174792851, "loss": 2.0341, "step": 26900 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019950582824331385, "loss": 2.1351, "step": 26905 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019950564470470617, "loss": 2.1926, "step": 26910 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019950546113210559, "loss": 2.3668, "step": 26915 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019950527752551214, "loss": 2.24, "step": 26920 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.00019950509388492584, "loss": 2.0911, "step": 26925 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019950491021034682, "loss": 2.3402, "step": 26930 }, { "epoch": 0.06, "grad_norm": 1.5625, "learning_rate": 0.0001995047265017751, "loss": 2.1252, "step": 26935 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.0001995045427592108, "loss": 2.3719, "step": 26940 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001995043589826539, "loss": 2.2259, "step": 26945 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019950417517210456, "loss": 2.1339, "step": 26950 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019950399132756275, "loss": 2.1341, "step": 26955 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019950380744902856, "loss": 2.1461, "step": 26960 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.00019950362353650213, "loss": 2.2134, "step": 26965 }, { "epoch": 0.06, "grad_norm": 1.96875, "learning_rate": 0.00019950343958998338, "loss": 2.0174, "step": 26970 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019950325560947246, "loss": 2.1054, "step": 26975 }, { "epoch": 0.06, "grad_norm": 2.8125, "learning_rate": 0.00019950307159496947, "loss": 2.1369, "step": 26980 }, { "epoch": 0.06, "grad_norm": 1.8046875, "learning_rate": 0.0001995028875464744, "loss": 2.2652, "step": 26985 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.00019950270346398734, "loss": 2.1081, "step": 26990 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019950251934750836, "loss": 2.2837, "step": 26995 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.0001995023351970375, "loss": 2.0228, "step": 27000 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019950215101257484, "loss": 2.1775, "step": 27005 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.00019950196679412044, "loss": 2.0568, "step": 27010 }, { "epoch": 0.06, "grad_norm": 2.296875, "learning_rate": 0.00019950178254167438, "loss": 2.1536, "step": 27015 }, { "epoch": 0.06, "grad_norm": 1.6015625, "learning_rate": 0.0001995015982552367, "loss": 2.1283, "step": 27020 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019950141393480745, "loss": 2.162, "step": 27025 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019950122958038672, "loss": 2.1285, "step": 27030 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.00019950104519197458, "loss": 2.25, "step": 27035 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0001995008607695711, "loss": 2.5024, "step": 27040 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019950067631317628, "loss": 2.2227, "step": 27045 }, { "epoch": 0.06, "grad_norm": 1.546875, "learning_rate": 0.00019950049182279026, "loss": 2.2593, "step": 27050 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.000199500307298413, "loss": 2.0391, "step": 27055 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.0001995001227400447, "loss": 2.1453, "step": 27060 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019949993814768533, "loss": 2.1917, "step": 27065 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019949975352133496, "loss": 2.1829, "step": 27070 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.0001994995688609937, "loss": 2.3504, "step": 27075 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019949938416666157, "loss": 2.2283, "step": 27080 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019949919943833863, "loss": 2.2247, "step": 27085 }, { "epoch": 0.06, "grad_norm": 1.5390625, "learning_rate": 0.000199499014676025, "loss": 1.9955, "step": 27090 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.00019949882987972063, "loss": 2.233, "step": 27095 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019949864504942573, "loss": 2.2562, "step": 27100 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019949846018514026, "loss": 2.2338, "step": 27105 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.0001994982752868643, "loss": 2.3311, "step": 27110 }, { "epoch": 0.06, "grad_norm": 1.9296875, "learning_rate": 0.00019949809035459796, "loss": 2.2515, "step": 27115 }, { "epoch": 0.06, "grad_norm": 1.875, "learning_rate": 0.00019949790538834123, "loss": 2.356, "step": 27120 }, { "epoch": 0.06, "grad_norm": 1.75, "learning_rate": 0.0001994977203880942, "loss": 2.0794, "step": 27125 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019949753535385698, "loss": 2.4046, "step": 27130 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.00019949735028562958, "loss": 1.9809, "step": 27135 }, { "epoch": 0.06, "grad_norm": 2.65625, "learning_rate": 0.0001994971651834121, "loss": 2.0803, "step": 27140 }, { "epoch": 0.06, "grad_norm": 1.921875, "learning_rate": 0.00019949698004720457, "loss": 2.2326, "step": 27145 }, { "epoch": 0.06, "grad_norm": 1.9453125, "learning_rate": 0.00019949679487700706, "loss": 2.2685, "step": 27150 }, { "epoch": 0.06, "grad_norm": 1.7734375, "learning_rate": 0.00019949660967281965, "loss": 2.174, "step": 27155 }, { "epoch": 0.06, "grad_norm": 2.203125, "learning_rate": 0.00019949642443464237, "loss": 2.2551, "step": 27160 }, { "epoch": 0.06, "grad_norm": 1.796875, "learning_rate": 0.00019949623916247532, "loss": 2.1898, "step": 27165 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019949605385631856, "loss": 2.1402, "step": 27170 }, { "epoch": 0.06, "grad_norm": 2.59375, "learning_rate": 0.00019949586851617218, "loss": 2.3115, "step": 27175 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019949568314203616, "loss": 2.245, "step": 27180 }, { "epoch": 0.06, "grad_norm": 2.15625, "learning_rate": 0.00019949549773391062, "loss": 2.184, "step": 27185 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.0001994953122917956, "loss": 2.1069, "step": 27190 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019949512681569117, "loss": 2.3237, "step": 27195 }, { "epoch": 0.06, "grad_norm": 1.984375, "learning_rate": 0.00019949494130559744, "loss": 2.0935, "step": 27200 }, { "epoch": 0.06, "grad_norm": 2.0, "learning_rate": 0.00019949475576151442, "loss": 2.1787, "step": 27205 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019949457018344217, "loss": 2.2212, "step": 27210 }, { "epoch": 0.06, "grad_norm": 1.6953125, "learning_rate": 0.00019949438457138077, "loss": 1.967, "step": 27215 }, { "epoch": 0.06, "grad_norm": 1.9140625, "learning_rate": 0.00019949419892533032, "loss": 2.1223, "step": 27220 }, { "epoch": 0.06, "grad_norm": 1.5078125, "learning_rate": 0.0001994940132452908, "loss": 2.1707, "step": 27225 }, { "epoch": 0.06, "grad_norm": 2.21875, "learning_rate": 0.00019949382753126234, "loss": 2.2905, "step": 27230 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.000199493641783245, "loss": 2.2488, "step": 27235 }, { "epoch": 0.06, "grad_norm": 1.578125, "learning_rate": 0.0001994934560012388, "loss": 2.0899, "step": 27240 }, { "epoch": 0.06, "grad_norm": 1.6484375, "learning_rate": 0.00019949327018524383, "loss": 2.0903, "step": 27245 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.0001994930843352602, "loss": 2.2153, "step": 27250 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0001994928984512879, "loss": 2.1564, "step": 27255 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.00019949271253332703, "loss": 2.2337, "step": 27260 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019949252658137763, "loss": 2.12, "step": 27265 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019949234059543977, "loss": 2.0523, "step": 27270 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.0001994921545755135, "loss": 2.169, "step": 27275 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019949196852159896, "loss": 2.152, "step": 27280 }, { "epoch": 0.06, "grad_norm": 2.15625, "learning_rate": 0.00019949178243369614, "loss": 1.9596, "step": 27285 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019949159631180513, "loss": 2.1458, "step": 27290 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.00019949141015592598, "loss": 2.2784, "step": 27295 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019949122396605874, "loss": 2.3685, "step": 27300 }, { "epoch": 0.06, "grad_norm": 2.03125, "learning_rate": 0.00019949103774220353, "loss": 2.196, "step": 27305 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.00019949085148436034, "loss": 2.253, "step": 27310 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.0001994906651925293, "loss": 2.0969, "step": 27315 }, { "epoch": 0.06, "grad_norm": 2.34375, "learning_rate": 0.00019949047886671045, "loss": 1.9194, "step": 27320 }, { "epoch": 0.06, "grad_norm": 1.8125, "learning_rate": 0.00019949029250690382, "loss": 2.1976, "step": 27325 }, { "epoch": 0.06, "grad_norm": 1.8671875, "learning_rate": 0.00019949010611310949, "loss": 2.1173, "step": 27330 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019948991968532758, "loss": 2.1877, "step": 27335 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019948973322355808, "loss": 2.0285, "step": 27340 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019948954672780112, "loss": 2.1627, "step": 27345 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019948936019805667, "loss": 2.2202, "step": 27350 }, { "epoch": 0.06, "grad_norm": 1.8984375, "learning_rate": 0.0001994891736343249, "loss": 2.0881, "step": 27355 }, { "epoch": 0.06, "grad_norm": 1.78125, "learning_rate": 0.0001994889870366058, "loss": 2.2139, "step": 27360 }, { "epoch": 0.06, "grad_norm": 2.34375, "learning_rate": 0.00019948880040489945, "loss": 2.1045, "step": 27365 }, { "epoch": 0.06, "grad_norm": 2.359375, "learning_rate": 0.00019948861373920594, "loss": 2.0947, "step": 27370 }, { "epoch": 0.06, "grad_norm": 1.859375, "learning_rate": 0.0001994884270395253, "loss": 2.3027, "step": 27375 }, { "epoch": 0.06, "grad_norm": 1.59375, "learning_rate": 0.0001994882403058576, "loss": 2.2413, "step": 27380 }, { "epoch": 0.06, "grad_norm": 1.71875, "learning_rate": 0.00019948805353820292, "loss": 2.2398, "step": 27385 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019948786673656136, "loss": 2.2556, "step": 27390 }, { "epoch": 0.06, "grad_norm": 1.8359375, "learning_rate": 0.0001994876799009329, "loss": 2.1285, "step": 27395 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019948749303131763, "loss": 2.1781, "step": 27400 }, { "epoch": 0.06, "grad_norm": 1.640625, "learning_rate": 0.00019948730612771563, "loss": 2.2608, "step": 27405 }, { "epoch": 0.06, "grad_norm": 2.0625, "learning_rate": 0.000199487119190127, "loss": 2.3181, "step": 27410 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.00019948693221855175, "loss": 2.1709, "step": 27415 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019948674521298994, "loss": 2.2055, "step": 27420 }, { "epoch": 0.06, "grad_norm": 1.90625, "learning_rate": 0.00019948655817344165, "loss": 1.9518, "step": 27425 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.000199486371099907, "loss": 2.3901, "step": 27430 }, { "epoch": 0.06, "grad_norm": 2.1875, "learning_rate": 0.00019948618399238596, "loss": 2.1596, "step": 27435 }, { "epoch": 0.06, "grad_norm": 2.3125, "learning_rate": 0.00019948599685087865, "loss": 2.2682, "step": 27440 }, { "epoch": 0.06, "grad_norm": 1.6796875, "learning_rate": 0.0001994858096753851, "loss": 2.0692, "step": 27445 }, { "epoch": 0.06, "grad_norm": 1.765625, "learning_rate": 0.0001994856224659054, "loss": 2.12, "step": 27450 }, { "epoch": 0.06, "grad_norm": 1.5390625, "learning_rate": 0.0001994854352224396, "loss": 2.082, "step": 27455 }, { "epoch": 0.06, "grad_norm": 2.140625, "learning_rate": 0.0001994852479449878, "loss": 2.0739, "step": 27460 }, { "epoch": 0.06, "grad_norm": 1.9921875, "learning_rate": 0.00019948506063355003, "loss": 2.2032, "step": 27465 }, { "epoch": 0.06, "grad_norm": 2.171875, "learning_rate": 0.00019948487328812635, "loss": 2.3632, "step": 27470 }, { "epoch": 0.06, "grad_norm": 1.9375, "learning_rate": 0.0001994846859087168, "loss": 2.3574, "step": 27475 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.0001994844984953215, "loss": 2.1995, "step": 27480 }, { "epoch": 0.06, "grad_norm": 1.6328125, "learning_rate": 0.0001994843110479405, "loss": 1.9285, "step": 27485 }, { "epoch": 0.06, "grad_norm": 2.015625, "learning_rate": 0.00019948412356657387, "loss": 2.0806, "step": 27490 }, { "epoch": 0.06, "grad_norm": 1.8515625, "learning_rate": 0.00019948393605122164, "loss": 2.2131, "step": 27495 }, { "epoch": 0.06, "grad_norm": 1.5390625, "learning_rate": 0.0001994837485018839, "loss": 2.289, "step": 27500 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.0001994835609185607, "loss": 2.0362, "step": 27505 }, { "epoch": 0.06, "grad_norm": 2.09375, "learning_rate": 0.00019948337330125213, "loss": 2.2296, "step": 27510 }, { "epoch": 0.06, "grad_norm": 1.9609375, "learning_rate": 0.00019948318564995822, "loss": 2.3176, "step": 27515 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.000199482997964679, "loss": 2.2142, "step": 27520 }, { "epoch": 0.06, "grad_norm": 1.7578125, "learning_rate": 0.00019948281024541467, "loss": 2.0698, "step": 27525 }, { "epoch": 0.06, "grad_norm": 1.6171875, "learning_rate": 0.0001994826224921652, "loss": 2.0566, "step": 27530 }, { "epoch": 0.06, "grad_norm": 1.703125, "learning_rate": 0.0001994824347049306, "loss": 2.0147, "step": 27535 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019948224688371102, "loss": 2.2412, "step": 27540 }, { "epoch": 0.06, "grad_norm": 1.8828125, "learning_rate": 0.00019948205902850651, "loss": 2.3157, "step": 27545 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019948187113931715, "loss": 2.3357, "step": 27550 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.00019948168321614295, "loss": 2.1093, "step": 27555 }, { "epoch": 0.06, "grad_norm": 1.9765625, "learning_rate": 0.000199481495258984, "loss": 2.1342, "step": 27560 }, { "epoch": 0.06, "grad_norm": 1.7109375, "learning_rate": 0.00019948130726784035, "loss": 2.4101, "step": 27565 }, { "epoch": 0.06, "grad_norm": 1.6875, "learning_rate": 0.0001994811192427121, "loss": 2.1685, "step": 27570 }, { "epoch": 0.06, "grad_norm": 1.828125, "learning_rate": 0.0001994809311835993, "loss": 2.3271, "step": 27575 }, { "epoch": 0.06, "grad_norm": 1.7265625, "learning_rate": 0.000199480743090502, "loss": 2.1922, "step": 27580 }, { "epoch": 0.06, "grad_norm": 2.34375, "learning_rate": 0.0001994805549634203, "loss": 2.1725, "step": 27585 }, { "epoch": 0.06, "grad_norm": 2.078125, "learning_rate": 0.00019948036680235423, "loss": 2.1142, "step": 27590 }, { "epoch": 0.06, "grad_norm": 1.890625, "learning_rate": 0.00019948017860730384, "loss": 2.3265, "step": 27595 }, { "epoch": 0.06, "grad_norm": 2.234375, "learning_rate": 0.00019947999037826924, "loss": 2.3481, "step": 27600 }, { "epoch": 0.06, "grad_norm": 1.6640625, "learning_rate": 0.00019947980211525046, "loss": 1.8517, "step": 27605 }, { "epoch": 0.06, "grad_norm": 1.5390625, "learning_rate": 0.00019947961381824758, "loss": 2.3035, "step": 27610 }, { "epoch": 0.06, "grad_norm": 1.8203125, "learning_rate": 0.00019947942548726065, "loss": 2.3479, "step": 27615 }, { "epoch": 0.06, "grad_norm": 1.609375, "learning_rate": 0.00019947923712228973, "loss": 2.2185, "step": 27620 }, { "epoch": 0.07, "grad_norm": 1.6015625, "learning_rate": 0.00019947904872333493, "loss": 2.3797, "step": 27625 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.00019947886029039627, "loss": 2.3987, "step": 27630 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.00019947867182347384, "loss": 2.3105, "step": 27635 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019947848332256765, "loss": 2.1863, "step": 27640 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019947829478767786, "loss": 2.1464, "step": 27645 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019947810621880445, "loss": 1.9278, "step": 27650 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019947791761594754, "loss": 2.3691, "step": 27655 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019947772897910713, "loss": 2.3974, "step": 27660 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019947754030828337, "loss": 2.22, "step": 27665 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019947735160347625, "loss": 2.288, "step": 27670 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019947716286468586, "loss": 2.1665, "step": 27675 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019947697409191227, "loss": 2.0368, "step": 27680 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019947678528515553, "loss": 2.2127, "step": 27685 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019947659644441572, "loss": 2.1012, "step": 27690 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019947640756969292, "loss": 2.2851, "step": 27695 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019947621866098718, "loss": 2.2873, "step": 27700 }, { "epoch": 0.07, "grad_norm": 2.1875, "learning_rate": 0.00019947602971829854, "loss": 2.315, "step": 27705 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019947584074162707, "loss": 2.2933, "step": 27710 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019947565173097286, "loss": 2.1314, "step": 27715 }, { "epoch": 0.07, "grad_norm": 1.5, "learning_rate": 0.00019947546268633598, "loss": 2.2115, "step": 27720 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019947527360771646, "loss": 2.2762, "step": 27725 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.0001994750844951144, "loss": 2.2671, "step": 27730 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019947489534852982, "loss": 2.279, "step": 27735 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019947470616796283, "loss": 2.0964, "step": 27740 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.0001994745169534135, "loss": 2.1637, "step": 27745 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019947432770488183, "loss": 2.2804, "step": 27750 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019947413842236796, "loss": 2.1893, "step": 27755 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.00019947394910587192, "loss": 2.1081, "step": 27760 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019947375975539372, "loss": 2.2593, "step": 27765 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019947357037093351, "loss": 2.1759, "step": 27770 }, { "epoch": 0.07, "grad_norm": 2.484375, "learning_rate": 0.00019947338095249135, "loss": 2.1353, "step": 27775 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019947319150006727, "loss": 2.3749, "step": 27780 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.00019947300201366135, "loss": 2.2119, "step": 27785 }, { "epoch": 0.07, "grad_norm": 1.5546875, "learning_rate": 0.0001994728124932736, "loss": 2.1187, "step": 27790 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019947262293890417, "loss": 2.1173, "step": 27795 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.0001994724333505531, "loss": 2.1611, "step": 27800 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.00019947224372822043, "loss": 2.2426, "step": 27805 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.00019947205407190623, "loss": 2.31, "step": 27810 }, { "epoch": 0.07, "grad_norm": 1.9765625, "learning_rate": 0.00019947186438161056, "loss": 2.3057, "step": 27815 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019947167465733353, "loss": 2.1329, "step": 27820 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019947148489907516, "loss": 2.171, "step": 27825 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.0001994712951068355, "loss": 2.2227, "step": 27830 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.00019947110528061469, "loss": 2.1983, "step": 27835 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019947091542041272, "loss": 2.1848, "step": 27840 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.00019947072552622968, "loss": 1.8779, "step": 27845 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019947053559806564, "loss": 2.2825, "step": 27850 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019947034563592064, "loss": 2.1819, "step": 27855 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.0001994701556397948, "loss": 2.1917, "step": 27860 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019946996560968813, "loss": 2.2655, "step": 27865 }, { "epoch": 0.07, "grad_norm": 2.421875, "learning_rate": 0.00019946977554560072, "loss": 2.2676, "step": 27870 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019946958544753263, "loss": 2.2658, "step": 27875 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019946939531548393, "loss": 2.1889, "step": 27880 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019946920514945469, "loss": 2.1558, "step": 27885 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019946901494944495, "loss": 2.2709, "step": 27890 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.00019946882471545482, "loss": 2.232, "step": 27895 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019946863444748428, "loss": 2.2794, "step": 27900 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019946844414553348, "loss": 2.2429, "step": 27905 }, { "epoch": 0.07, "grad_norm": 2.46875, "learning_rate": 0.00019946825380960247, "loss": 2.1427, "step": 27910 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019946806343969125, "loss": 2.1462, "step": 27915 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019946787303579998, "loss": 2.1257, "step": 27920 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.0001994676825979287, "loss": 2.3153, "step": 27925 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019946749212607743, "loss": 2.2589, "step": 27930 }, { "epoch": 0.07, "grad_norm": 2.296875, "learning_rate": 0.00019946730162024624, "loss": 2.2032, "step": 27935 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019946711108043523, "loss": 2.3328, "step": 27940 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019946692050664446, "loss": 2.2385, "step": 27945 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019946672989887398, "loss": 2.226, "step": 27950 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019946653925712388, "loss": 2.0923, "step": 27955 }, { "epoch": 0.07, "grad_norm": 2.25, "learning_rate": 0.00019946634858139417, "loss": 2.3493, "step": 27960 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.000199466157871685, "loss": 2.1046, "step": 27965 }, { "epoch": 0.07, "grad_norm": 1.4921875, "learning_rate": 0.00019946596712799633, "loss": 2.3176, "step": 27970 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.0001994657763503283, "loss": 2.2735, "step": 27975 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019946558553868098, "loss": 2.1816, "step": 27980 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019946539469305438, "loss": 2.1627, "step": 27985 }, { "epoch": 0.07, "grad_norm": 2.484375, "learning_rate": 0.0001994652038134486, "loss": 2.1469, "step": 27990 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019946501289986376, "loss": 2.2091, "step": 27995 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019946482195229984, "loss": 2.2644, "step": 28000 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.0001994646309707569, "loss": 2.2741, "step": 28005 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.00019946443995523506, "loss": 2.4223, "step": 28010 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019946424890573435, "loss": 2.0726, "step": 28015 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019946405782225485, "loss": 2.1, "step": 28020 }, { "epoch": 0.07, "grad_norm": 2.1875, "learning_rate": 0.00019946386670479665, "loss": 2.2523, "step": 28025 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.00019946367555335976, "loss": 2.2804, "step": 28030 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019946348436794428, "loss": 2.1623, "step": 28035 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019946329314855029, "loss": 2.1977, "step": 28040 }, { "epoch": 0.07, "grad_norm": 1.4765625, "learning_rate": 0.00019946310189517783, "loss": 2.2582, "step": 28045 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.00019946291060782696, "loss": 2.229, "step": 28050 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019946271928649776, "loss": 2.2131, "step": 28055 }, { "epoch": 0.07, "grad_norm": 2.46875, "learning_rate": 0.00019946252793119027, "loss": 2.2386, "step": 28060 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.0001994623365419046, "loss": 2.3619, "step": 28065 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.0001994621451186408, "loss": 2.1506, "step": 28070 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.0001994619536613989, "loss": 2.3562, "step": 28075 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019946176217017903, "loss": 2.2506, "step": 28080 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.0001994615706449812, "loss": 2.2714, "step": 28085 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019946137908580548, "loss": 2.2868, "step": 28090 }, { "epoch": 0.07, "grad_norm": 2.1875, "learning_rate": 0.00019946118749265197, "loss": 2.1171, "step": 28095 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019946099586552068, "loss": 2.1723, "step": 28100 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019946080420441177, "loss": 2.2473, "step": 28105 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.0001994606125093252, "loss": 2.2617, "step": 28110 }, { "epoch": 0.07, "grad_norm": 1.5234375, "learning_rate": 0.00019946042078026105, "loss": 2.2352, "step": 28115 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019946022901721949, "loss": 2.1953, "step": 28120 }, { "epoch": 0.07, "grad_norm": 1.609375, "learning_rate": 0.00019946003722020046, "loss": 2.4386, "step": 28125 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.0001994598453892041, "loss": 2.1778, "step": 28130 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019945965352423045, "loss": 2.0379, "step": 28135 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.00019945946162527956, "loss": 2.2659, "step": 28140 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.00019945926969235154, "loss": 2.2671, "step": 28145 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019945907772544644, "loss": 2.0245, "step": 28150 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.0001994588857245643, "loss": 2.2019, "step": 28155 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019945869368970517, "loss": 2.142, "step": 28160 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019945850162086918, "loss": 2.1723, "step": 28165 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019945830951805635, "loss": 2.2337, "step": 28170 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.00019945811738126678, "loss": 2.1452, "step": 28175 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.0001994579252105005, "loss": 2.1885, "step": 28180 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.00019945773300575757, "loss": 2.2685, "step": 28185 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019945754076703808, "loss": 2.2683, "step": 28190 }, { "epoch": 0.07, "grad_norm": 1.421875, "learning_rate": 0.00019945734849434213, "loss": 2.1014, "step": 28195 }, { "epoch": 0.07, "grad_norm": 1.609375, "learning_rate": 0.0001994571561876697, "loss": 2.0683, "step": 28200 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019945696384702092, "loss": 2.2741, "step": 28205 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019945677147239582, "loss": 2.1973, "step": 28210 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.0001994565790637945, "loss": 2.0818, "step": 28215 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019945638662121704, "loss": 2.1846, "step": 28220 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019945619414466344, "loss": 2.2106, "step": 28225 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.0001994560016341338, "loss": 2.1334, "step": 28230 }, { "epoch": 0.07, "grad_norm": 2.28125, "learning_rate": 0.0001994558090896282, "loss": 2.2668, "step": 28235 }, { "epoch": 0.07, "grad_norm": 2.25, "learning_rate": 0.00019945561651114665, "loss": 2.2402, "step": 28240 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.0001994554238986893, "loss": 2.1863, "step": 28245 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.00019945523125225616, "loss": 2.2251, "step": 28250 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019945503857184732, "loss": 2.1481, "step": 28255 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019945484585746283, "loss": 2.2544, "step": 28260 }, { "epoch": 0.07, "grad_norm": 1.9765625, "learning_rate": 0.00019945465310910275, "loss": 2.2312, "step": 28265 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.0001994544603267672, "loss": 2.0592, "step": 28270 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019945426751045612, "loss": 2.3077, "step": 28275 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.0001994540746601697, "loss": 2.0879, "step": 28280 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019945388177590798, "loss": 2.2414, "step": 28285 }, { "epoch": 0.07, "grad_norm": 1.484375, "learning_rate": 0.000199453688857671, "loss": 2.4258, "step": 28290 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.00019945349590545886, "loss": 2.0955, "step": 28295 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019945330291927155, "loss": 2.2656, "step": 28300 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.00019945310989910922, "loss": 2.158, "step": 28305 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.0001994529168449719, "loss": 2.116, "step": 28310 }, { "epoch": 0.07, "grad_norm": 1.2734375, "learning_rate": 0.00019945272375685965, "loss": 2.2331, "step": 28315 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019945253063477259, "loss": 2.2343, "step": 28320 }, { "epoch": 0.07, "grad_norm": 1.59375, "learning_rate": 0.0001994523374787107, "loss": 2.22, "step": 28325 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019945214428867408, "loss": 2.1385, "step": 28330 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001994519510646628, "loss": 2.2559, "step": 28335 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.000199451757806677, "loss": 2.191, "step": 28340 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019945156451471661, "loss": 2.0836, "step": 28345 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019945137118878176, "loss": 2.0915, "step": 28350 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019945117782887252, "loss": 2.2547, "step": 28355 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.000199450984434989, "loss": 2.1746, "step": 28360 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019945079100713118, "loss": 2.317, "step": 28365 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.00019945059754529918, "loss": 2.1994, "step": 28370 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019945040404949305, "loss": 2.1588, "step": 28375 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.00019945021051971288, "loss": 2.1777, "step": 28380 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019945001695595866, "loss": 2.1639, "step": 28385 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019944982335823053, "loss": 2.3929, "step": 28390 }, { "epoch": 0.07, "grad_norm": 1.5703125, "learning_rate": 0.00019944962972652855, "loss": 2.0824, "step": 28395 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019944943606085277, "loss": 2.2288, "step": 28400 }, { "epoch": 0.07, "grad_norm": 1.4765625, "learning_rate": 0.00019944924236120327, "loss": 2.4539, "step": 28405 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.0001994490486275801, "loss": 2.1471, "step": 28410 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019944885485998333, "loss": 2.2247, "step": 28415 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019944866105841303, "loss": 2.1283, "step": 28420 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019944846722286922, "loss": 2.21, "step": 28425 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019944827335335207, "loss": 2.2919, "step": 28430 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019944807944986156, "loss": 2.1834, "step": 28435 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019944788551239779, "loss": 2.1683, "step": 28440 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.0001994476915409608, "loss": 2.161, "step": 28445 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019944749753555069, "loss": 2.0225, "step": 28450 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019944730349616752, "loss": 2.1104, "step": 28455 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019944710942281136, "loss": 2.249, "step": 28460 }, { "epoch": 0.07, "grad_norm": 2.375, "learning_rate": 0.00019944691531548222, "loss": 2.1349, "step": 28465 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019944672117418023, "loss": 2.1032, "step": 28470 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019944652699890544, "loss": 2.1738, "step": 28475 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.0001994463327896579, "loss": 2.0891, "step": 28480 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.0001994461385464377, "loss": 2.1561, "step": 28485 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019944594426924493, "loss": 2.1696, "step": 28490 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019944574995807958, "loss": 2.121, "step": 28495 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019944555561294175, "loss": 2.3247, "step": 28500 }, { "epoch": 0.07, "grad_norm": 1.5, "learning_rate": 0.00019944536123383156, "loss": 2.1972, "step": 28505 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.000199445166820749, "loss": 2.1964, "step": 28510 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019944497237369416, "loss": 2.2857, "step": 28515 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019944477789266712, "loss": 2.0913, "step": 28520 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019944458337766796, "loss": 2.0985, "step": 28525 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.0001994443888286967, "loss": 2.1452, "step": 28530 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019944419424575344, "loss": 2.2396, "step": 28535 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019944399962883825, "loss": 2.25, "step": 28540 }, { "epoch": 0.07, "grad_norm": 3.84375, "learning_rate": 0.00019944380497795118, "loss": 2.0886, "step": 28545 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019944361029309232, "loss": 1.9824, "step": 28550 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019944341557426168, "loss": 2.1702, "step": 28555 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.0001994432208214594, "loss": 2.241, "step": 28560 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.0001994430260346855, "loss": 2.1704, "step": 28565 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019944283121394008, "loss": 2.2198, "step": 28570 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019944263635922313, "loss": 2.2228, "step": 28575 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.0001994424414705348, "loss": 2.1615, "step": 28580 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019944224654787517, "loss": 2.2101, "step": 28585 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.0001994420515912442, "loss": 2.1049, "step": 28590 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019944185660064205, "loss": 2.2752, "step": 28595 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019944166157606877, "loss": 2.3, "step": 28600 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019944146651752442, "loss": 2.1635, "step": 28605 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019944127142500903, "loss": 2.2968, "step": 28610 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019944107629852272, "loss": 2.2051, "step": 28615 }, { "epoch": 0.07, "grad_norm": 1.5390625, "learning_rate": 0.0001994408811380655, "loss": 2.2798, "step": 28620 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.00019944068594363753, "loss": 2.0992, "step": 28625 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.0001994404907152388, "loss": 2.0923, "step": 28630 }, { "epoch": 0.07, "grad_norm": 1.390625, "learning_rate": 0.00019944029545286937, "loss": 2.1729, "step": 28635 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019944010015652937, "loss": 2.2394, "step": 28640 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.0001994399048262188, "loss": 2.2589, "step": 28645 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019943970946193775, "loss": 2.1181, "step": 28650 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.0001994395140636863, "loss": 2.0435, "step": 28655 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019943931863146451, "loss": 2.3743, "step": 28660 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019943912316527245, "loss": 2.1502, "step": 28665 }, { "epoch": 0.07, "grad_norm": 1.53125, "learning_rate": 0.00019943892766511017, "loss": 2.2186, "step": 28670 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019943873213097777, "loss": 2.205, "step": 28675 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019943853656287526, "loss": 2.0226, "step": 28680 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019943834096080277, "loss": 2.2271, "step": 28685 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019943814532476033, "loss": 2.3273, "step": 28690 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019943794965474802, "loss": 2.1807, "step": 28695 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019943775395076592, "loss": 2.3481, "step": 28700 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019943755821281406, "loss": 2.188, "step": 28705 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019943736244089252, "loss": 2.4568, "step": 28710 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.00019943716663500138, "loss": 2.1988, "step": 28715 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019943697079514072, "loss": 2.2189, "step": 28720 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.00019943677492131057, "loss": 2.1382, "step": 28725 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.000199436579013511, "loss": 2.1426, "step": 28730 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001994363830717421, "loss": 2.1175, "step": 28735 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019943618709600394, "loss": 2.3409, "step": 28740 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.00019943599108629659, "loss": 2.1531, "step": 28745 }, { "epoch": 0.07, "grad_norm": 1.4921875, "learning_rate": 0.00019943579504262007, "loss": 2.1527, "step": 28750 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.0001994355989649745, "loss": 2.0975, "step": 28755 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019943540285335991, "loss": 2.2957, "step": 28760 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001994352067077764, "loss": 2.2285, "step": 28765 }, { "epoch": 0.07, "grad_norm": 2.453125, "learning_rate": 0.000199435010528224, "loss": 2.1246, "step": 28770 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019943481431470281, "loss": 2.3214, "step": 28775 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019943461806721288, "loss": 2.1696, "step": 28780 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019943442178575426, "loss": 2.3597, "step": 28785 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019943422547032708, "loss": 2.2513, "step": 28790 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019943402912093135, "loss": 2.2361, "step": 28795 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019943383273756716, "loss": 2.1896, "step": 28800 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019943363632023454, "loss": 2.1632, "step": 28805 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019943343986893362, "loss": 2.0797, "step": 28810 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.0001994332433836644, "loss": 2.1625, "step": 28815 }, { "epoch": 0.07, "grad_norm": 2.78125, "learning_rate": 0.000199433046864427, "loss": 2.2313, "step": 28820 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.00019943285031122146, "loss": 2.0264, "step": 28825 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019943265372404786, "loss": 2.3373, "step": 28830 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019943245710290626, "loss": 2.2731, "step": 28835 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019943226044779675, "loss": 2.2763, "step": 28840 }, { "epoch": 0.07, "grad_norm": 2.34375, "learning_rate": 0.00019943206375871934, "loss": 2.3538, "step": 28845 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019943186703567415, "loss": 1.9888, "step": 28850 }, { "epoch": 0.07, "grad_norm": 2.265625, "learning_rate": 0.00019943167027866126, "loss": 2.1891, "step": 28855 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019943147348768067, "loss": 2.2371, "step": 28860 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.00019943127666273251, "loss": 2.2773, "step": 28865 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019943107980381682, "loss": 2.3024, "step": 28870 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.00019943088291093363, "loss": 2.1838, "step": 28875 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.0001994306859840831, "loss": 2.2319, "step": 28880 }, { "epoch": 0.07, "grad_norm": 2.296875, "learning_rate": 0.00019943048902326522, "loss": 2.0906, "step": 28885 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019943029202848007, "loss": 2.1773, "step": 28890 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019943009499972774, "loss": 2.3695, "step": 28895 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.0001994298979370083, "loss": 2.0957, "step": 28900 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019942970084032178, "loss": 2.518, "step": 28905 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019942950370966829, "loss": 2.2605, "step": 28910 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019942930654504788, "loss": 2.0041, "step": 28915 }, { "epoch": 0.07, "grad_norm": 1.65625, "learning_rate": 0.0001994291093464606, "loss": 2.2141, "step": 28920 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019942891211390654, "loss": 2.2513, "step": 28925 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019942871484738577, "loss": 2.1185, "step": 28930 }, { "epoch": 0.07, "grad_norm": 2.375, "learning_rate": 0.0001994285175468983, "loss": 2.1945, "step": 28935 }, { "epoch": 0.07, "grad_norm": 1.9765625, "learning_rate": 0.00019942832021244432, "loss": 2.3025, "step": 28940 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019942812284402377, "loss": 2.0117, "step": 28945 }, { "epoch": 0.07, "grad_norm": 2.1875, "learning_rate": 0.00019942792544163678, "loss": 2.2047, "step": 28950 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019942772800528343, "loss": 2.2097, "step": 28955 }, { "epoch": 0.07, "grad_norm": 1.4609375, "learning_rate": 0.0001994275305349637, "loss": 2.2316, "step": 28960 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.0001994273330306778, "loss": 2.1476, "step": 28965 }, { "epoch": 0.07, "grad_norm": 1.6171875, "learning_rate": 0.0001994271354924257, "loss": 2.2141, "step": 28970 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019942693792020748, "loss": 2.3064, "step": 28975 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019942674031402322, "loss": 2.3468, "step": 28980 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019942654267387299, "loss": 2.2874, "step": 28985 }, { "epoch": 0.07, "grad_norm": 1.421875, "learning_rate": 0.00019942634499975682, "loss": 2.1308, "step": 28990 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.0001994261472916748, "loss": 2.2767, "step": 28995 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019942594954962703, "loss": 2.1609, "step": 29000 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.00019942575177361356, "loss": 2.2494, "step": 29005 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019942555396363444, "loss": 2.1206, "step": 29010 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.00019942535611968976, "loss": 2.2225, "step": 29015 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019942515824177955, "loss": 2.2321, "step": 29020 }, { "epoch": 0.07, "grad_norm": 2.5625, "learning_rate": 0.00019942496032990392, "loss": 2.2799, "step": 29025 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.0001994247623840629, "loss": 2.3176, "step": 29030 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.0001994245644042566, "loss": 2.0901, "step": 29035 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019942436639048506, "loss": 2.1272, "step": 29040 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019942416834274834, "loss": 2.0802, "step": 29045 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019942397026104653, "loss": 2.0682, "step": 29050 }, { "epoch": 0.07, "grad_norm": 1.6484375, "learning_rate": 0.0001994237721453797, "loss": 2.0471, "step": 29055 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.0001994235739957479, "loss": 2.1164, "step": 29060 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.0001994233758121512, "loss": 2.2933, "step": 29065 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019942317759458967, "loss": 2.0513, "step": 29070 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.0001994229793430634, "loss": 2.3028, "step": 29075 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.0001994227810575724, "loss": 2.1145, "step": 29080 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.0001994225827381168, "loss": 2.1567, "step": 29085 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019942238438469664, "loss": 2.0928, "step": 29090 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.000199422185997312, "loss": 2.2199, "step": 29095 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.0001994219875759629, "loss": 2.1618, "step": 29100 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.0001994217891206495, "loss": 2.1626, "step": 29105 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.00019942159063137175, "loss": 2.2497, "step": 29110 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019942139210812985, "loss": 2.1451, "step": 29115 }, { "epoch": 0.07, "grad_norm": 1.578125, "learning_rate": 0.00019942119355092377, "loss": 2.1319, "step": 29120 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.0001994209949597536, "loss": 2.3732, "step": 29125 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.0001994207963346194, "loss": 2.3152, "step": 29130 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019942059767552128, "loss": 2.0775, "step": 29135 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019942039898245928, "loss": 2.1284, "step": 29140 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.00019942020025543348, "loss": 2.2772, "step": 29145 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.00019942000149444388, "loss": 2.1957, "step": 29150 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019941980269949067, "loss": 2.1008, "step": 29155 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019941960387057382, "loss": 2.0629, "step": 29160 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019941940500769342, "loss": 1.9494, "step": 29165 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019941920611084958, "loss": 2.2582, "step": 29170 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.0001994190071800423, "loss": 2.2598, "step": 29175 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.0001994188082152717, "loss": 2.2375, "step": 29180 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019941860921653785, "loss": 2.1898, "step": 29185 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019941841018384079, "loss": 2.0865, "step": 29190 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.0001994182111171806, "loss": 2.1491, "step": 29195 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019941801201655733, "loss": 2.2753, "step": 29200 }, { "epoch": 0.07, "grad_norm": 1.578125, "learning_rate": 0.0001994178128819711, "loss": 2.08, "step": 29205 }, { "epoch": 0.07, "grad_norm": 2.234375, "learning_rate": 0.00019941761371342187, "loss": 2.193, "step": 29210 }, { "epoch": 0.07, "grad_norm": 2.203125, "learning_rate": 0.00019941741451090983, "loss": 2.251, "step": 29215 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.000199417215274435, "loss": 2.1023, "step": 29220 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019941701600399744, "loss": 2.1637, "step": 29225 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019941681669959722, "loss": 2.2284, "step": 29230 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.0001994166173612344, "loss": 2.0682, "step": 29235 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.00019941641798890909, "loss": 2.1987, "step": 29240 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019941621858262133, "loss": 2.4107, "step": 29245 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.00019941601914237115, "loss": 2.1971, "step": 29250 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019941581966815867, "loss": 2.1872, "step": 29255 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019941562015998398, "loss": 2.1072, "step": 29260 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019941542061784707, "loss": 2.0318, "step": 29265 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019941522104174804, "loss": 2.2406, "step": 29270 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.000199415021431687, "loss": 2.2812, "step": 29275 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019941482178766399, "loss": 2.0461, "step": 29280 }, { "epoch": 0.07, "grad_norm": 1.6171875, "learning_rate": 0.00019941462210967904, "loss": 2.1951, "step": 29285 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.0001994144223977323, "loss": 2.0902, "step": 29290 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019941422265182373, "loss": 2.2087, "step": 29295 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001994140228719535, "loss": 2.1385, "step": 29300 }, { "epoch": 0.07, "grad_norm": 1.65625, "learning_rate": 0.00019941382305812164, "loss": 2.3347, "step": 29305 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.0001994136232103282, "loss": 2.2136, "step": 29310 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019941342332857324, "loss": 2.1801, "step": 29315 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019941322341285688, "loss": 2.3183, "step": 29320 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.00019941302346317916, "loss": 2.2089, "step": 29325 }, { "epoch": 0.07, "grad_norm": 1.453125, "learning_rate": 0.00019941282347954014, "loss": 1.9599, "step": 29330 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.00019941262346193992, "loss": 2.2308, "step": 29335 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.0001994124234103785, "loss": 2.2802, "step": 29340 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019941222332485603, "loss": 2.1143, "step": 29345 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.00019941202320537254, "loss": 2.1903, "step": 29350 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.0001994118230519281, "loss": 2.2914, "step": 29355 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019941162286452274, "loss": 2.3328, "step": 29360 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019941142264315662, "loss": 2.0479, "step": 29365 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019941122238782973, "loss": 2.2597, "step": 29370 }, { "epoch": 0.07, "grad_norm": 1.53125, "learning_rate": 0.00019941102209854213, "loss": 2.2385, "step": 29375 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.000199410821775294, "loss": 2.2675, "step": 29380 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019941062141808527, "loss": 2.0827, "step": 29385 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019941042102691607, "loss": 2.0325, "step": 29390 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.0001994102206017865, "loss": 2.2046, "step": 29395 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019941002014269656, "loss": 2.164, "step": 29400 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.0001994098196496464, "loss": 2.2498, "step": 29405 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.000199409619122636, "loss": 2.1513, "step": 29410 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019940941856166547, "loss": 2.1546, "step": 29415 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.0001994092179667349, "loss": 2.2051, "step": 29420 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019940901733784435, "loss": 2.1725, "step": 29425 }, { "epoch": 0.07, "grad_norm": 1.5703125, "learning_rate": 0.00019940881667499384, "loss": 2.1103, "step": 29430 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.0001994086159781835, "loss": 2.0552, "step": 29435 }, { "epoch": 0.07, "grad_norm": 2.40625, "learning_rate": 0.00019940841524741337, "loss": 2.3394, "step": 29440 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019940821448268353, "loss": 2.3351, "step": 29445 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019940801368399403, "loss": 2.1866, "step": 29450 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019940781285134496, "loss": 2.1572, "step": 29455 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019940761198473638, "loss": 2.3061, "step": 29460 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019940741108416833, "loss": 2.2964, "step": 29465 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019940721014964093, "loss": 2.2208, "step": 29470 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019940700918115423, "loss": 2.1571, "step": 29475 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019940680817870826, "loss": 2.3845, "step": 29480 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019940660714230316, "loss": 2.049, "step": 29485 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019940640607193894, "loss": 2.3362, "step": 29490 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.0001994062049676157, "loss": 2.1433, "step": 29495 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.00019940600382933348, "loss": 2.1376, "step": 29500 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019940580265709237, "loss": 2.2252, "step": 29505 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019940560145089245, "loss": 2.361, "step": 29510 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019940540021073374, "loss": 2.2243, "step": 29515 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019940519893661639, "loss": 2.2572, "step": 29520 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019940499762854036, "loss": 2.2158, "step": 29525 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019940479628650587, "loss": 2.1221, "step": 29530 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.0001994045949105128, "loss": 2.0712, "step": 29535 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.0001994043935005614, "loss": 2.095, "step": 29540 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019940419205665162, "loss": 2.3014, "step": 29545 }, { "epoch": 0.07, "grad_norm": 2.203125, "learning_rate": 0.00019940399057878354, "loss": 2.1253, "step": 29550 }, { "epoch": 0.07, "grad_norm": 1.4921875, "learning_rate": 0.0001994037890669573, "loss": 2.3279, "step": 29555 }, { "epoch": 0.07, "grad_norm": 1.5078125, "learning_rate": 0.0001994035875211729, "loss": 2.0934, "step": 29560 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.0001994033859414304, "loss": 2.2607, "step": 29565 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.00019940318432773, "loss": 2.1028, "step": 29570 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019940298268007156, "loss": 2.1018, "step": 29575 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.0001994027809984553, "loss": 2.2547, "step": 29580 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019940257928288127, "loss": 2.1267, "step": 29585 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019940237753334947, "loss": 2.0925, "step": 29590 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019940217574986003, "loss": 2.329, "step": 29595 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.00019940197393241305, "loss": 2.1664, "step": 29600 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019940177208100849, "loss": 2.3495, "step": 29605 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019940157019564651, "loss": 2.2215, "step": 29610 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019940136827632717, "loss": 2.0595, "step": 29615 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.0001994011663230505, "loss": 2.2087, "step": 29620 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.0001994009643358166, "loss": 2.1052, "step": 29625 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.0001994007623146255, "loss": 2.3626, "step": 29630 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019940056025947734, "loss": 2.3248, "step": 29635 }, { "epoch": 0.07, "grad_norm": 2.25, "learning_rate": 0.0001994003581703721, "loss": 2.0044, "step": 29640 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019940015604730992, "loss": 2.4095, "step": 29645 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019939995389029082, "loss": 2.2316, "step": 29650 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019939975169931494, "loss": 2.28, "step": 29655 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019939954947438226, "loss": 2.1864, "step": 29660 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001993993472154929, "loss": 2.1414, "step": 29665 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019939914492264694, "loss": 2.0756, "step": 29670 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.0001993989425958444, "loss": 2.1208, "step": 29675 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.0001993987402350854, "loss": 2.2208, "step": 29680 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019939853784037, "loss": 2.0965, "step": 29685 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019939833541169822, "loss": 2.2009, "step": 29690 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019939813294907017, "loss": 2.2634, "step": 29695 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019939793045248595, "loss": 2.1402, "step": 29700 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019939772792194557, "loss": 2.1306, "step": 29705 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019939752535744911, "loss": 2.1303, "step": 29710 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.0001993973227589967, "loss": 2.2288, "step": 29715 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.0001993971201265883, "loss": 2.2279, "step": 29720 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.0001993969174602241, "loss": 2.106, "step": 29725 }, { "epoch": 0.07, "grad_norm": 1.3828125, "learning_rate": 0.00019939671475990408, "loss": 2.2342, "step": 29730 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019939651202562834, "loss": 2.2225, "step": 29735 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019939630925739694, "loss": 2.3699, "step": 29740 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019939610645520998, "loss": 2.1903, "step": 29745 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.00019939590361906748, "loss": 2.391, "step": 29750 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.00019939570074896956, "loss": 2.2902, "step": 29755 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019939549784491628, "loss": 2.2646, "step": 29760 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019939529490690765, "loss": 2.2348, "step": 29765 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.0001993950919349438, "loss": 2.1786, "step": 29770 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019939488892902483, "loss": 2.1866, "step": 29775 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019939468588915071, "loss": 2.0243, "step": 29780 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.00019939448281532159, "loss": 2.0967, "step": 29785 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019939427970753747, "loss": 2.0642, "step": 29790 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.0001993940765657985, "loss": 2.245, "step": 29795 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.00019939387339010473, "loss": 2.1548, "step": 29800 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019939367018045618, "loss": 2.2098, "step": 29805 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019939346693685295, "loss": 2.2323, "step": 29810 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.0001993932636592951, "loss": 2.4841, "step": 29815 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.0001993930603477827, "loss": 2.2973, "step": 29820 }, { "epoch": 0.07, "grad_norm": 1.5859375, "learning_rate": 0.00019939285700231588, "loss": 2.2499, "step": 29825 }, { "epoch": 0.07, "grad_norm": 1.609375, "learning_rate": 0.00019939265362289464, "loss": 2.2092, "step": 29830 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019939245020951903, "loss": 2.0977, "step": 29835 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.0001993922467621892, "loss": 2.206, "step": 29840 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019939204328090515, "loss": 2.0966, "step": 29845 }, { "epoch": 0.07, "grad_norm": 2.640625, "learning_rate": 0.000199391839765667, "loss": 2.2075, "step": 29850 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019939163621647473, "loss": 2.3356, "step": 29855 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019939143263332854, "loss": 2.2749, "step": 29860 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.0001993912290162284, "loss": 2.1178, "step": 29865 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019939102536517444, "loss": 2.09, "step": 29870 }, { "epoch": 0.07, "grad_norm": 1.4921875, "learning_rate": 0.0001993908216801667, "loss": 2.0762, "step": 29875 }, { "epoch": 0.07, "grad_norm": 2.59375, "learning_rate": 0.00019939061796120524, "loss": 2.1897, "step": 29880 }, { "epoch": 0.07, "grad_norm": 2.5625, "learning_rate": 0.00019939041420829014, "loss": 2.2495, "step": 29885 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.0001993902104214215, "loss": 1.9771, "step": 29890 }, { "epoch": 0.07, "grad_norm": 2.765625, "learning_rate": 0.00019939000660059933, "loss": 2.2581, "step": 29895 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019938980274582374, "loss": 2.1691, "step": 29900 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019938959885709478, "loss": 2.3512, "step": 29905 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.00019938939493441255, "loss": 2.3414, "step": 29910 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019938919097777712, "loss": 2.2103, "step": 29915 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019938898698718848, "loss": 2.2506, "step": 29920 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.0001993887829626468, "loss": 2.1027, "step": 29925 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.0001993885789041521, "loss": 2.1564, "step": 29930 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019938837481170448, "loss": 2.1952, "step": 29935 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.00019938817068530397, "loss": 2.1795, "step": 29940 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.00019938796652495063, "loss": 2.1337, "step": 29945 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.0001993877623306446, "loss": 2.0495, "step": 29950 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.0001993875581023859, "loss": 2.2711, "step": 29955 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.0001993873538401746, "loss": 2.3256, "step": 29960 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019938714954401075, "loss": 2.1397, "step": 29965 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.0001993869452138945, "loss": 2.1183, "step": 29970 }, { "epoch": 0.07, "grad_norm": 2.796875, "learning_rate": 0.00019938674084982581, "loss": 2.1757, "step": 29975 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019938653645180487, "loss": 2.2299, "step": 29980 }, { "epoch": 0.07, "grad_norm": 1.578125, "learning_rate": 0.00019938633201983165, "loss": 2.1828, "step": 29985 }, { "epoch": 0.07, "grad_norm": 2.265625, "learning_rate": 0.00019938612755390626, "loss": 2.3432, "step": 29990 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019938592305402878, "loss": 2.042, "step": 29995 }, { "epoch": 0.07, "grad_norm": 1.453125, "learning_rate": 0.00019938571852019926, "loss": 2.27, "step": 30000 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.00019938551395241776, "loss": 2.3234, "step": 30005 }, { "epoch": 0.07, "grad_norm": 1.5078125, "learning_rate": 0.00019938530935068437, "loss": 2.0958, "step": 30010 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.00019938510471499918, "loss": 2.1679, "step": 30015 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.0001993849000453622, "loss": 2.1448, "step": 30020 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019938469534177357, "loss": 1.9618, "step": 30025 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.0001993844906042333, "loss": 2.085, "step": 30030 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019938428583274152, "loss": 2.1039, "step": 30035 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019938408102729822, "loss": 2.2153, "step": 30040 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019938387618790356, "loss": 1.9614, "step": 30045 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019938367131455752, "loss": 2.2041, "step": 30050 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019938346640726025, "loss": 2.3132, "step": 30055 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.0001993832614660118, "loss": 2.1682, "step": 30060 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019938305649081216, "loss": 2.3629, "step": 30065 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001993828514816615, "loss": 2.1368, "step": 30070 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.0001993826464385599, "loss": 2.3935, "step": 30075 }, { "epoch": 0.07, "grad_norm": 1.8671875, "learning_rate": 0.00019938244136150733, "loss": 2.2767, "step": 30080 }, { "epoch": 0.07, "grad_norm": 1.421875, "learning_rate": 0.00019938223625050392, "loss": 2.2136, "step": 30085 }, { "epoch": 0.07, "grad_norm": 1.5859375, "learning_rate": 0.00019938203110554973, "loss": 2.2217, "step": 30090 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019938182592664486, "loss": 2.2101, "step": 30095 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019938162071378936, "loss": 2.2926, "step": 30100 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.0001993814154669833, "loss": 2.1793, "step": 30105 }, { "epoch": 0.07, "grad_norm": 2.375, "learning_rate": 0.00019938121018622673, "loss": 2.2237, "step": 30110 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.0001993810048715197, "loss": 2.2592, "step": 30115 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.0001993807995228624, "loss": 2.25, "step": 30120 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.00019938059414025477, "loss": 2.2442, "step": 30125 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.0001993803887236969, "loss": 2.1944, "step": 30130 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019938018327318892, "loss": 2.2147, "step": 30135 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019937997778873086, "loss": 2.1062, "step": 30140 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.0001993797722703228, "loss": 2.1839, "step": 30145 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.0001993795667179648, "loss": 2.1323, "step": 30150 }, { "epoch": 0.07, "grad_norm": 2.265625, "learning_rate": 0.00019937936113165696, "loss": 2.0455, "step": 30155 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.0001993791555113993, "loss": 1.9625, "step": 30160 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019937894985719195, "loss": 2.2605, "step": 30165 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.0001993787441690349, "loss": 2.0564, "step": 30170 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019937853844692832, "loss": 2.2196, "step": 30175 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.0001993783326908722, "loss": 2.1467, "step": 30180 }, { "epoch": 0.07, "grad_norm": 1.484375, "learning_rate": 0.00019937812690086663, "loss": 1.9907, "step": 30185 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.0001993779210769117, "loss": 2.1819, "step": 30190 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019937771521900748, "loss": 2.0758, "step": 30195 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019937750932715403, "loss": 2.0789, "step": 30200 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019937730340135142, "loss": 2.2085, "step": 30205 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.0001993770974415997, "loss": 2.207, "step": 30210 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019937689144789897, "loss": 2.3801, "step": 30215 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.0001993766854202493, "loss": 2.2065, "step": 30220 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019937647935865075, "loss": 2.3094, "step": 30225 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019937627326310339, "loss": 2.0572, "step": 30230 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.0001993760671336073, "loss": 2.118, "step": 30235 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.00019937586097016255, "loss": 2.0378, "step": 30240 }, { "epoch": 0.07, "grad_norm": 1.65625, "learning_rate": 0.0001993756547727692, "loss": 2.2229, "step": 30245 }, { "epoch": 0.07, "grad_norm": 2.375, "learning_rate": 0.0001993754485414273, "loss": 2.2323, "step": 30250 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.00019937524227613699, "loss": 2.1778, "step": 30255 }, { "epoch": 0.07, "grad_norm": 1.59375, "learning_rate": 0.00019937503597689823, "loss": 2.1398, "step": 30260 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.0001993748296437112, "loss": 2.1532, "step": 30265 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.0001993746232765759, "loss": 2.2729, "step": 30270 }, { "epoch": 0.07, "grad_norm": 1.375, "learning_rate": 0.00019937441687549245, "loss": 2.0491, "step": 30275 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.0001993742104404609, "loss": 2.2344, "step": 30280 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.0001993740039714813, "loss": 2.1711, "step": 30285 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019937379746855376, "loss": 2.1464, "step": 30290 }, { "epoch": 0.07, "grad_norm": 1.5859375, "learning_rate": 0.0001993735909316783, "loss": 2.1313, "step": 30295 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019937338436085505, "loss": 2.2745, "step": 30300 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019937317775608403, "loss": 2.2466, "step": 30305 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.00019937297111736533, "loss": 2.0297, "step": 30310 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019937276444469904, "loss": 2.1765, "step": 30315 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019937255773808517, "loss": 2.3568, "step": 30320 }, { "epoch": 0.07, "grad_norm": 1.6484375, "learning_rate": 0.00019937235099752387, "loss": 2.2067, "step": 30325 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.00019937214422301516, "loss": 2.3644, "step": 30330 }, { "epoch": 0.07, "grad_norm": 1.375, "learning_rate": 0.00019937193741455912, "loss": 2.2027, "step": 30335 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.0001993717305721558, "loss": 2.1532, "step": 30340 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019937152369580533, "loss": 2.1903, "step": 30345 }, { "epoch": 0.07, "grad_norm": 1.8671875, "learning_rate": 0.00019937131678550776, "loss": 2.084, "step": 30350 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019937110984126312, "loss": 2.2533, "step": 30355 }, { "epoch": 0.07, "grad_norm": 1.4609375, "learning_rate": 0.0001993709028630715, "loss": 2.0715, "step": 30360 }, { "epoch": 0.07, "grad_norm": 2.421875, "learning_rate": 0.000199370695850933, "loss": 2.1061, "step": 30365 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019937048880484766, "loss": 2.1308, "step": 30370 }, { "epoch": 0.07, "grad_norm": 1.5703125, "learning_rate": 0.00019937028172481552, "loss": 2.1554, "step": 30375 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019937007461083675, "loss": 2.0922, "step": 30380 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019936986746291134, "loss": 2.2839, "step": 30385 }, { "epoch": 0.07, "grad_norm": 1.65625, "learning_rate": 0.00019936966028103937, "loss": 2.1299, "step": 30390 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019936945306522091, "loss": 2.2321, "step": 30395 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.0001993692458154561, "loss": 2.114, "step": 30400 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.0001993690385317449, "loss": 2.211, "step": 30405 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019936883121408742, "loss": 2.1626, "step": 30410 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019936862386248381, "loss": 2.1251, "step": 30415 }, { "epoch": 0.07, "grad_norm": 2.296875, "learning_rate": 0.000199368416476934, "loss": 2.2825, "step": 30420 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.0001993682090574382, "loss": 1.9748, "step": 30425 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.0001993680016039964, "loss": 2.1252, "step": 30430 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.0001993677941166087, "loss": 2.1286, "step": 30435 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.0001993675865952751, "loss": 2.1054, "step": 30440 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.00019936737903999578, "loss": 2.165, "step": 30445 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.00019936717145077075, "loss": 2.192, "step": 30450 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.00019936696382760012, "loss": 2.2621, "step": 30455 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.0001993667561704839, "loss": 2.2443, "step": 30460 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019936654847942217, "loss": 2.0942, "step": 30465 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019936634075441508, "loss": 2.0895, "step": 30470 }, { "epoch": 0.07, "grad_norm": 2.265625, "learning_rate": 0.00019936613299546263, "loss": 2.2643, "step": 30475 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019936592520256488, "loss": 2.316, "step": 30480 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019936571737572196, "loss": 2.1129, "step": 30485 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.0001993655095149339, "loss": 2.198, "step": 30490 }, { "epoch": 0.07, "grad_norm": 2.125, "learning_rate": 0.00019936530162020078, "loss": 2.0559, "step": 30495 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019936509369152265, "loss": 2.316, "step": 30500 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019936488572889962, "loss": 2.3519, "step": 30505 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.00019936467773233173, "loss": 2.2317, "step": 30510 }, { "epoch": 0.07, "grad_norm": 1.5078125, "learning_rate": 0.0001993644697018191, "loss": 2.077, "step": 30515 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019936426163736176, "loss": 2.148, "step": 30520 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.00019936405353895972, "loss": 2.1655, "step": 30525 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019936384540661319, "loss": 2.1624, "step": 30530 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019936363724032215, "loss": 2.129, "step": 30535 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019936342904008667, "loss": 2.2254, "step": 30540 }, { "epoch": 0.07, "grad_norm": 2.1875, "learning_rate": 0.00019936322080590685, "loss": 2.3113, "step": 30545 }, { "epoch": 0.07, "grad_norm": 2.390625, "learning_rate": 0.00019936301253778275, "loss": 2.0688, "step": 30550 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019936280423571445, "loss": 2.3675, "step": 30555 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019936259589970203, "loss": 2.2801, "step": 30560 }, { "epoch": 0.07, "grad_norm": 1.4140625, "learning_rate": 0.0001993623875297455, "loss": 2.1113, "step": 30565 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.000199362179125845, "loss": 2.2277, "step": 30570 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.0001993619706880006, "loss": 2.2418, "step": 30575 }, { "epoch": 0.07, "grad_norm": 1.65625, "learning_rate": 0.00019936176221621232, "loss": 2.0225, "step": 30580 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019936155371048028, "loss": 2.3975, "step": 30585 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019936134517080451, "loss": 2.104, "step": 30590 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019936113659718512, "loss": 2.2165, "step": 30595 }, { "epoch": 0.07, "grad_norm": 1.84375, "learning_rate": 0.00019936092798962215, "loss": 2.2328, "step": 30600 }, { "epoch": 0.07, "grad_norm": 1.3359375, "learning_rate": 0.00019936071934811568, "loss": 2.274, "step": 30605 }, { "epoch": 0.07, "grad_norm": 1.5703125, "learning_rate": 0.00019936051067266582, "loss": 2.3493, "step": 30610 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019936030196327257, "loss": 2.3368, "step": 30615 }, { "epoch": 0.07, "grad_norm": 1.6171875, "learning_rate": 0.00019936009321993607, "loss": 2.1607, "step": 30620 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.00019935988444265635, "loss": 2.1791, "step": 30625 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019935967563143348, "loss": 2.4323, "step": 30630 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019935946678626754, "loss": 2.3096, "step": 30635 }, { "epoch": 0.07, "grad_norm": 1.609375, "learning_rate": 0.00019935925790715863, "loss": 2.0082, "step": 30640 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019935904899410676, "loss": 2.2003, "step": 30645 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019935884004711207, "loss": 2.1939, "step": 30650 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.0001993586310661746, "loss": 2.1024, "step": 30655 }, { "epoch": 0.07, "grad_norm": 1.796875, "learning_rate": 0.00019935842205129437, "loss": 2.156, "step": 30660 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019935821300247155, "loss": 2.2077, "step": 30665 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019935800391970615, "loss": 2.2334, "step": 30670 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.00019935779480299823, "loss": 2.2027, "step": 30675 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019935758565234793, "loss": 2.1954, "step": 30680 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.00019935737646775524, "loss": 2.1493, "step": 30685 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.0001993571672492203, "loss": 2.1007, "step": 30690 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.0001993569579967431, "loss": 2.2562, "step": 30695 }, { "epoch": 0.07, "grad_norm": 1.515625, "learning_rate": 0.00019935674871032378, "loss": 2.1168, "step": 30700 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019935653938996244, "loss": 2.2364, "step": 30705 }, { "epoch": 0.07, "grad_norm": 1.8671875, "learning_rate": 0.00019935633003565907, "loss": 2.0429, "step": 30710 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019935612064741375, "loss": 2.2369, "step": 30715 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.0001993559112252266, "loss": 2.291, "step": 30720 }, { "epoch": 0.07, "grad_norm": 2.484375, "learning_rate": 0.00019935570176909769, "loss": 2.2496, "step": 30725 }, { "epoch": 0.07, "grad_norm": 1.6484375, "learning_rate": 0.00019935549227902705, "loss": 1.8689, "step": 30730 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.0001993552827550148, "loss": 2.0061, "step": 30735 }, { "epoch": 0.07, "grad_norm": 2.25, "learning_rate": 0.00019935507319706095, "loss": 2.1256, "step": 30740 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.0001993548636051656, "loss": 2.2697, "step": 30745 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.0001993546539793288, "loss": 2.3997, "step": 30750 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019935444431955072, "loss": 2.3081, "step": 30755 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019935423462583133, "loss": 2.1534, "step": 30760 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.0001993540248981707, "loss": 2.1592, "step": 30765 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.000199353815136569, "loss": 2.232, "step": 30770 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019935360534102618, "loss": 2.1902, "step": 30775 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.0001993533955115424, "loss": 2.0666, "step": 30780 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019935318564811766, "loss": 2.1322, "step": 30785 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.00019935297575075209, "loss": 2.0832, "step": 30790 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019935276581944575, "loss": 2.1744, "step": 30795 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.0001993525558541987, "loss": 2.1946, "step": 30800 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019935234585501102, "loss": 2.3085, "step": 30805 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019935213582188274, "loss": 2.2407, "step": 30810 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019935192575481402, "loss": 2.2015, "step": 30815 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.00019935171565380486, "loss": 1.9876, "step": 30820 }, { "epoch": 0.07, "grad_norm": 1.6015625, "learning_rate": 0.00019935150551885533, "loss": 2.2684, "step": 30825 }, { "epoch": 0.07, "grad_norm": 1.59375, "learning_rate": 0.00019935129534996555, "loss": 2.2211, "step": 30830 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019935108514713555, "loss": 2.2842, "step": 30835 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019935087491036545, "loss": 2.255, "step": 30840 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019935066463965524, "loss": 2.2609, "step": 30845 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019935045433500508, "loss": 2.2457, "step": 30850 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.000199350243996415, "loss": 2.183, "step": 30855 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019935003362388506, "loss": 2.442, "step": 30860 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019934982321741536, "loss": 2.1044, "step": 30865 }, { "epoch": 0.07, "grad_norm": 2.234375, "learning_rate": 0.00019934961277700595, "loss": 2.114, "step": 30870 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019934940230265688, "loss": 2.1717, "step": 30875 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019934919179436832, "loss": 2.1699, "step": 30880 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019934898125214021, "loss": 2.0668, "step": 30885 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.00019934877067597274, "loss": 2.2642, "step": 30890 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019934856006586585, "loss": 2.2842, "step": 30895 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019934834942181976, "loss": 2.0726, "step": 30900 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019934813874383447, "loss": 2.033, "step": 30905 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019934792803191003, "loss": 2.1292, "step": 30910 }, { "epoch": 0.07, "grad_norm": 1.46875, "learning_rate": 0.00019934771728604656, "loss": 2.0868, "step": 30915 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.00019934750650624407, "loss": 2.1936, "step": 30920 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.0001993472956925027, "loss": 2.1821, "step": 30925 }, { "epoch": 0.07, "grad_norm": 1.6171875, "learning_rate": 0.0001993470848448225, "loss": 2.2569, "step": 30930 }, { "epoch": 0.07, "grad_norm": 1.6953125, "learning_rate": 0.0001993468739632035, "loss": 2.2483, "step": 30935 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019934666304764583, "loss": 2.3064, "step": 30940 }, { "epoch": 0.07, "grad_norm": 1.703125, "learning_rate": 0.00019934645209814954, "loss": 2.0946, "step": 30945 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.00019934624111471466, "loss": 2.0321, "step": 30950 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019934603009734135, "loss": 2.084, "step": 30955 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.0001993458190460296, "loss": 2.108, "step": 30960 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.00019934560796077956, "loss": 2.1994, "step": 30965 }, { "epoch": 0.07, "grad_norm": 2.671875, "learning_rate": 0.00019934539684159121, "loss": 2.0268, "step": 30970 }, { "epoch": 0.07, "grad_norm": 1.7578125, "learning_rate": 0.0001993451856884647, "loss": 2.2056, "step": 30975 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.00019934497450140007, "loss": 2.1563, "step": 30980 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.0001993447632803974, "loss": 2.3011, "step": 30985 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019934455202545674, "loss": 2.1319, "step": 30990 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.0001993443407365782, "loss": 1.9951, "step": 30995 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.0001993441294137618, "loss": 2.3297, "step": 31000 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.00019934391805700769, "loss": 2.278, "step": 31005 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019934370666631587, "loss": 2.2263, "step": 31010 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.00019934349524168644, "loss": 2.1722, "step": 31015 }, { "epoch": 0.07, "grad_norm": 1.59375, "learning_rate": 0.00019934328378311947, "loss": 2.0782, "step": 31020 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019934307229061503, "loss": 2.1736, "step": 31025 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019934286076417318, "loss": 2.1671, "step": 31030 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.00019934264920379403, "loss": 2.2747, "step": 31035 }, { "epoch": 0.07, "grad_norm": 2.484375, "learning_rate": 0.00019934243760947763, "loss": 1.9528, "step": 31040 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019934222598122403, "loss": 2.1113, "step": 31045 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019934201431903335, "loss": 2.2612, "step": 31050 }, { "epoch": 0.07, "grad_norm": 1.5703125, "learning_rate": 0.0001993418026229056, "loss": 2.1504, "step": 31055 }, { "epoch": 0.07, "grad_norm": 1.609375, "learning_rate": 0.0001993415908928409, "loss": 2.2373, "step": 31060 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019934137912883934, "loss": 2.3603, "step": 31065 }, { "epoch": 0.07, "grad_norm": 2.140625, "learning_rate": 0.00019934116733090097, "loss": 2.3038, "step": 31070 }, { "epoch": 0.07, "grad_norm": 1.5859375, "learning_rate": 0.0001993409554990258, "loss": 2.3885, "step": 31075 }, { "epoch": 0.07, "grad_norm": 1.6484375, "learning_rate": 0.00019934074363321403, "loss": 2.2697, "step": 31080 }, { "epoch": 0.07, "grad_norm": 1.578125, "learning_rate": 0.0001993405317334656, "loss": 2.2994, "step": 31085 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019934031979978065, "loss": 2.0859, "step": 31090 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019934010783215927, "loss": 2.1205, "step": 31095 }, { "epoch": 0.07, "grad_norm": 1.7890625, "learning_rate": 0.0001993398958306015, "loss": 2.2884, "step": 31100 }, { "epoch": 0.07, "grad_norm": 1.6484375, "learning_rate": 0.0001993396837951074, "loss": 2.3706, "step": 31105 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019933947172567709, "loss": 2.1632, "step": 31110 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.0001993392596223106, "loss": 2.103, "step": 31115 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019933904748500801, "loss": 2.4334, "step": 31120 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019933883531376943, "loss": 1.9639, "step": 31125 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019933862310859488, "loss": 2.2667, "step": 31130 }, { "epoch": 0.07, "grad_norm": 1.59375, "learning_rate": 0.00019933841086948445, "loss": 2.1828, "step": 31135 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019933819859643823, "loss": 2.2873, "step": 31140 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019933798628945625, "loss": 2.3235, "step": 31145 }, { "epoch": 0.07, "grad_norm": 1.5703125, "learning_rate": 0.00019933777394853865, "loss": 2.2626, "step": 31150 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019933756157368545, "loss": 2.1486, "step": 31155 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019933734916489674, "loss": 2.0722, "step": 31160 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.0001993371367221726, "loss": 2.2336, "step": 31165 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019933692424551307, "loss": 2.2996, "step": 31170 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.00019933671173491825, "loss": 2.2417, "step": 31175 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019933649919038823, "loss": 2.1768, "step": 31180 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.00019933628661192304, "loss": 2.1317, "step": 31185 }, { "epoch": 0.07, "grad_norm": 1.9921875, "learning_rate": 0.00019933607399952278, "loss": 2.106, "step": 31190 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.0001993358613531875, "loss": 2.1993, "step": 31195 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.00019933564867291733, "loss": 2.1575, "step": 31200 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019933543595871225, "loss": 2.2206, "step": 31205 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.0001993352232105724, "loss": 2.0693, "step": 31210 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019933501042849786, "loss": 2.0567, "step": 31215 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.00019933479761248866, "loss": 2.2618, "step": 31220 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019933458476254488, "loss": 2.1019, "step": 31225 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.00019933437187866663, "loss": 2.2136, "step": 31230 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019933415896085393, "loss": 2.1529, "step": 31235 }, { "epoch": 0.07, "grad_norm": 1.640625, "learning_rate": 0.0001993339460091069, "loss": 2.5664, "step": 31240 }, { "epoch": 0.07, "grad_norm": 1.7421875, "learning_rate": 0.0001993337330234256, "loss": 2.2006, "step": 31245 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019933352000381006, "loss": 2.5237, "step": 31250 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.0001993333069502604, "loss": 2.2222, "step": 31255 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.0001993330938627767, "loss": 2.0991, "step": 31260 }, { "epoch": 0.07, "grad_norm": 1.671875, "learning_rate": 0.000199332880741359, "loss": 2.1259, "step": 31265 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.0001993326675860074, "loss": 2.4283, "step": 31270 }, { "epoch": 0.07, "grad_norm": 1.75, "learning_rate": 0.00019933245439672195, "loss": 2.1823, "step": 31275 }, { "epoch": 0.07, "grad_norm": 2.25, "learning_rate": 0.00019933224117350273, "loss": 2.3321, "step": 31280 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019933202791634982, "loss": 2.2003, "step": 31285 }, { "epoch": 0.07, "grad_norm": 1.65625, "learning_rate": 0.00019933181462526326, "loss": 2.0616, "step": 31290 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019933160130024318, "loss": 2.345, "step": 31295 }, { "epoch": 0.07, "grad_norm": 2.34375, "learning_rate": 0.00019933138794128963, "loss": 2.0546, "step": 31300 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019933117454840266, "loss": 2.1612, "step": 31305 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.0001993309611215824, "loss": 2.2531, "step": 31310 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019933074766082881, "loss": 2.3007, "step": 31315 }, { "epoch": 0.07, "grad_norm": 1.5546875, "learning_rate": 0.0001993305341661421, "loss": 2.0736, "step": 31320 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019933032063752223, "loss": 2.132, "step": 31325 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019933010707496936, "loss": 2.2698, "step": 31330 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.0001993298934784835, "loss": 2.1683, "step": 31335 }, { "epoch": 0.07, "grad_norm": 1.59375, "learning_rate": 0.00019932967984806474, "loss": 2.3784, "step": 31340 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019932946618371315, "loss": 2.014, "step": 31345 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019932925248542885, "loss": 2.3842, "step": 31350 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019932903875321184, "loss": 2.3026, "step": 31355 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019932882498706228, "loss": 2.2125, "step": 31360 }, { "epoch": 0.07, "grad_norm": 1.6875, "learning_rate": 0.00019932861118698015, "loss": 2.2769, "step": 31365 }, { "epoch": 0.07, "grad_norm": 2.296875, "learning_rate": 0.00019932839735296555, "loss": 2.3208, "step": 31370 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.0001993281834850186, "loss": 2.1834, "step": 31375 }, { "epoch": 0.07, "grad_norm": 1.6171875, "learning_rate": 0.00019932796958313935, "loss": 2.185, "step": 31380 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019932775564732787, "loss": 2.3316, "step": 31385 }, { "epoch": 0.07, "grad_norm": 2.4375, "learning_rate": 0.00019932754167758417, "loss": 2.4365, "step": 31390 }, { "epoch": 0.07, "grad_norm": 1.546875, "learning_rate": 0.00019932732767390844, "loss": 1.9933, "step": 31395 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019932711363630068, "loss": 2.1675, "step": 31400 }, { "epoch": 0.07, "grad_norm": 2.15625, "learning_rate": 0.00019932689956476095, "loss": 2.3257, "step": 31405 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019932668545928937, "loss": 2.2428, "step": 31410 }, { "epoch": 0.07, "grad_norm": 1.8828125, "learning_rate": 0.000199326471319886, "loss": 2.1603, "step": 31415 }, { "epoch": 0.07, "grad_norm": 2.375, "learning_rate": 0.00019932625714655087, "loss": 2.3338, "step": 31420 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.00019932604293928412, "loss": 2.1774, "step": 31425 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019932582869808577, "loss": 2.2396, "step": 31430 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019932561442295596, "loss": 2.2987, "step": 31435 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.0001993254001138947, "loss": 2.1301, "step": 31440 }, { "epoch": 0.07, "grad_norm": 1.9765625, "learning_rate": 0.00019932518577090206, "loss": 2.1233, "step": 31445 }, { "epoch": 0.07, "grad_norm": 1.953125, "learning_rate": 0.00019932497139397816, "loss": 2.138, "step": 31450 }, { "epoch": 0.07, "grad_norm": 1.53125, "learning_rate": 0.00019932475698312304, "loss": 2.2074, "step": 31455 }, { "epoch": 0.07, "grad_norm": 1.9453125, "learning_rate": 0.00019932454253833676, "loss": 2.1689, "step": 31460 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019932432805961945, "loss": 2.1711, "step": 31465 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019932411354697113, "loss": 2.1983, "step": 31470 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019932389900039187, "loss": 2.2408, "step": 31475 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.0001993236844198818, "loss": 2.0612, "step": 31480 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019932346980544097, "loss": 2.2408, "step": 31485 }, { "epoch": 0.07, "grad_norm": 1.8671875, "learning_rate": 0.0001993232551570694, "loss": 2.2689, "step": 31490 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019932304047476723, "loss": 2.1616, "step": 31495 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019932282575853448, "loss": 2.2315, "step": 31500 }, { "epoch": 0.07, "grad_norm": 1.9609375, "learning_rate": 0.00019932261100837127, "loss": 2.0394, "step": 31505 }, { "epoch": 0.07, "grad_norm": 2.203125, "learning_rate": 0.00019932239622427766, "loss": 2.1172, "step": 31510 }, { "epoch": 0.07, "grad_norm": 1.7109375, "learning_rate": 0.00019932218140625372, "loss": 2.2941, "step": 31515 }, { "epoch": 0.07, "grad_norm": 1.7734375, "learning_rate": 0.00019932196655429954, "loss": 2.0652, "step": 31520 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.00019932175166841512, "loss": 2.2046, "step": 31525 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.00019932153674860064, "loss": 2.2743, "step": 31530 }, { "epoch": 0.07, "grad_norm": 1.5, "learning_rate": 0.00019932132179485609, "loss": 2.238, "step": 31535 }, { "epoch": 0.07, "grad_norm": 1.8515625, "learning_rate": 0.0001993211068071816, "loss": 1.9319, "step": 31540 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.0001993208917855772, "loss": 2.0476, "step": 31545 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019932067673004297, "loss": 2.3029, "step": 31550 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.000199320461640579, "loss": 2.1688, "step": 31555 }, { "epoch": 0.07, "grad_norm": 2.265625, "learning_rate": 0.0001993202465171854, "loss": 2.1101, "step": 31560 }, { "epoch": 0.07, "grad_norm": 1.828125, "learning_rate": 0.00019932003135986217, "loss": 2.1289, "step": 31565 }, { "epoch": 0.07, "grad_norm": 2.03125, "learning_rate": 0.00019931981616860942, "loss": 2.3244, "step": 31570 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019931960094342723, "loss": 2.1125, "step": 31575 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.00019931938568431564, "loss": 2.3402, "step": 31580 }, { "epoch": 0.07, "grad_norm": 1.6328125, "learning_rate": 0.00019931917039127476, "loss": 2.3522, "step": 31585 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019931895506430464, "loss": 2.196, "step": 31590 }, { "epoch": 0.07, "grad_norm": 1.8984375, "learning_rate": 0.00019931873970340539, "loss": 2.2242, "step": 31595 }, { "epoch": 0.07, "grad_norm": 1.9765625, "learning_rate": 0.00019931852430857703, "loss": 2.1631, "step": 31600 }, { "epoch": 0.07, "grad_norm": 1.9140625, "learning_rate": 0.00019931830887981965, "loss": 2.1267, "step": 31605 }, { "epoch": 0.07, "grad_norm": 1.9765625, "learning_rate": 0.00019931809341713338, "loss": 2.0808, "step": 31610 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019931787792051823, "loss": 1.9405, "step": 31615 }, { "epoch": 0.07, "grad_norm": 2.21875, "learning_rate": 0.00019931766238997427, "loss": 2.1549, "step": 31620 }, { "epoch": 0.07, "grad_norm": 1.5625, "learning_rate": 0.0001993174468255016, "loss": 2.1981, "step": 31625 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019931723122710034, "loss": 2.3154, "step": 31630 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019931701559477045, "loss": 2.1734, "step": 31635 }, { "epoch": 0.07, "grad_norm": 2.0, "learning_rate": 0.00019931679992851207, "loss": 2.1203, "step": 31640 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.0001993165842283253, "loss": 2.1332, "step": 31645 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.0001993163684942102, "loss": 2.0493, "step": 31650 }, { "epoch": 0.07, "grad_norm": 1.90625, "learning_rate": 0.00019931615272616678, "loss": 2.1649, "step": 31655 }, { "epoch": 0.07, "grad_norm": 1.6015625, "learning_rate": 0.00019931593692419517, "loss": 2.2936, "step": 31660 }, { "epoch": 0.07, "grad_norm": 2.109375, "learning_rate": 0.00019931572108829545, "loss": 2.0793, "step": 31665 }, { "epoch": 0.07, "grad_norm": 2.046875, "learning_rate": 0.00019931550521846769, "loss": 2.1463, "step": 31670 }, { "epoch": 0.07, "grad_norm": 1.765625, "learning_rate": 0.00019931528931471193, "loss": 2.0683, "step": 31675 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019931507337702826, "loss": 2.1314, "step": 31680 }, { "epoch": 0.07, "grad_norm": 1.8125, "learning_rate": 0.0001993148574054168, "loss": 2.2845, "step": 31685 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019931464139987752, "loss": 2.1905, "step": 31690 }, { "epoch": 0.07, "grad_norm": 1.890625, "learning_rate": 0.0001993144253604106, "loss": 2.0539, "step": 31695 }, { "epoch": 0.07, "grad_norm": 1.6640625, "learning_rate": 0.00019931420928701607, "loss": 2.3103, "step": 31700 }, { "epoch": 0.07, "grad_norm": 1.6796875, "learning_rate": 0.00019931399317969398, "loss": 2.2824, "step": 31705 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019931377703844447, "loss": 2.0438, "step": 31710 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019931356086326757, "loss": 2.0096, "step": 31715 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.00019931334465416332, "loss": 2.1365, "step": 31720 }, { "epoch": 0.07, "grad_norm": 1.4375, "learning_rate": 0.00019931312841113186, "loss": 2.1638, "step": 31725 }, { "epoch": 0.07, "grad_norm": 2.234375, "learning_rate": 0.0001993129121341732, "loss": 2.1193, "step": 31730 }, { "epoch": 0.07, "grad_norm": 1.625, "learning_rate": 0.00019931269582328746, "loss": 2.1128, "step": 31735 }, { "epoch": 0.07, "grad_norm": 2.265625, "learning_rate": 0.00019931247947847474, "loss": 2.2368, "step": 31740 }, { "epoch": 0.07, "grad_norm": 1.921875, "learning_rate": 0.00019931226309973502, "loss": 2.0736, "step": 31745 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019931204668706846, "loss": 2.2159, "step": 31750 }, { "epoch": 0.07, "grad_norm": 2.171875, "learning_rate": 0.00019931183024047511, "loss": 2.2479, "step": 31755 }, { "epoch": 0.07, "grad_norm": 1.71875, "learning_rate": 0.00019931161375995503, "loss": 1.9299, "step": 31760 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.0001993113972455083, "loss": 2.1272, "step": 31765 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.000199311180697135, "loss": 2.1543, "step": 31770 }, { "epoch": 0.07, "grad_norm": 1.7265625, "learning_rate": 0.00019931096411483518, "loss": 2.2594, "step": 31775 }, { "epoch": 0.07, "grad_norm": 1.984375, "learning_rate": 0.00019931074749860895, "loss": 2.0876, "step": 31780 }, { "epoch": 0.07, "grad_norm": 1.78125, "learning_rate": 0.00019931053084845638, "loss": 2.2732, "step": 31785 }, { "epoch": 0.07, "grad_norm": 1.875, "learning_rate": 0.0001993103141643775, "loss": 2.3469, "step": 31790 }, { "epoch": 0.07, "grad_norm": 1.734375, "learning_rate": 0.00019931009744637244, "loss": 2.1666, "step": 31795 }, { "epoch": 0.07, "grad_norm": 1.8046875, "learning_rate": 0.00019930988069444126, "loss": 2.4649, "step": 31800 }, { "epoch": 0.07, "grad_norm": 1.859375, "learning_rate": 0.00019930966390858398, "loss": 2.3426, "step": 31805 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.00019930944708880074, "loss": 2.1381, "step": 31810 }, { "epoch": 0.07, "grad_norm": 2.09375, "learning_rate": 0.0001993092302350916, "loss": 2.1966, "step": 31815 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019930901334745663, "loss": 2.2943, "step": 31820 }, { "epoch": 0.07, "grad_norm": 2.078125, "learning_rate": 0.0001993087964258959, "loss": 2.2056, "step": 31825 }, { "epoch": 0.07, "grad_norm": 2.0625, "learning_rate": 0.0001993085794704095, "loss": 2.3531, "step": 31830 }, { "epoch": 0.07, "grad_norm": 1.9375, "learning_rate": 0.00019930836248099746, "loss": 2.2577, "step": 31835 }, { "epoch": 0.07, "grad_norm": 1.8203125, "learning_rate": 0.00019930814545765984, "loss": 2.0896, "step": 31840 }, { "epoch": 0.07, "grad_norm": 1.9296875, "learning_rate": 0.00019930792840039684, "loss": 2.3536, "step": 31845 }, { "epoch": 0.07, "grad_norm": 1.96875, "learning_rate": 0.0001993077113092084, "loss": 2.4849, "step": 31850 }, { "epoch": 0.07, "grad_norm": 1.8359375, "learning_rate": 0.00019930749418409465, "loss": 2.1475, "step": 31855 }, { "epoch": 0.07, "grad_norm": 2.234375, "learning_rate": 0.00019930727702505567, "loss": 2.1614, "step": 31860 }, { "epoch": 0.07, "grad_norm": 2.015625, "learning_rate": 0.00019930705983209153, "loss": 2.2922, "step": 31865 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019930684260520228, "loss": 2.0227, "step": 31870 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.000199306625344388, "loss": 2.1424, "step": 31875 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019930640804964882, "loss": 2.2831, "step": 31880 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019930619072098472, "loss": 2.1045, "step": 31885 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019930597335839588, "loss": 2.1301, "step": 31890 }, { "epoch": 0.08, "grad_norm": 1.5078125, "learning_rate": 0.00019930575596188228, "loss": 2.3928, "step": 31895 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019930553853144404, "loss": 2.3124, "step": 31900 }, { "epoch": 0.08, "grad_norm": 2.171875, "learning_rate": 0.00019930532106708122, "loss": 2.34, "step": 31905 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019930510356879394, "loss": 1.9722, "step": 31910 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019930488603658219, "loss": 2.1837, "step": 31915 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.0001993046684704461, "loss": 2.1748, "step": 31920 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.0001993044508703857, "loss": 2.3065, "step": 31925 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019930423323640117, "loss": 2.0074, "step": 31930 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.0001993040155684925, "loss": 2.2715, "step": 31935 }, { "epoch": 0.08, "grad_norm": 1.53125, "learning_rate": 0.00019930379786665974, "loss": 2.2399, "step": 31940 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019930358013090302, "loss": 2.0417, "step": 31945 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019930336236122237, "loss": 2.1367, "step": 31950 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019930314455761795, "loss": 2.1805, "step": 31955 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019930292672008974, "loss": 2.1633, "step": 31960 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019930270884863785, "loss": 2.2099, "step": 31965 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019930249094326233, "loss": 2.2256, "step": 31970 }, { "epoch": 0.08, "grad_norm": 1.5546875, "learning_rate": 0.00019930227300396332, "loss": 2.1542, "step": 31975 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.0001993020550307408, "loss": 2.2643, "step": 31980 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019930183702359497, "loss": 2.0406, "step": 31985 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.00019930161898252578, "loss": 2.1986, "step": 31990 }, { "epoch": 0.08, "grad_norm": 2.25, "learning_rate": 0.00019930140090753336, "loss": 2.1259, "step": 31995 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001993011827986178, "loss": 2.2488, "step": 32000 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.00019930096465577913, "loss": 2.2054, "step": 32005 }, { "epoch": 0.08, "grad_norm": 1.5234375, "learning_rate": 0.00019930074647901748, "loss": 2.2034, "step": 32010 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.00019930052826833288, "loss": 2.4387, "step": 32015 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.0001993003100237254, "loss": 2.31, "step": 32020 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019930009174519517, "loss": 2.1678, "step": 32025 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.0001992998734327422, "loss": 2.0168, "step": 32030 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.0001992996550863666, "loss": 2.1026, "step": 32035 }, { "epoch": 0.08, "grad_norm": 1.4609375, "learning_rate": 0.00019929943670606845, "loss": 2.3496, "step": 32040 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.0001992992182918478, "loss": 2.0104, "step": 32045 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019929899984370473, "loss": 2.1803, "step": 32050 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 0.00019929878136163933, "loss": 2.1742, "step": 32055 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019929856284565164, "loss": 2.1627, "step": 32060 }, { "epoch": 0.08, "grad_norm": 2.234375, "learning_rate": 0.00019929834429574178, "loss": 2.2904, "step": 32065 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.0001992981257119098, "loss": 2.2167, "step": 32070 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.0001992979070941558, "loss": 2.3052, "step": 32075 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.00019929768844247984, "loss": 2.2697, "step": 32080 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019929746975688194, "loss": 2.2737, "step": 32085 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019929725103736226, "loss": 2.2832, "step": 32090 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019929703228392084, "loss": 2.1901, "step": 32095 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019929681349655773, "loss": 2.4535, "step": 32100 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019929659467527304, "loss": 2.1664, "step": 32105 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.00019929637582006684, "loss": 2.2073, "step": 32110 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.0001992961569309392, "loss": 2.1676, "step": 32115 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019929593800789016, "loss": 2.1502, "step": 32120 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019929571905091985, "loss": 2.368, "step": 32125 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019929550006002832, "loss": 2.1826, "step": 32130 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019929528103521563, "loss": 2.0834, "step": 32135 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.0001992950619764819, "loss": 2.1698, "step": 32140 }, { "epoch": 0.08, "grad_norm": 1.5625, "learning_rate": 0.0001992948428838272, "loss": 2.1532, "step": 32145 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.00019929462375725153, "loss": 2.2403, "step": 32150 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019929440459675503, "loss": 2.3077, "step": 32155 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019929418540233775, "loss": 2.2081, "step": 32160 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.0001992939661739998, "loss": 2.2925, "step": 32165 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019929374691174121, "loss": 2.3053, "step": 32170 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019929352761556207, "loss": 2.0942, "step": 32175 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001992933082854625, "loss": 2.1196, "step": 32180 }, { "epoch": 0.08, "grad_norm": 2.265625, "learning_rate": 0.00019929308892144247, "loss": 2.205, "step": 32185 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019929286952350217, "loss": 2.1768, "step": 32190 }, { "epoch": 0.08, "grad_norm": 1.6015625, "learning_rate": 0.00019929265009164163, "loss": 2.1717, "step": 32195 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019929243062586087, "loss": 2.2061, "step": 32200 }, { "epoch": 0.08, "grad_norm": 1.546875, "learning_rate": 0.00019929221112616007, "loss": 2.2393, "step": 32205 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001992919915925392, "loss": 2.198, "step": 32210 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019929177202499844, "loss": 2.1278, "step": 32215 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019929155242353778, "loss": 2.2275, "step": 32220 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019929133278815731, "loss": 2.0931, "step": 32225 }, { "epoch": 0.08, "grad_norm": 1.5625, "learning_rate": 0.00019929111311885715, "loss": 2.2616, "step": 32230 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.0001992908934156373, "loss": 2.1979, "step": 32235 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019929067367849794, "loss": 2.3684, "step": 32240 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019929045390743906, "loss": 2.2949, "step": 32245 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019929023410246078, "loss": 2.2069, "step": 32250 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.00019929001426356312, "loss": 2.1686, "step": 32255 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019928979439074617, "loss": 2.0941, "step": 32260 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019928957448401007, "loss": 2.2889, "step": 32265 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019928935454335483, "loss": 2.2237, "step": 32270 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019928913456878058, "loss": 2.1182, "step": 32275 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019928891456028733, "loss": 1.9339, "step": 32280 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.0001992886945178752, "loss": 2.2643, "step": 32285 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019928847444154423, "loss": 1.936, "step": 32290 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019928825433129452, "loss": 2.3794, "step": 32295 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 0.00019928803418712616, "loss": 2.0938, "step": 32300 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.0001992878140090392, "loss": 2.1789, "step": 32305 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.0001992875937970337, "loss": 2.4678, "step": 32310 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.0001992873735511098, "loss": 2.3689, "step": 32315 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.0001992871532712675, "loss": 2.1203, "step": 32320 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019928693295750692, "loss": 2.1495, "step": 32325 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019928671260982812, "loss": 2.1383, "step": 32330 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001992864922282312, "loss": 2.1532, "step": 32335 }, { "epoch": 0.08, "grad_norm": 1.4921875, "learning_rate": 0.0001992862718127162, "loss": 2.0565, "step": 32340 }, { "epoch": 0.08, "grad_norm": 2.21875, "learning_rate": 0.0001992860513632832, "loss": 2.342, "step": 32345 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019928583087993227, "loss": 2.2792, "step": 32350 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019928561036266351, "loss": 2.2313, "step": 32355 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.000199285389811477, "loss": 2.2752, "step": 32360 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001992851692263728, "loss": 2.2916, "step": 32365 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019928494860735097, "loss": 2.0723, "step": 32370 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.0001992847279544116, "loss": 2.1781, "step": 32375 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.0001992845072675548, "loss": 2.3212, "step": 32380 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019928428654678056, "loss": 2.1518, "step": 32385 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019928406579208902, "loss": 2.1556, "step": 32390 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019928384500348027, "loss": 1.9514, "step": 32395 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019928362418095431, "loss": 2.1328, "step": 32400 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019928340332451132, "loss": 2.1212, "step": 32405 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.0001992831824341513, "loss": 2.1136, "step": 32410 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019928296150987433, "loss": 2.1499, "step": 32415 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019928274055168053, "loss": 2.0086, "step": 32420 }, { "epoch": 0.08, "grad_norm": 2.234375, "learning_rate": 0.0001992825195595699, "loss": 2.3549, "step": 32425 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.0001992822985335426, "loss": 2.1549, "step": 32430 }, { "epoch": 0.08, "grad_norm": 2.640625, "learning_rate": 0.00019928207747359867, "loss": 2.1721, "step": 32435 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019928185637973817, "loss": 2.1363, "step": 32440 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019928163525196116, "loss": 2.0181, "step": 32445 }, { "epoch": 0.08, "grad_norm": 1.3828125, "learning_rate": 0.00019928141409026774, "loss": 1.982, "step": 32450 }, { "epoch": 0.08, "grad_norm": 1.5703125, "learning_rate": 0.00019928119289465803, "loss": 2.2035, "step": 32455 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019928097166513204, "loss": 2.2347, "step": 32460 }, { "epoch": 0.08, "grad_norm": 2.171875, "learning_rate": 0.00019928075040168987, "loss": 2.3067, "step": 32465 }, { "epoch": 0.08, "grad_norm": 1.4921875, "learning_rate": 0.00019928052910433159, "loss": 2.0529, "step": 32470 }, { "epoch": 0.08, "grad_norm": 1.5703125, "learning_rate": 0.00019928030777305731, "loss": 2.2452, "step": 32475 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019928008640786703, "loss": 1.9965, "step": 32480 }, { "epoch": 0.08, "grad_norm": 1.5390625, "learning_rate": 0.00019927986500876092, "loss": 2.0944, "step": 32485 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.000199279643575739, "loss": 2.2204, "step": 32490 }, { "epoch": 0.08, "grad_norm": 2.375, "learning_rate": 0.00019927942210880132, "loss": 2.1769, "step": 32495 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019927920060794802, "loss": 2.217, "step": 32500 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019927897907317914, "loss": 2.1059, "step": 32505 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019927875750449473, "loss": 2.0931, "step": 32510 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019927853590189493, "loss": 2.178, "step": 32515 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.00019927831426537975, "loss": 2.2515, "step": 32520 }, { "epoch": 0.08, "grad_norm": 1.671875, "learning_rate": 0.00019927809259494934, "loss": 2.2683, "step": 32525 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001992778708906037, "loss": 2.1374, "step": 32530 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019927764915234297, "loss": 2.1728, "step": 32535 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019927742738016715, "loss": 2.0967, "step": 32540 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.0001992772055740764, "loss": 2.2163, "step": 32545 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019927698373407073, "loss": 2.2153, "step": 32550 }, { "epoch": 0.08, "grad_norm": 1.6328125, "learning_rate": 0.00019927676186015026, "loss": 2.1956, "step": 32555 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019927653995231503, "loss": 2.1349, "step": 32560 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.00019927631801056514, "loss": 2.2206, "step": 32565 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019927609603490067, "loss": 2.2498, "step": 32570 }, { "epoch": 0.08, "grad_norm": 1.4765625, "learning_rate": 0.0001992758740253217, "loss": 2.1711, "step": 32575 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019927565198182826, "loss": 2.1161, "step": 32580 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019927542990442047, "loss": 2.1544, "step": 32585 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019927520779309838, "loss": 1.9696, "step": 32590 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.0001992749856478621, "loss": 2.14, "step": 32595 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019927476346871166, "loss": 2.0366, "step": 32600 }, { "epoch": 0.08, "grad_norm": 2.875, "learning_rate": 0.00019927454125564717, "loss": 2.2094, "step": 32605 }, { "epoch": 0.08, "grad_norm": 1.7421875, "learning_rate": 0.00019927431900866867, "loss": 2.2269, "step": 32610 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019927409672777633, "loss": 2.1249, "step": 32615 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019927387441297012, "loss": 2.2408, "step": 32620 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019927365206425014, "loss": 1.9622, "step": 32625 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019927342968161651, "loss": 2.0044, "step": 32630 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019927320726506923, "loss": 2.0867, "step": 32635 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.00019927298481460848, "loss": 2.2698, "step": 32640 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.00019927276233023422, "loss": 2.1342, "step": 32645 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019927253981194662, "loss": 2.1707, "step": 32650 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019927231725974572, "loss": 2.3, "step": 32655 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.0001992720946736316, "loss": 2.2084, "step": 32660 }, { "epoch": 0.08, "grad_norm": 2.15625, "learning_rate": 0.0001992718720536043, "loss": 2.2305, "step": 32665 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019927164939966395, "loss": 2.3698, "step": 32670 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001992714267118106, "loss": 2.1882, "step": 32675 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019927120399004436, "loss": 2.1746, "step": 32680 }, { "epoch": 0.08, "grad_norm": 2.65625, "learning_rate": 0.00019927098123436523, "loss": 2.1956, "step": 32685 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019927075844477336, "loss": 2.0444, "step": 32690 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019927053562126878, "loss": 2.0511, "step": 32695 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.0001992703127638516, "loss": 2.2367, "step": 32700 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001992700898725219, "loss": 2.1963, "step": 32705 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.0001992698669472797, "loss": 2.2854, "step": 32710 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.0001992696439881251, "loss": 2.2307, "step": 32715 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019926942099505822, "loss": 2.1725, "step": 32720 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001992691979680791, "loss": 2.2545, "step": 32725 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019926897490718783, "loss": 2.1829, "step": 32730 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.0001992687518123845, "loss": 2.2567, "step": 32735 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.0001992685286836691, "loss": 2.2784, "step": 32740 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.00019926830552104182, "loss": 2.2003, "step": 32745 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.00019926808232450266, "loss": 2.132, "step": 32750 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019926785909405174, "loss": 2.2476, "step": 32755 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019926763582968913, "loss": 2.114, "step": 32760 }, { "epoch": 0.08, "grad_norm": 2.265625, "learning_rate": 0.00019926741253141486, "loss": 2.0652, "step": 32765 }, { "epoch": 0.08, "grad_norm": 2.28125, "learning_rate": 0.00019926718919922905, "loss": 2.2209, "step": 32770 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.0001992669658331318, "loss": 2.1164, "step": 32775 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019926674243312312, "loss": 2.1706, "step": 32780 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019926651899920312, "loss": 2.204, "step": 32785 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019926629553137192, "loss": 2.1165, "step": 32790 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.0001992660720296295, "loss": 2.1946, "step": 32795 }, { "epoch": 0.08, "grad_norm": 2.0, "learning_rate": 0.000199265848493976, "loss": 2.0451, "step": 32800 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019926562492441152, "loss": 2.0454, "step": 32805 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019926540132093608, "loss": 2.2807, "step": 32810 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.00019926517768354977, "loss": 1.9554, "step": 32815 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.00019926495401225268, "loss": 2.2535, "step": 32820 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001992647303070449, "loss": 2.1233, "step": 32825 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.0001992645065679265, "loss": 2.24, "step": 32830 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.0001992642827948975, "loss": 2.1592, "step": 32835 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019926405898795801, "loss": 2.2145, "step": 32840 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019926383514710816, "loss": 2.1238, "step": 32845 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019926361127234796, "loss": 2.2548, "step": 32850 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019926338736367752, "loss": 2.1826, "step": 32855 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001992631634210969, "loss": 2.3167, "step": 32860 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019926293944460622, "loss": 2.2105, "step": 32865 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019926271543420547, "loss": 2.2919, "step": 32870 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.0001992624913898948, "loss": 2.2728, "step": 32875 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019926226731167422, "loss": 2.2872, "step": 32880 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019926204319954392, "loss": 2.2412, "step": 32885 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019926181905350387, "loss": 2.1858, "step": 32890 }, { "epoch": 0.08, "grad_norm": 2.375, "learning_rate": 0.00019926159487355417, "loss": 2.081, "step": 32895 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.00019926137065969494, "loss": 2.2468, "step": 32900 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.0001992611464119262, "loss": 2.1526, "step": 32905 }, { "epoch": 0.08, "grad_norm": 1.5390625, "learning_rate": 0.00019926092213024807, "loss": 2.1933, "step": 32910 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001992606978146606, "loss": 2.3823, "step": 32915 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.00019926047346516387, "loss": 2.2628, "step": 32920 }, { "epoch": 0.08, "grad_norm": 2.375, "learning_rate": 0.00019926024908175797, "loss": 2.1522, "step": 32925 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019926002466444297, "loss": 1.989, "step": 32930 }, { "epoch": 0.08, "grad_norm": 1.6640625, "learning_rate": 0.00019925980021321893, "loss": 1.9704, "step": 32935 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.00019925957572808594, "loss": 2.243, "step": 32940 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.0001992593512090441, "loss": 2.039, "step": 32945 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.0001992591266560935, "loss": 2.209, "step": 32950 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019925890206923412, "loss": 2.1688, "step": 32955 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019925867744846612, "loss": 2.2785, "step": 32960 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019925845279378956, "loss": 1.9424, "step": 32965 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001992582281052045, "loss": 2.3339, "step": 32970 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019925800338271103, "loss": 2.3061, "step": 32975 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019925777862630923, "loss": 2.2062, "step": 32980 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.0001992575538359992, "loss": 2.2515, "step": 32985 }, { "epoch": 0.08, "grad_norm": 1.5234375, "learning_rate": 0.00019925732901178095, "loss": 2.2666, "step": 32990 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.0001992571041536546, "loss": 2.1842, "step": 32995 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019925687926162025, "loss": 2.2543, "step": 33000 }, { "epoch": 0.08, "grad_norm": 1.5390625, "learning_rate": 0.00019925665433567793, "loss": 2.2414, "step": 33005 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019925642937582773, "loss": 2.0772, "step": 33010 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019925620438206976, "loss": 2.3662, "step": 33015 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019925597935440404, "loss": 2.1974, "step": 33020 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019925575429283069, "loss": 2.2077, "step": 33025 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019925552919734978, "loss": 2.152, "step": 33030 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 0.0001992553040679614, "loss": 2.1379, "step": 33035 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019925507890466556, "loss": 2.0806, "step": 33040 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019925485370746242, "loss": 2.114, "step": 33045 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019925462847635202, "loss": 2.1554, "step": 33050 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.0001992544032113344, "loss": 2.3392, "step": 33055 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019925417791240974, "loss": 2.2773, "step": 33060 }, { "epoch": 0.08, "grad_norm": 1.5078125, "learning_rate": 0.00019925395257957803, "loss": 2.1933, "step": 33065 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019925372721283933, "loss": 2.221, "step": 33070 }, { "epoch": 0.08, "grad_norm": 1.515625, "learning_rate": 0.0001992535018121938, "loss": 2.1214, "step": 33075 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019925327637764143, "loss": 2.1675, "step": 33080 }, { "epoch": 0.08, "grad_norm": 1.5, "learning_rate": 0.00019925305090918237, "loss": 2.1818, "step": 33085 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001992528254068167, "loss": 2.2936, "step": 33090 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019925259987054443, "loss": 2.1225, "step": 33095 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019925237430036568, "loss": 2.3089, "step": 33100 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019925214869628052, "loss": 2.2639, "step": 33105 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.000199251923058289, "loss": 2.157, "step": 33110 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019925169738639124, "loss": 2.1679, "step": 33115 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001992514716805873, "loss": 2.2629, "step": 33120 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019925124594087726, "loss": 2.1531, "step": 33125 }, { "epoch": 0.08, "grad_norm": 1.453125, "learning_rate": 0.0001992510201672612, "loss": 2.3051, "step": 33130 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.0001992507943597392, "loss": 2.2795, "step": 33135 }, { "epoch": 0.08, "grad_norm": 2.34375, "learning_rate": 0.0001992505685183113, "loss": 2.3549, "step": 33140 }, { "epoch": 0.08, "grad_norm": 1.7421875, "learning_rate": 0.00019925034264297762, "loss": 2.1589, "step": 33145 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019925011673373823, "loss": 2.3292, "step": 33150 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001992498907905932, "loss": 2.2397, "step": 33155 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001992496648135426, "loss": 2.1232, "step": 33160 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.0001992494388025865, "loss": 2.2214, "step": 33165 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019924921275772501, "loss": 2.1544, "step": 33170 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.0001992489866789582, "loss": 2.1665, "step": 33175 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.00019924876056628613, "loss": 2.0764, "step": 33180 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019924853441970886, "loss": 2.265, "step": 33185 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.0001992483082392265, "loss": 2.2544, "step": 33190 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019924808202483914, "loss": 2.2403, "step": 33195 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.00019924785577654682, "loss": 2.0856, "step": 33200 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019924762949434963, "loss": 2.3007, "step": 33205 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019924740317824765, "loss": 2.2609, "step": 33210 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019924717682824096, "loss": 2.437, "step": 33215 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019924695044432964, "loss": 2.3148, "step": 33220 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019924672402651377, "loss": 2.1296, "step": 33225 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.0001992464975747934, "loss": 2.2418, "step": 33230 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019924627108916862, "loss": 2.1861, "step": 33235 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019924604456963955, "loss": 2.2299, "step": 33240 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.0001992458180162062, "loss": 2.2491, "step": 33245 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019924559142886868, "loss": 2.1924, "step": 33250 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019924536480762705, "loss": 2.4058, "step": 33255 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019924513815248142, "loss": 2.1755, "step": 33260 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019924491146343186, "loss": 2.1502, "step": 33265 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.00019924468474047843, "loss": 2.2598, "step": 33270 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019924445798362125, "loss": 2.299, "step": 33275 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001992442311928603, "loss": 2.2894, "step": 33280 }, { "epoch": 0.08, "grad_norm": 2.515625, "learning_rate": 0.00019924400436819575, "loss": 2.1201, "step": 33285 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019924377750962766, "loss": 2.1501, "step": 33290 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.0001992435506171561, "loss": 2.1979, "step": 33295 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019924332369078115, "loss": 2.2353, "step": 33300 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019924309673050283, "loss": 2.2045, "step": 33305 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001992428697363213, "loss": 2.0334, "step": 33310 }, { "epoch": 0.08, "grad_norm": 2.21875, "learning_rate": 0.0001992426427082366, "loss": 2.028, "step": 33315 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.0001992424156462488, "loss": 2.2312, "step": 33320 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019924218855035803, "loss": 2.4036, "step": 33325 }, { "epoch": 0.08, "grad_norm": 2.15625, "learning_rate": 0.00019924196142056432, "loss": 2.2718, "step": 33330 }, { "epoch": 0.08, "grad_norm": 2.296875, "learning_rate": 0.00019924173425686775, "loss": 2.1135, "step": 33335 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001992415070592684, "loss": 2.2114, "step": 33340 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.00019924127982776636, "loss": 2.0988, "step": 33345 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019924105256236167, "loss": 2.2978, "step": 33350 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019924082526305445, "loss": 2.0967, "step": 33355 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.00019924059792984477, "loss": 2.1179, "step": 33360 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.0001992403705627327, "loss": 2.2593, "step": 33365 }, { "epoch": 0.08, "grad_norm": 1.546875, "learning_rate": 0.00019924014316171832, "loss": 2.0592, "step": 33370 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.0001992399157268017, "loss": 2.2521, "step": 33375 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019923968825798295, "loss": 2.3037, "step": 33380 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.0001992394607552621, "loss": 2.138, "step": 33385 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019923923321863923, "loss": 2.3781, "step": 33390 }, { "epoch": 0.08, "grad_norm": 1.6640625, "learning_rate": 0.0001992390056481145, "loss": 2.2081, "step": 33395 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019923877804368787, "loss": 2.2604, "step": 33400 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.0001992385504053595, "loss": 1.9647, "step": 33405 }, { "epoch": 0.08, "grad_norm": 1.4609375, "learning_rate": 0.00019923832273312947, "loss": 2.1265, "step": 33410 }, { "epoch": 0.08, "grad_norm": 1.5625, "learning_rate": 0.00019923809502699776, "loss": 2.0893, "step": 33415 }, { "epoch": 0.08, "grad_norm": 2.40625, "learning_rate": 0.00019923786728696455, "loss": 2.121, "step": 33420 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019923763951302988, "loss": 2.2033, "step": 33425 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019923741170519387, "loss": 2.0512, "step": 33430 }, { "epoch": 0.08, "grad_norm": 1.6328125, "learning_rate": 0.0001992371838634565, "loss": 2.2603, "step": 33435 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019923695598781795, "loss": 2.1603, "step": 33440 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019923672807827824, "loss": 2.1949, "step": 33445 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.00019923650013483748, "loss": 2.1054, "step": 33450 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019923627215749572, "loss": 2.2158, "step": 33455 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 0.00019923604414625302, "loss": 2.0603, "step": 33460 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019923581610110954, "loss": 2.3862, "step": 33465 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019923558802206528, "loss": 2.1814, "step": 33470 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019923535990912036, "loss": 2.0784, "step": 33475 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 0.00019923513176227478, "loss": 2.1935, "step": 33480 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019923490358152876, "loss": 2.2551, "step": 33485 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019923467536688225, "loss": 2.3035, "step": 33490 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.0001992344471183354, "loss": 2.127, "step": 33495 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.00019923421883588825, "loss": 2.15, "step": 33500 }, { "epoch": 0.08, "grad_norm": 1.53125, "learning_rate": 0.00019923399051954087, "loss": 2.2686, "step": 33505 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.0001992337621692934, "loss": 2.2901, "step": 33510 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019923353378514585, "loss": 2.1811, "step": 33515 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019923330536709836, "loss": 2.3023, "step": 33520 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019923307691515092, "loss": 2.1631, "step": 33525 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001992328484293037, "loss": 2.199, "step": 33530 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.00019923261990955672, "loss": 2.1734, "step": 33535 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.0001992323913559101, "loss": 2.0968, "step": 33540 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.00019923216276836387, "loss": 2.3439, "step": 33545 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019923193414691815, "loss": 2.2642, "step": 33550 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.000199231705491573, "loss": 2.2248, "step": 33555 }, { "epoch": 0.08, "grad_norm": 1.4609375, "learning_rate": 0.00019923147680232849, "loss": 2.273, "step": 33560 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001992312480791847, "loss": 2.2572, "step": 33565 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019923101932214175, "loss": 2.2634, "step": 33570 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.00019923079053119964, "loss": 2.3729, "step": 33575 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.0001992305617063585, "loss": 2.2042, "step": 33580 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019923033284761845, "loss": 2.1905, "step": 33585 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.0001992301039549795, "loss": 2.2291, "step": 33590 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019922987502844173, "loss": 2.1055, "step": 33595 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.0001992296460680052, "loss": 2.2246, "step": 33600 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019922941707367007, "loss": 2.1284, "step": 33605 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.00019922918804543636, "loss": 2.234, "step": 33610 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019922895898330417, "loss": 2.2574, "step": 33615 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019922872988727356, "loss": 2.3493, "step": 33620 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019922850075734463, "loss": 2.1565, "step": 33625 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019922827159351742, "loss": 2.2888, "step": 33630 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019922804239579203, "loss": 2.2569, "step": 33635 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019922781316416858, "loss": 2.2371, "step": 33640 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019922758389864705, "loss": 2.2398, "step": 33645 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019922735459922762, "loss": 2.2383, "step": 33650 }, { "epoch": 0.08, "grad_norm": 2.3125, "learning_rate": 0.0001992271252659103, "loss": 2.0524, "step": 33655 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019922689589869523, "loss": 2.2701, "step": 33660 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019922666649758246, "loss": 2.1945, "step": 33665 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019922643706257203, "loss": 2.2558, "step": 33670 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.00019922620759366404, "loss": 2.2088, "step": 33675 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001992259780908586, "loss": 2.3346, "step": 33680 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019922574855415576, "loss": 2.2182, "step": 33685 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.0001992255189835556, "loss": 2.3065, "step": 33690 }, { "epoch": 0.08, "grad_norm": 2.515625, "learning_rate": 0.0001992252893790582, "loss": 2.2182, "step": 33695 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019922505974066365, "loss": 2.1735, "step": 33700 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019922483006837202, "loss": 2.0896, "step": 33705 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.0001992246003621834, "loss": 2.2043, "step": 33710 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019922437062209783, "loss": 2.1695, "step": 33715 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.0001992241408481154, "loss": 2.0971, "step": 33720 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019922391104023628, "loss": 2.1598, "step": 33725 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019922368119846043, "loss": 2.5038, "step": 33730 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019922345132278798, "loss": 2.0322, "step": 33735 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019922322141321898, "loss": 2.1372, "step": 33740 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.0001992229914697535, "loss": 2.0098, "step": 33745 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.0001992227614923917, "loss": 2.1872, "step": 33750 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.00019922253148113358, "loss": 2.2471, "step": 33755 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019922230143597927, "loss": 2.0412, "step": 33760 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019922207135692879, "loss": 2.2112, "step": 33765 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019922184124398227, "loss": 2.26, "step": 33770 }, { "epoch": 0.08, "grad_norm": 1.5234375, "learning_rate": 0.00019922161109713974, "loss": 1.947, "step": 33775 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019922138091640134, "loss": 2.1501, "step": 33780 }, { "epoch": 0.08, "grad_norm": 1.453125, "learning_rate": 0.00019922115070176711, "loss": 2.1367, "step": 33785 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019922092045323716, "loss": 2.2365, "step": 33790 }, { "epoch": 0.08, "grad_norm": 1.6015625, "learning_rate": 0.00019922069017081149, "loss": 2.0981, "step": 33795 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.00019922045985449027, "loss": 2.0829, "step": 33800 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.00019922022950427356, "loss": 2.1853, "step": 33805 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.00019921999912016135, "loss": 2.1683, "step": 33810 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019921976870215384, "loss": 2.299, "step": 33815 }, { "epoch": 0.08, "grad_norm": 1.65625, "learning_rate": 0.00019921953825025106, "loss": 2.0434, "step": 33820 }, { "epoch": 0.08, "grad_norm": 2.5, "learning_rate": 0.00019921930776445307, "loss": 2.2454, "step": 33825 }, { "epoch": 0.08, "grad_norm": 1.65625, "learning_rate": 0.00019921907724475998, "loss": 2.1018, "step": 33830 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019921884669117185, "loss": 2.1345, "step": 33835 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.00019921861610368874, "loss": 2.1512, "step": 33840 }, { "epoch": 0.08, "grad_norm": 2.25, "learning_rate": 0.0001992183854823108, "loss": 2.2874, "step": 33845 }, { "epoch": 0.08, "grad_norm": 2.25, "learning_rate": 0.000199218154827038, "loss": 2.1494, "step": 33850 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019921792413787052, "loss": 2.2565, "step": 33855 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019921769341480836, "loss": 2.1494, "step": 33860 }, { "epoch": 0.08, "grad_norm": 2.328125, "learning_rate": 0.00019921746265785166, "loss": 2.234, "step": 33865 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019921723186700047, "loss": 1.9654, "step": 33870 }, { "epoch": 0.08, "grad_norm": 1.6015625, "learning_rate": 0.0001992170010422549, "loss": 2.1937, "step": 33875 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.000199216770183615, "loss": 2.158, "step": 33880 }, { "epoch": 0.08, "grad_norm": 1.71875, "learning_rate": 0.0001992165392910808, "loss": 2.4381, "step": 33885 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.00019921630836465247, "loss": 2.1896, "step": 33890 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.00019921607740433007, "loss": 2.1357, "step": 33895 }, { "epoch": 0.08, "grad_norm": 2.34375, "learning_rate": 0.0001992158464101136, "loss": 2.2029, "step": 33900 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.00019921561538200323, "loss": 2.2254, "step": 33905 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019921538431999904, "loss": 2.1985, "step": 33910 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019921515322410103, "loss": 2.2118, "step": 33915 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.00019921492209430935, "loss": 2.1675, "step": 33920 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019921469093062404, "loss": 2.2417, "step": 33925 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.0001992144597330452, "loss": 1.9881, "step": 33930 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.00019921422850157288, "loss": 2.1563, "step": 33935 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.0001992139972362072, "loss": 2.281, "step": 33940 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.00019921376593694823, "loss": 2.3597, "step": 33945 }, { "epoch": 0.08, "grad_norm": 1.4921875, "learning_rate": 0.000199213534603796, "loss": 2.2298, "step": 33950 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019921330323675066, "loss": 2.1773, "step": 33955 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019921307183581223, "loss": 2.1984, "step": 33960 }, { "epoch": 0.08, "grad_norm": 1.6640625, "learning_rate": 0.00019921284040098084, "loss": 2.3006, "step": 33965 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019921260893225652, "loss": 2.2853, "step": 33970 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.0001992123774296394, "loss": 2.078, "step": 33975 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019921214589312954, "loss": 2.3868, "step": 33980 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.000199211914322727, "loss": 2.2151, "step": 33985 }, { "epoch": 0.08, "grad_norm": 1.5625, "learning_rate": 0.00019921168271843184, "loss": 2.2754, "step": 33990 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.0001992114510802442, "loss": 2.2484, "step": 33995 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001992112194081641, "loss": 2.1658, "step": 34000 }, { "epoch": 0.08, "grad_norm": 1.5390625, "learning_rate": 0.0001992109877021917, "loss": 2.1285, "step": 34005 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019921075596232698, "loss": 2.1212, "step": 34010 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019921052418857009, "loss": 2.1203, "step": 34015 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019921029238092108, "loss": 2.0987, "step": 34020 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.00019921006053938003, "loss": 2.2718, "step": 34025 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019920982866394703, "loss": 2.2498, "step": 34030 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019920959675462217, "loss": 2.2422, "step": 34035 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019920936481140547, "loss": 2.2114, "step": 34040 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.0001992091328342971, "loss": 2.3589, "step": 34045 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019920890082329707, "loss": 2.2261, "step": 34050 }, { "epoch": 0.08, "grad_norm": 2.34375, "learning_rate": 0.00019920866877840547, "loss": 2.3555, "step": 34055 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019920843669962238, "loss": 2.1646, "step": 34060 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019920820458694792, "loss": 2.1811, "step": 34065 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019920797244038214, "loss": 2.0567, "step": 34070 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.0001992077402599251, "loss": 2.2187, "step": 34075 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019920750804557687, "loss": 2.1615, "step": 34080 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.0001992072757973376, "loss": 2.0821, "step": 34085 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019920704351520733, "loss": 2.0958, "step": 34090 }, { "epoch": 0.08, "grad_norm": 2.359375, "learning_rate": 0.0001992068111991861, "loss": 2.2072, "step": 34095 }, { "epoch": 0.08, "grad_norm": 1.609375, "learning_rate": 0.00019920657884927402, "loss": 2.3035, "step": 34100 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.0001992063464654712, "loss": 2.1781, "step": 34105 }, { "epoch": 0.08, "grad_norm": 1.671875, "learning_rate": 0.00019920611404777768, "loss": 2.2976, "step": 34110 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.00019920588159619355, "loss": 2.1372, "step": 34115 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.0001992056491107189, "loss": 2.1939, "step": 34120 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019920541659135382, "loss": 2.3174, "step": 34125 }, { "epoch": 0.08, "grad_norm": 1.90625, "learning_rate": 0.00019920518403809836, "loss": 2.2719, "step": 34130 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019920495145095258, "loss": 2.1945, "step": 34135 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019920471882991662, "loss": 2.3257, "step": 34140 }, { "epoch": 0.08, "grad_norm": 1.7421875, "learning_rate": 0.0001992044861749905, "loss": 2.218, "step": 34145 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.00019920425348617435, "loss": 2.1525, "step": 34150 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019920402076346825, "loss": 2.1895, "step": 34155 }, { "epoch": 0.08, "grad_norm": 2.328125, "learning_rate": 0.00019920378800687222, "loss": 2.2155, "step": 34160 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001992035552163864, "loss": 2.1016, "step": 34165 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019920332239201083, "loss": 2.1838, "step": 34170 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.0001992030895337456, "loss": 2.1475, "step": 34175 }, { "epoch": 0.08, "grad_norm": 1.265625, "learning_rate": 0.00019920285664159083, "loss": 1.9582, "step": 34180 }, { "epoch": 0.08, "grad_norm": 1.609375, "learning_rate": 0.00019920262371554652, "loss": 2.1247, "step": 34185 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.0001992023907556128, "loss": 2.2008, "step": 34190 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.0001992021577617898, "loss": 2.2143, "step": 34195 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.0001992019247340775, "loss": 2.2022, "step": 34200 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019920169167247603, "loss": 2.1269, "step": 34205 }, { "epoch": 0.08, "grad_norm": 1.59375, "learning_rate": 0.00019920145857698547, "loss": 2.2477, "step": 34210 }, { "epoch": 0.08, "grad_norm": 1.71875, "learning_rate": 0.00019920122544760588, "loss": 2.2256, "step": 34215 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019920099228433732, "loss": 2.2021, "step": 34220 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019920075908717996, "loss": 2.3004, "step": 34225 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.0001992005258561338, "loss": 2.1404, "step": 34230 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019920029259119893, "loss": 2.0711, "step": 34235 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019920005929237547, "loss": 2.237, "step": 34240 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019919982595966343, "loss": 2.1093, "step": 34245 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019919959259306297, "loss": 2.0941, "step": 34250 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.0001991993591925741, "loss": 2.1923, "step": 34255 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019919912575819693, "loss": 2.1487, "step": 34260 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.00019919889228993157, "loss": 2.0153, "step": 34265 }, { "epoch": 0.08, "grad_norm": 3.390625, "learning_rate": 0.00019919865878777806, "loss": 2.1095, "step": 34270 }, { "epoch": 0.08, "grad_norm": 1.65625, "learning_rate": 0.00019919842525173648, "loss": 2.2683, "step": 34275 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019919819168180691, "loss": 2.0378, "step": 34280 }, { "epoch": 0.08, "grad_norm": 2.28125, "learning_rate": 0.00019919795807798947, "loss": 2.3259, "step": 34285 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019919772444028418, "loss": 2.1383, "step": 34290 }, { "epoch": 0.08, "grad_norm": 1.609375, "learning_rate": 0.00019919749076869115, "loss": 2.1731, "step": 34295 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.00019919725706321045, "loss": 2.2854, "step": 34300 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019919702332384218, "loss": 2.2696, "step": 34305 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.0001991967895505864, "loss": 2.3214, "step": 34310 }, { "epoch": 0.08, "grad_norm": 1.5546875, "learning_rate": 0.00019919655574344322, "loss": 2.1861, "step": 34315 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001991963219024127, "loss": 2.1482, "step": 34320 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001991960880274949, "loss": 2.2937, "step": 34325 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.0001991958541186899, "loss": 2.0559, "step": 34330 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019919562017599782, "loss": 2.1437, "step": 34335 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.0001991953861994187, "loss": 2.0793, "step": 34340 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019919515218895265, "loss": 2.098, "step": 34345 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019919491814459974, "loss": 2.2278, "step": 34350 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019919468406636007, "loss": 2.0996, "step": 34355 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019919444995423365, "loss": 2.1669, "step": 34360 }, { "epoch": 0.08, "grad_norm": 1.71875, "learning_rate": 0.00019919421580822064, "loss": 2.0625, "step": 34365 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019919398162832107, "loss": 2.2546, "step": 34370 }, { "epoch": 0.08, "grad_norm": 1.6640625, "learning_rate": 0.00019919374741453504, "loss": 2.1851, "step": 34375 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019919351316686264, "loss": 2.0152, "step": 34380 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019919327888530392, "loss": 2.2111, "step": 34385 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019919304456985897, "loss": 2.2547, "step": 34390 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.0001991928102205279, "loss": 2.2575, "step": 34395 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019919257583731078, "loss": 2.3239, "step": 34400 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019919234142020762, "loss": 2.1727, "step": 34405 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019919210696921858, "loss": 2.0198, "step": 34410 }, { "epoch": 0.08, "grad_norm": 1.390625, "learning_rate": 0.00019919187248434374, "loss": 2.1657, "step": 34415 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.00019919163796558315, "loss": 2.0874, "step": 34420 }, { "epoch": 0.08, "grad_norm": 1.5859375, "learning_rate": 0.00019919140341293688, "loss": 2.1723, "step": 34425 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019919116882640504, "loss": 2.2644, "step": 34430 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.0001991909342059877, "loss": 2.1478, "step": 34435 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019919069955168494, "loss": 2.1188, "step": 34440 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019919046486349684, "loss": 2.1614, "step": 34445 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019919023014142346, "loss": 2.018, "step": 34450 }, { "epoch": 0.08, "grad_norm": 2.46875, "learning_rate": 0.00019918999538546492, "loss": 1.9574, "step": 34455 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019918976059562127, "loss": 2.0431, "step": 34460 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.0001991895257718926, "loss": 2.1655, "step": 34465 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.000199189290914279, "loss": 2.1369, "step": 34470 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019918905602278052, "loss": 2.1306, "step": 34475 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019918882109739726, "loss": 2.1732, "step": 34480 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019918858613812932, "loss": 2.3403, "step": 34485 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019918835114497675, "loss": 2.3144, "step": 34490 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019918811611793964, "loss": 2.1098, "step": 34495 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019918788105701806, "loss": 2.2466, "step": 34500 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001991876459622121, "loss": 2.2396, "step": 34505 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019918741083352185, "loss": 2.1002, "step": 34510 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.0001991871756709474, "loss": 2.0211, "step": 34515 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019918694047448877, "loss": 2.2319, "step": 34520 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.0001991867052441461, "loss": 2.3284, "step": 34525 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019918646997991947, "loss": 2.2188, "step": 34530 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.0001991862346818089, "loss": 2.1662, "step": 34535 }, { "epoch": 0.08, "grad_norm": 2.234375, "learning_rate": 0.0001991859993498146, "loss": 2.2255, "step": 34540 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.0001991857639839365, "loss": 2.2327, "step": 34545 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019918552858417475, "loss": 2.0748, "step": 34550 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019918529315052943, "loss": 2.0231, "step": 34555 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.0001991850576830006, "loss": 2.3118, "step": 34560 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.0001991848221815884, "loss": 2.0996, "step": 34565 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019918458664629283, "loss": 2.1661, "step": 34570 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.000199184351077114, "loss": 2.0978, "step": 34575 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019918411547405201, "loss": 2.2403, "step": 34580 }, { "epoch": 0.08, "grad_norm": 1.46875, "learning_rate": 0.00019918387983710692, "loss": 2.3443, "step": 34585 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019918364416627882, "loss": 2.2167, "step": 34590 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.0001991834084615678, "loss": 2.161, "step": 34595 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.0001991831727229739, "loss": 2.2069, "step": 34600 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019918293695049725, "loss": 2.3798, "step": 34605 }, { "epoch": 0.08, "grad_norm": 2.515625, "learning_rate": 0.00019918270114413793, "loss": 2.2611, "step": 34610 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.00019918246530389598, "loss": 2.0714, "step": 34615 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019918222942977146, "loss": 2.1346, "step": 34620 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019918199352176458, "loss": 2.1573, "step": 34625 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.00019918175757987526, "loss": 2.2192, "step": 34630 }, { "epoch": 0.08, "grad_norm": 1.453125, "learning_rate": 0.00019918152160410368, "loss": 2.1435, "step": 34635 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019918128559444988, "loss": 2.2247, "step": 34640 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019918104955091398, "loss": 2.158, "step": 34645 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019918081347349603, "loss": 2.0986, "step": 34650 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.0001991805773621961, "loss": 2.1988, "step": 34655 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.0001991803412170143, "loss": 2.2423, "step": 34660 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.00019918010503795067, "loss": 2.189, "step": 34665 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019917986882500533, "loss": 2.2727, "step": 34670 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019917963257817838, "loss": 2.3973, "step": 34675 }, { "epoch": 0.08, "grad_norm": 1.609375, "learning_rate": 0.00019917939629746981, "loss": 2.1952, "step": 34680 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001991791599828798, "loss": 2.2038, "step": 34685 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019917892363440837, "loss": 2.2882, "step": 34690 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019917868725205566, "loss": 2.2476, "step": 34695 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.00019917845083582166, "loss": 2.1867, "step": 34700 }, { "epoch": 0.08, "grad_norm": 2.421875, "learning_rate": 0.00019917821438570653, "loss": 2.2068, "step": 34705 }, { "epoch": 0.08, "grad_norm": 1.65625, "learning_rate": 0.00019917797790171033, "loss": 2.4568, "step": 34710 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.0001991777413838331, "loss": 2.1173, "step": 34715 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.000199177504832075, "loss": 2.2212, "step": 34720 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019917726824643606, "loss": 2.0485, "step": 34725 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019917703162691634, "loss": 2.1731, "step": 34730 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019917679497351596, "loss": 2.2402, "step": 34735 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.000199176558286235, "loss": 2.3385, "step": 34740 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.0001991763215650735, "loss": 2.1478, "step": 34745 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.0001991760848100316, "loss": 2.1253, "step": 34750 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.0001991758480211093, "loss": 2.1897, "step": 34755 }, { "epoch": 0.08, "grad_norm": 2.0, "learning_rate": 0.00019917561119830679, "loss": 2.3385, "step": 34760 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019917537434162408, "loss": 2.2373, "step": 34765 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019917513745106128, "loss": 2.0243, "step": 34770 }, { "epoch": 0.08, "grad_norm": 2.234375, "learning_rate": 0.0001991749005266184, "loss": 2.2998, "step": 34775 }, { "epoch": 0.08, "grad_norm": 2.171875, "learning_rate": 0.00019917466356829562, "loss": 2.1861, "step": 34780 }, { "epoch": 0.08, "grad_norm": 2.0, "learning_rate": 0.00019917442657609296, "loss": 2.2698, "step": 34785 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.0001991741895500105, "loss": 2.1769, "step": 34790 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019917395249004837, "loss": 2.2123, "step": 34795 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.0001991737153962066, "loss": 2.2084, "step": 34800 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019917347826848528, "loss": 2.3614, "step": 34805 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.0001991732411068845, "loss": 2.2875, "step": 34810 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019917300391140438, "loss": 2.2646, "step": 34815 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019917276668204495, "loss": 2.3087, "step": 34820 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.0001991725294188063, "loss": 2.2796, "step": 34825 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.0001991722921216885, "loss": 2.1537, "step": 34830 }, { "epoch": 0.08, "grad_norm": 1.6328125, "learning_rate": 0.0001991720547906917, "loss": 1.8978, "step": 34835 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019917181742581586, "loss": 2.0912, "step": 34840 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019917158002706112, "loss": 2.3541, "step": 34845 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019917134259442762, "loss": 2.193, "step": 34850 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.0001991711051279154, "loss": 2.1727, "step": 34855 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.0001991708676275245, "loss": 2.2728, "step": 34860 }, { "epoch": 0.08, "grad_norm": 1.453125, "learning_rate": 0.00019917063009325503, "loss": 2.3597, "step": 34865 }, { "epoch": 0.08, "grad_norm": 2.15625, "learning_rate": 0.00019917039252510707, "loss": 2.2334, "step": 34870 }, { "epoch": 0.08, "grad_norm": 1.4375, "learning_rate": 0.00019917015492308074, "loss": 2.231, "step": 34875 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019916991728717606, "loss": 1.9447, "step": 34880 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019916967961739314, "loss": 2.0687, "step": 34885 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019916944191373208, "loss": 2.2341, "step": 34890 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019916920417619292, "loss": 2.332, "step": 34895 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019916896640477573, "loss": 2.1581, "step": 34900 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.0001991687285994807, "loss": 2.1006, "step": 34905 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019916849076030777, "loss": 2.1952, "step": 34910 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019916825288725712, "loss": 2.2203, "step": 34915 }, { "epoch": 0.08, "grad_norm": 2.171875, "learning_rate": 0.00019916801498032877, "loss": 2.1107, "step": 34920 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019916777703952283, "loss": 2.4122, "step": 34925 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001991675390648394, "loss": 2.2124, "step": 34930 }, { "epoch": 0.08, "grad_norm": 1.546875, "learning_rate": 0.00019916730105627852, "loss": 2.075, "step": 34935 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.0001991670630138403, "loss": 2.211, "step": 34940 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.0001991668249375248, "loss": 2.1097, "step": 34945 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019916658682733214, "loss": 2.311, "step": 34950 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019916634868326235, "loss": 2.1388, "step": 34955 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019916611050531556, "loss": 2.3149, "step": 34960 }, { "epoch": 0.08, "grad_norm": 2.234375, "learning_rate": 0.00019916587229349181, "loss": 2.2679, "step": 34965 }, { "epoch": 0.08, "grad_norm": 1.5234375, "learning_rate": 0.0001991656340477912, "loss": 2.067, "step": 34970 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019916539576821384, "loss": 2.3168, "step": 34975 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.00019916515745475977, "loss": 2.0046, "step": 34980 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019916491910742905, "loss": 2.1821, "step": 34985 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019916468072622183, "loss": 1.9475, "step": 34990 }, { "epoch": 0.08, "grad_norm": 1.578125, "learning_rate": 0.00019916444231113815, "loss": 2.268, "step": 34995 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019916420386217808, "loss": 2.2139, "step": 35000 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019916396537934174, "loss": 2.2314, "step": 35005 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.0001991637268626292, "loss": 2.011, "step": 35010 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.0001991634883120405, "loss": 2.1668, "step": 35015 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019916324972757575, "loss": 2.3104, "step": 35020 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019916301110923506, "loss": 2.3044, "step": 35025 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.0001991627724570185, "loss": 2.0061, "step": 35030 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.0001991625337709261, "loss": 2.1568, "step": 35035 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.000199162295050958, "loss": 2.1051, "step": 35040 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019916205629711428, "loss": 2.3096, "step": 35045 }, { "epoch": 0.08, "grad_norm": 1.5078125, "learning_rate": 0.000199161817509395, "loss": 2.1765, "step": 35050 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019916157868780023, "loss": 2.2514, "step": 35055 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019916133983233008, "loss": 2.1008, "step": 35060 }, { "epoch": 0.08, "grad_norm": 1.765625, "learning_rate": 0.00019916110094298457, "loss": 2.1976, "step": 35065 }, { "epoch": 0.08, "grad_norm": 2.53125, "learning_rate": 0.0001991608620197639, "loss": 2.3235, "step": 35070 }, { "epoch": 0.08, "grad_norm": 1.4765625, "learning_rate": 0.00019916062306266804, "loss": 2.1428, "step": 35075 }, { "epoch": 0.08, "grad_norm": 2.171875, "learning_rate": 0.0001991603840716971, "loss": 2.0775, "step": 35080 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019916014504685122, "loss": 2.2426, "step": 35085 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019915990598813038, "loss": 2.182, "step": 35090 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019915966689553475, "loss": 2.2779, "step": 35095 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.0001991594277690644, "loss": 2.1753, "step": 35100 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019915918860871935, "loss": 2.1667, "step": 35105 }, { "epoch": 0.08, "grad_norm": 1.7421875, "learning_rate": 0.00019915894941449974, "loss": 2.2413, "step": 35110 }, { "epoch": 0.08, "grad_norm": 1.5078125, "learning_rate": 0.00019915871018640565, "loss": 2.0492, "step": 35115 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.00019915847092443712, "loss": 2.1803, "step": 35120 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.00019915823162859427, "loss": 2.2062, "step": 35125 }, { "epoch": 0.08, "grad_norm": 1.4921875, "learning_rate": 0.00019915799229887715, "loss": 2.2551, "step": 35130 }, { "epoch": 0.08, "grad_norm": 2.0, "learning_rate": 0.0001991577529352859, "loss": 2.0495, "step": 35135 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019915751353782052, "loss": 2.2513, "step": 35140 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.00019915727410648118, "loss": 2.2173, "step": 35145 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001991570346412679, "loss": 2.2847, "step": 35150 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019915679514218075, "loss": 2.2469, "step": 35155 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019915655560921988, "loss": 2.1481, "step": 35160 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019915631604238533, "loss": 2.2007, "step": 35165 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.00019915607644167715, "loss": 2.0992, "step": 35170 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019915583680709547, "loss": 2.3415, "step": 35175 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.0001991555971386404, "loss": 2.2131, "step": 35180 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019915535743631193, "loss": 2.1481, "step": 35185 }, { "epoch": 0.08, "grad_norm": 2.734375, "learning_rate": 0.00019915511770011022, "loss": 2.2092, "step": 35190 }, { "epoch": 0.08, "grad_norm": 1.46875, "learning_rate": 0.00019915487793003528, "loss": 2.005, "step": 35195 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.0001991546381260873, "loss": 2.0379, "step": 35200 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019915439828826624, "loss": 2.1466, "step": 35205 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019915415841657226, "loss": 2.1954, "step": 35210 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.0001991539185110054, "loss": 2.004, "step": 35215 }, { "epoch": 0.08, "grad_norm": 1.484375, "learning_rate": 0.0001991536785715658, "loss": 2.1761, "step": 35220 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.0001991534385982535, "loss": 2.2429, "step": 35225 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019915319859106857, "loss": 2.1239, "step": 35230 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019915295855001111, "loss": 2.145, "step": 35235 }, { "epoch": 0.08, "grad_norm": 1.8203125, "learning_rate": 0.0001991527184750812, "loss": 2.0759, "step": 35240 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.00019915247836627896, "loss": 2.1851, "step": 35245 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001991522382236044, "loss": 2.2442, "step": 35250 }, { "epoch": 0.08, "grad_norm": 1.671875, "learning_rate": 0.00019915199804705766, "loss": 2.252, "step": 35255 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.00019915175783663877, "loss": 2.2186, "step": 35260 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019915151759234787, "loss": 2.0331, "step": 35265 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.000199151277314185, "loss": 2.2121, "step": 35270 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019915103700215024, "loss": 2.243, "step": 35275 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.0001991507966562437, "loss": 2.1043, "step": 35280 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019915055627646547, "loss": 2.0143, "step": 35285 }, { "epoch": 0.08, "grad_norm": 2.515625, "learning_rate": 0.00019915031586281557, "loss": 2.3236, "step": 35290 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019915007541529417, "loss": 2.2928, "step": 35295 }, { "epoch": 0.08, "grad_norm": 2.328125, "learning_rate": 0.00019914983493390126, "loss": 2.1342, "step": 35300 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.000199149594418637, "loss": 2.123, "step": 35305 }, { "epoch": 0.08, "grad_norm": 1.65625, "learning_rate": 0.0001991493538695014, "loss": 2.0715, "step": 35310 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019914911328649466, "loss": 2.1618, "step": 35315 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019914887266961673, "loss": 2.1665, "step": 35320 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019914863201886775, "loss": 2.2898, "step": 35325 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019914839133424781, "loss": 2.2858, "step": 35330 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019914815061575697, "loss": 2.2306, "step": 35335 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.00019914790986339534, "loss": 2.2051, "step": 35340 }, { "epoch": 0.08, "grad_norm": 3.671875, "learning_rate": 0.00019914766907716297, "loss": 2.2092, "step": 35345 }, { "epoch": 0.08, "grad_norm": 1.8515625, "learning_rate": 0.00019914742825705996, "loss": 2.0767, "step": 35350 }, { "epoch": 0.08, "grad_norm": 2.34375, "learning_rate": 0.0001991471874030864, "loss": 2.3074, "step": 35355 }, { "epoch": 0.08, "grad_norm": 2.09375, "learning_rate": 0.00019914694651524236, "loss": 2.2848, "step": 35360 }, { "epoch": 0.08, "grad_norm": 2.203125, "learning_rate": 0.00019914670559352792, "loss": 2.1423, "step": 35365 }, { "epoch": 0.08, "grad_norm": 2.59375, "learning_rate": 0.00019914646463794315, "loss": 2.0722, "step": 35370 }, { "epoch": 0.08, "grad_norm": 1.65625, "learning_rate": 0.0001991462236484882, "loss": 2.2237, "step": 35375 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.00019914598262516306, "loss": 2.0971, "step": 35380 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019914574156796786, "loss": 2.1431, "step": 35385 }, { "epoch": 0.08, "grad_norm": 1.609375, "learning_rate": 0.00019914550047690267, "loss": 2.0603, "step": 35390 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.0001991452593519676, "loss": 2.14, "step": 35395 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001991450181931627, "loss": 2.2737, "step": 35400 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.00019914477700048807, "loss": 2.197, "step": 35405 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.0001991445357739438, "loss": 2.1891, "step": 35410 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.0001991442945135299, "loss": 2.1694, "step": 35415 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019914405321924656, "loss": 2.2078, "step": 35420 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.0001991438118910938, "loss": 2.1664, "step": 35425 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019914357052907173, "loss": 2.3381, "step": 35430 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019914332913318041, "loss": 2.1204, "step": 35435 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019914308770341993, "loss": 2.0911, "step": 35440 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019914284623979037, "loss": 2.2759, "step": 35445 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019914260474229183, "loss": 2.1832, "step": 35450 }, { "epoch": 0.08, "grad_norm": 2.4375, "learning_rate": 0.00019914236321092433, "loss": 2.1923, "step": 35455 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.00019914212164568807, "loss": 2.3029, "step": 35460 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.000199141880046583, "loss": 2.1939, "step": 35465 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019914163841360931, "loss": 2.0303, "step": 35470 }, { "epoch": 0.08, "grad_norm": 2.265625, "learning_rate": 0.00019914139674676701, "loss": 2.2532, "step": 35475 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019914115504605624, "loss": 2.2368, "step": 35480 }, { "epoch": 0.08, "grad_norm": 1.625, "learning_rate": 0.00019914091331147703, "loss": 2.073, "step": 35485 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.0001991406715430295, "loss": 2.2936, "step": 35490 }, { "epoch": 0.08, "grad_norm": 1.953125, "learning_rate": 0.0001991404297407137, "loss": 2.2047, "step": 35495 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019914018790452977, "loss": 2.1494, "step": 35500 }, { "epoch": 0.08, "grad_norm": 1.9296875, "learning_rate": 0.00019913994603447773, "loss": 2.1683, "step": 35505 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019913970413055766, "loss": 2.2538, "step": 35510 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.0001991394621927697, "loss": 2.1505, "step": 35515 }, { "epoch": 0.08, "grad_norm": 1.5390625, "learning_rate": 0.0001991392202211139, "loss": 2.0175, "step": 35520 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019913897821559033, "loss": 2.145, "step": 35525 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019913873617619912, "loss": 2.356, "step": 35530 }, { "epoch": 0.08, "grad_norm": 1.6484375, "learning_rate": 0.00019913849410294027, "loss": 2.15, "step": 35535 }, { "epoch": 0.08, "grad_norm": 1.984375, "learning_rate": 0.00019913825199581396, "loss": 2.3671, "step": 35540 }, { "epoch": 0.08, "grad_norm": 2.1875, "learning_rate": 0.0001991380098548202, "loss": 2.4067, "step": 35545 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001991377676799591, "loss": 2.2182, "step": 35550 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019913752547123074, "loss": 2.0447, "step": 35555 }, { "epoch": 0.08, "grad_norm": 1.5859375, "learning_rate": 0.0001991372832286352, "loss": 2.213, "step": 35560 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.0001991370409521726, "loss": 2.24, "step": 35565 }, { "epoch": 0.08, "grad_norm": 1.6015625, "learning_rate": 0.00019913679864184294, "loss": 2.285, "step": 35570 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019913655629764636, "loss": 1.9456, "step": 35575 }, { "epoch": 0.08, "grad_norm": 2.34375, "learning_rate": 0.00019913631391958294, "loss": 2.2973, "step": 35580 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.00019913607150765276, "loss": 2.1546, "step": 35585 }, { "epoch": 0.08, "grad_norm": 2.265625, "learning_rate": 0.0001991358290618559, "loss": 2.0638, "step": 35590 }, { "epoch": 0.08, "grad_norm": 1.9921875, "learning_rate": 0.00019913558658219246, "loss": 2.2284, "step": 35595 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.0001991353440686625, "loss": 2.2259, "step": 35600 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019913510152126608, "loss": 2.1544, "step": 35605 }, { "epoch": 0.08, "grad_norm": 1.7421875, "learning_rate": 0.00019913485894000336, "loss": 2.2221, "step": 35610 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019913461632487434, "loss": 2.2556, "step": 35615 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019913437367587915, "loss": 2.0517, "step": 35620 }, { "epoch": 0.08, "grad_norm": 1.609375, "learning_rate": 0.00019913413099301782, "loss": 2.0986, "step": 35625 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019913388827629054, "loss": 2.1472, "step": 35630 }, { "epoch": 0.08, "grad_norm": 1.6171875, "learning_rate": 0.0001991336455256973, "loss": 2.2186, "step": 35635 }, { "epoch": 0.08, "grad_norm": 1.8671875, "learning_rate": 0.0001991334027412382, "loss": 2.2491, "step": 35640 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019913315992291334, "loss": 2.0668, "step": 35645 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019913291707072278, "loss": 1.9646, "step": 35650 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019913267418466662, "loss": 1.9939, "step": 35655 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019913243126474495, "loss": 2.2177, "step": 35660 }, { "epoch": 0.08, "grad_norm": 1.3671875, "learning_rate": 0.00019913218831095781, "loss": 2.1401, "step": 35665 }, { "epoch": 0.08, "grad_norm": 2.515625, "learning_rate": 0.00019913194532330538, "loss": 2.0652, "step": 35670 }, { "epoch": 0.08, "grad_norm": 1.6953125, "learning_rate": 0.00019913170230178762, "loss": 2.1053, "step": 35675 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001991314592464047, "loss": 2.1982, "step": 35680 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.0001991312161571567, "loss": 2.2012, "step": 35685 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019913097303404364, "loss": 2.3055, "step": 35690 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.00019913072987706566, "loss": 2.1013, "step": 35695 }, { "epoch": 0.08, "grad_norm": 2.421875, "learning_rate": 0.00019913048668622282, "loss": 2.1033, "step": 35700 }, { "epoch": 0.08, "grad_norm": 1.640625, "learning_rate": 0.00019913024346151522, "loss": 2.2512, "step": 35705 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.0001991300002029429, "loss": 2.2923, "step": 35710 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.000199129756910506, "loss": 2.2587, "step": 35715 }, { "epoch": 0.08, "grad_norm": 1.6640625, "learning_rate": 0.00019912951358420458, "loss": 2.0906, "step": 35720 }, { "epoch": 0.08, "grad_norm": 1.71875, "learning_rate": 0.00019912927022403869, "loss": 2.334, "step": 35725 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019912902683000847, "loss": 2.1995, "step": 35730 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019912878340211395, "loss": 2.1454, "step": 35735 }, { "epoch": 0.08, "grad_norm": 1.5234375, "learning_rate": 0.00019912853994035528, "loss": 2.0437, "step": 35740 }, { "epoch": 0.08, "grad_norm": 2.328125, "learning_rate": 0.00019912829644473247, "loss": 2.2191, "step": 35745 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019912805291524567, "loss": 2.0913, "step": 35750 }, { "epoch": 0.08, "grad_norm": 1.828125, "learning_rate": 0.0001991278093518949, "loss": 2.2093, "step": 35755 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019912756575468028, "loss": 2.1997, "step": 35760 }, { "epoch": 0.08, "grad_norm": 1.9453125, "learning_rate": 0.0001991273221236019, "loss": 2.2765, "step": 35765 }, { "epoch": 0.08, "grad_norm": 2.28125, "learning_rate": 0.00019912707845865982, "loss": 2.4476, "step": 35770 }, { "epoch": 0.08, "grad_norm": 1.9140625, "learning_rate": 0.00019912683475985409, "loss": 2.0644, "step": 35775 }, { "epoch": 0.08, "grad_norm": 1.734375, "learning_rate": 0.0001991265910271849, "loss": 2.1812, "step": 35780 }, { "epoch": 0.08, "grad_norm": 1.6328125, "learning_rate": 0.00019912634726065224, "loss": 2.3247, "step": 35785 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019912610346025622, "loss": 2.1694, "step": 35790 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019912585962599693, "loss": 2.1538, "step": 35795 }, { "epoch": 0.08, "grad_norm": 1.890625, "learning_rate": 0.00019912561575787446, "loss": 2.2514, "step": 35800 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019912537185588883, "loss": 2.2249, "step": 35805 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019912512792004026, "loss": 2.2133, "step": 35810 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.0001991248839503287, "loss": 2.1523, "step": 35815 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019912463994675427, "loss": 2.1681, "step": 35820 }, { "epoch": 0.08, "grad_norm": 1.7265625, "learning_rate": 0.0001991243959093171, "loss": 2.2203, "step": 35825 }, { "epoch": 0.08, "grad_norm": 1.9609375, "learning_rate": 0.00019912415183801722, "loss": 2.3492, "step": 35830 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019912390773285473, "loss": 2.1704, "step": 35835 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.0001991236635938297, "loss": 2.1424, "step": 35840 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019912341942094223, "loss": 2.21, "step": 35845 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019912317521419244, "loss": 2.2154, "step": 35850 }, { "epoch": 0.08, "grad_norm": 1.7734375, "learning_rate": 0.00019912293097358035, "loss": 2.245, "step": 35855 }, { "epoch": 0.08, "grad_norm": 1.78125, "learning_rate": 0.0001991226866991061, "loss": 2.1135, "step": 35860 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.0001991224423907697, "loss": 2.0822, "step": 35865 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019912219804857129, "loss": 2.1377, "step": 35870 }, { "epoch": 0.08, "grad_norm": 2.296875, "learning_rate": 0.00019912195367251094, "loss": 2.2225, "step": 35875 }, { "epoch": 0.08, "grad_norm": 2.046875, "learning_rate": 0.00019912170926258873, "loss": 2.23, "step": 35880 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019912146481880475, "loss": 2.1113, "step": 35885 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019912122034115908, "loss": 2.1468, "step": 35890 }, { "epoch": 0.08, "grad_norm": 2.296875, "learning_rate": 0.00019912097582965182, "loss": 2.1445, "step": 35895 }, { "epoch": 0.08, "grad_norm": 1.7890625, "learning_rate": 0.00019912073128428302, "loss": 2.0531, "step": 35900 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019912048670505276, "loss": 2.1102, "step": 35905 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019912024209196117, "loss": 2.2588, "step": 35910 }, { "epoch": 0.08, "grad_norm": 1.859375, "learning_rate": 0.00019911999744500828, "loss": 2.0709, "step": 35915 }, { "epoch": 0.08, "grad_norm": 1.7109375, "learning_rate": 0.00019911975276419423, "loss": 2.1531, "step": 35920 }, { "epoch": 0.08, "grad_norm": 2.0625, "learning_rate": 0.00019911950804951908, "loss": 2.2805, "step": 35925 }, { "epoch": 0.08, "grad_norm": 1.71875, "learning_rate": 0.0001991192633009829, "loss": 2.2619, "step": 35930 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.0001991190185185858, "loss": 2.339, "step": 35935 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019911877370232782, "loss": 2.0692, "step": 35940 }, { "epoch": 0.08, "grad_norm": 1.8125, "learning_rate": 0.00019911852885220904, "loss": 2.2757, "step": 35945 }, { "epoch": 0.08, "grad_norm": 1.84375, "learning_rate": 0.00019911828396822965, "loss": 2.1284, "step": 35950 }, { "epoch": 0.08, "grad_norm": 1.703125, "learning_rate": 0.0001991180390503896, "loss": 2.3405, "step": 35955 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019911779409868902, "loss": 2.2818, "step": 35960 }, { "epoch": 0.08, "grad_norm": 2.125, "learning_rate": 0.00019911754911312803, "loss": 2.2642, "step": 35965 }, { "epoch": 0.08, "grad_norm": 2.078125, "learning_rate": 0.00019911730409370672, "loss": 2.065, "step": 35970 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019911705904042507, "loss": 2.1701, "step": 35975 }, { "epoch": 0.08, "grad_norm": 1.671875, "learning_rate": 0.0001991168139532833, "loss": 2.2373, "step": 35980 }, { "epoch": 0.08, "grad_norm": 1.8046875, "learning_rate": 0.0001991165688322814, "loss": 2.1922, "step": 35985 }, { "epoch": 0.08, "grad_norm": 1.6796875, "learning_rate": 0.00019911632367741947, "loss": 2.2477, "step": 35990 }, { "epoch": 0.08, "grad_norm": 2.03125, "learning_rate": 0.00019911607848869763, "loss": 2.3534, "step": 35995 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.00019911583326611592, "loss": 2.0987, "step": 36000 }, { "epoch": 0.08, "grad_norm": 1.9765625, "learning_rate": 0.00019911558800967446, "loss": 2.2343, "step": 36005 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.00019911534271937331, "loss": 2.1168, "step": 36010 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019911509739521256, "loss": 2.2265, "step": 36015 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019911485203719228, "loss": 2.0553, "step": 36020 }, { "epoch": 0.08, "grad_norm": 1.96875, "learning_rate": 0.00019911460664531262, "loss": 1.967, "step": 36025 }, { "epoch": 0.08, "grad_norm": 1.6875, "learning_rate": 0.00019911436121957356, "loss": 2.2862, "step": 36030 }, { "epoch": 0.08, "grad_norm": 1.71875, "learning_rate": 0.00019911411575997527, "loss": 2.2344, "step": 36035 }, { "epoch": 0.08, "grad_norm": 1.6640625, "learning_rate": 0.0001991138702665178, "loss": 2.2234, "step": 36040 }, { "epoch": 0.08, "grad_norm": 1.671875, "learning_rate": 0.0001991136247392012, "loss": 2.1703, "step": 36045 }, { "epoch": 0.08, "grad_norm": 1.8984375, "learning_rate": 0.00019911337917802562, "loss": 2.1898, "step": 36050 }, { "epoch": 0.08, "grad_norm": 2.015625, "learning_rate": 0.00019911313358299108, "loss": 2.0289, "step": 36055 }, { "epoch": 0.08, "grad_norm": 1.8359375, "learning_rate": 0.00019911288795409772, "loss": 2.1006, "step": 36060 }, { "epoch": 0.08, "grad_norm": 1.875, "learning_rate": 0.00019911264229134564, "loss": 2.2781, "step": 36065 }, { "epoch": 0.08, "grad_norm": 2.3125, "learning_rate": 0.00019911239659473482, "loss": 2.0473, "step": 36070 }, { "epoch": 0.08, "grad_norm": 1.7578125, "learning_rate": 0.00019911215086426543, "loss": 2.0881, "step": 36075 }, { "epoch": 0.08, "grad_norm": 1.8828125, "learning_rate": 0.00019911190509993754, "loss": 2.3088, "step": 36080 }, { "epoch": 0.08, "grad_norm": 2.140625, "learning_rate": 0.00019911165930175119, "loss": 2.2615, "step": 36085 }, { "epoch": 0.08, "grad_norm": 1.9375, "learning_rate": 0.00019911141346970655, "loss": 2.1479, "step": 36090 }, { "epoch": 0.08, "grad_norm": 1.796875, "learning_rate": 0.0001991111676038036, "loss": 2.1533, "step": 36095 }, { "epoch": 0.08, "grad_norm": 2.171875, "learning_rate": 0.00019911092170404253, "loss": 2.0755, "step": 36100 }, { "epoch": 0.08, "grad_norm": 2.109375, "learning_rate": 0.00019911067577042335, "loss": 1.9482, "step": 36105 }, { "epoch": 0.08, "grad_norm": 1.75, "learning_rate": 0.00019911042980294617, "loss": 2.0911, "step": 36110 }, { "epoch": 0.08, "grad_norm": 1.921875, "learning_rate": 0.00019911018380161107, "loss": 2.2478, "step": 36115 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019910993776641812, "loss": 2.2316, "step": 36120 }, { "epoch": 0.09, "grad_norm": 3.578125, "learning_rate": 0.00019910969169736743, "loss": 2.2812, "step": 36125 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019910944559445906, "loss": 2.1838, "step": 36130 }, { "epoch": 0.09, "grad_norm": 2.28125, "learning_rate": 0.0001991091994576931, "loss": 2.3494, "step": 36135 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019910895328706968, "loss": 2.1003, "step": 36140 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019910870708258883, "loss": 2.3143, "step": 36145 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019910846084425062, "loss": 2.1718, "step": 36150 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019910821457205517, "loss": 2.2054, "step": 36155 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019910796826600258, "loss": 2.0944, "step": 36160 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019910772192609287, "loss": 2.2279, "step": 36165 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.0001991074755523262, "loss": 2.1088, "step": 36170 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.0001991072291447026, "loss": 2.3137, "step": 36175 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.00019910698270322218, "loss": 2.2562, "step": 36180 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019910673622788505, "loss": 2.1, "step": 36185 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019910648971869122, "loss": 2.2705, "step": 36190 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019910624317564084, "loss": 2.0837, "step": 36195 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019910599659873394, "loss": 2.1098, "step": 36200 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019910574998797067, "loss": 2.1398, "step": 36205 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019910550334335106, "loss": 2.2496, "step": 36210 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.0001991052566648752, "loss": 2.2429, "step": 36215 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.0001991050099525432, "loss": 2.392, "step": 36220 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019910476320635513, "loss": 2.1066, "step": 36225 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019910451642631108, "loss": 2.1846, "step": 36230 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019910426961241114, "loss": 2.2513, "step": 36235 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019910402276465536, "loss": 2.2643, "step": 36240 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019910377588304386, "loss": 2.2116, "step": 36245 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.00019910352896757673, "loss": 1.9966, "step": 36250 }, { "epoch": 0.09, "grad_norm": 2.375, "learning_rate": 0.000199103282018254, "loss": 2.1258, "step": 36255 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019910303503507581, "loss": 2.2249, "step": 36260 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.00019910278801804222, "loss": 2.0751, "step": 36265 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019910254096715334, "loss": 2.1393, "step": 36270 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019910229388240923, "loss": 2.1021, "step": 36275 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019910204676380997, "loss": 2.2881, "step": 36280 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019910179961135565, "loss": 2.2099, "step": 36285 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019910155242504636, "loss": 2.1088, "step": 36290 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.0001991013052048822, "loss": 2.359, "step": 36295 }, { "epoch": 0.09, "grad_norm": 1.703125, "learning_rate": 0.00019910105795086321, "loss": 2.0132, "step": 36300 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019910081066298952, "loss": 2.1978, "step": 36305 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019910056334126116, "loss": 2.0727, "step": 36310 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019910031598567828, "loss": 2.1606, "step": 36315 }, { "epoch": 0.09, "grad_norm": 1.484375, "learning_rate": 0.00019910006859624093, "loss": 2.1739, "step": 36320 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019909982117294922, "loss": 2.0719, "step": 36325 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.0001990995737158032, "loss": 2.036, "step": 36330 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019909932622480298, "loss": 2.1748, "step": 36335 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.0001990990786999486, "loss": 1.9067, "step": 36340 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.0001990988311412402, "loss": 2.0208, "step": 36345 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019909858354867784, "loss": 2.1579, "step": 36350 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.0001990983359222616, "loss": 2.2547, "step": 36355 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019909808826199156, "loss": 2.0838, "step": 36360 }, { "epoch": 0.09, "grad_norm": 2.265625, "learning_rate": 0.00019909784056786785, "loss": 2.3818, "step": 36365 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019909759283989048, "loss": 2.0809, "step": 36370 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.0001990973450780596, "loss": 2.0617, "step": 36375 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019909709728237527, "loss": 2.2113, "step": 36380 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019909684945283758, "loss": 2.1157, "step": 36385 }, { "epoch": 0.09, "grad_norm": 1.59375, "learning_rate": 0.0001990966015894466, "loss": 2.0132, "step": 36390 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001990963536922024, "loss": 2.2845, "step": 36395 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019909610576110512, "loss": 2.0375, "step": 36400 }, { "epoch": 0.09, "grad_norm": 1.6484375, "learning_rate": 0.00019909585779615482, "loss": 2.1789, "step": 36405 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019909560979735155, "loss": 2.2834, "step": 36410 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019909536176469542, "loss": 2.1289, "step": 36415 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019909511369818654, "loss": 2.3526, "step": 36420 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019909486559782497, "loss": 2.2614, "step": 36425 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.0001990946174636108, "loss": 2.1864, "step": 36430 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019909436929554407, "loss": 2.2766, "step": 36435 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019909412109362498, "loss": 2.2667, "step": 36440 }, { "epoch": 0.09, "grad_norm": 1.5859375, "learning_rate": 0.00019909387285785347, "loss": 2.0876, "step": 36445 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019909362458822973, "loss": 2.2167, "step": 36450 }, { "epoch": 0.09, "grad_norm": 2.265625, "learning_rate": 0.00019909337628475383, "loss": 2.2164, "step": 36455 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019909312794742578, "loss": 2.1148, "step": 36460 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019909287957624577, "loss": 2.3414, "step": 36465 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019909263117121382, "loss": 2.0961, "step": 36470 }, { "epoch": 0.09, "grad_norm": 2.578125, "learning_rate": 0.00019909238273233, "loss": 2.4768, "step": 36475 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001990921342595945, "loss": 2.3002, "step": 36480 }, { "epoch": 0.09, "grad_norm": 1.453125, "learning_rate": 0.00019909188575300724, "loss": 2.1085, "step": 36485 }, { "epoch": 0.09, "grad_norm": 1.5234375, "learning_rate": 0.00019909163721256845, "loss": 2.2856, "step": 36490 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019909138863827814, "loss": 2.1908, "step": 36495 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019909114003013642, "loss": 2.2079, "step": 36500 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019909089138814339, "loss": 2.1859, "step": 36505 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.0001990906427122991, "loss": 2.1585, "step": 36510 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019909039400260362, "loss": 2.1413, "step": 36515 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019909014525905708, "loss": 2.1767, "step": 36520 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019908989648165955, "loss": 2.3755, "step": 36525 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019908964767041115, "loss": 2.2489, "step": 36530 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.0001990893988253119, "loss": 2.1477, "step": 36535 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019908914994636192, "loss": 2.2576, "step": 36540 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019908890103356128, "loss": 1.87, "step": 36545 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019908865208691007, "loss": 2.257, "step": 36550 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.0001990884031064084, "loss": 2.2271, "step": 36555 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019908815409205632, "loss": 2.2837, "step": 36560 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019908790504385393, "loss": 2.195, "step": 36565 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019908765596180132, "loss": 2.4715, "step": 36570 }, { "epoch": 0.09, "grad_norm": 1.6328125, "learning_rate": 0.00019908740684589858, "loss": 2.1819, "step": 36575 }, { "epoch": 0.09, "grad_norm": 1.9140625, "learning_rate": 0.00019908715769614577, "loss": 2.0249, "step": 36580 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.00019908690851254299, "loss": 2.0275, "step": 36585 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019908665929509032, "loss": 2.2226, "step": 36590 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019908641004378786, "loss": 2.0566, "step": 36595 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019908616075863566, "loss": 2.0518, "step": 36600 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019908591143963386, "loss": 2.3018, "step": 36605 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019908566208678253, "loss": 2.2355, "step": 36610 }, { "epoch": 0.09, "grad_norm": 2.234375, "learning_rate": 0.00019908541270008173, "loss": 2.2694, "step": 36615 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.0001990851632795315, "loss": 2.1362, "step": 36620 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.00019908491382513205, "loss": 2.1078, "step": 36625 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019908466433688336, "loss": 2.1418, "step": 36630 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019908441481478558, "loss": 2.1576, "step": 36635 }, { "epoch": 0.09, "grad_norm": 1.6015625, "learning_rate": 0.00019908416525883874, "loss": 2.2606, "step": 36640 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019908391566904297, "loss": 2.3302, "step": 36645 }, { "epoch": 0.09, "grad_norm": 1.5859375, "learning_rate": 0.00019908366604539835, "loss": 2.0984, "step": 36650 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019908341638790491, "loss": 2.2632, "step": 36655 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019908316669656282, "loss": 2.1803, "step": 36660 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.0001990829169713721, "loss": 2.2169, "step": 36665 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.00019908266721233283, "loss": 2.3704, "step": 36670 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.00019908241741944518, "loss": 2.2085, "step": 36675 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019908216759270916, "loss": 2.1472, "step": 36680 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019908191773212485, "loss": 2.2735, "step": 36685 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.0001990816678376924, "loss": 2.3166, "step": 36690 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019908141790941182, "loss": 2.0041, "step": 36695 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019908116794728326, "loss": 2.1622, "step": 36700 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019908091795130676, "loss": 2.1077, "step": 36705 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019908066792148242, "loss": 2.1164, "step": 36710 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019908041785781034, "loss": 2.1914, "step": 36715 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019908016776029057, "loss": 2.1344, "step": 36720 }, { "epoch": 0.09, "grad_norm": 2.328125, "learning_rate": 0.00019907991762892324, "loss": 2.3008, "step": 36725 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019907966746370842, "loss": 2.141, "step": 36730 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019907941726464616, "loss": 2.442, "step": 36735 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.0001990791670317366, "loss": 2.1272, "step": 36740 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019907891676497976, "loss": 2.3699, "step": 36745 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019907866646437581, "loss": 2.3059, "step": 36750 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019907841612992478, "loss": 2.2724, "step": 36755 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019907816576162675, "loss": 2.096, "step": 36760 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019907791535948182, "loss": 1.9618, "step": 36765 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019907766492349008, "loss": 2.3309, "step": 36770 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019907741445365163, "loss": 2.1355, "step": 36775 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019907716394996654, "loss": 2.1805, "step": 36780 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.0001990769134124349, "loss": 2.2163, "step": 36785 }, { "epoch": 0.09, "grad_norm": 1.515625, "learning_rate": 0.00019907666284105674, "loss": 2.163, "step": 36790 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019907641223583224, "loss": 2.2205, "step": 36795 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019907616159676143, "loss": 2.1598, "step": 36800 }, { "epoch": 0.09, "grad_norm": 1.578125, "learning_rate": 0.00019907591092384439, "loss": 2.1474, "step": 36805 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019907566021708126, "loss": 2.3033, "step": 36810 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019907540947647204, "loss": 2.129, "step": 36815 }, { "epoch": 0.09, "grad_norm": 1.515625, "learning_rate": 0.0001990751587020169, "loss": 1.9978, "step": 36820 }, { "epoch": 0.09, "grad_norm": 2.375, "learning_rate": 0.00019907490789371587, "loss": 2.3142, "step": 36825 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.00019907465705156906, "loss": 2.2954, "step": 36830 }, { "epoch": 0.09, "grad_norm": 1.703125, "learning_rate": 0.00019907440617557658, "loss": 2.0945, "step": 36835 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019907415526573843, "loss": 2.3448, "step": 36840 }, { "epoch": 0.09, "grad_norm": 2.328125, "learning_rate": 0.0001990739043220548, "loss": 1.9618, "step": 36845 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.0001990736533445257, "loss": 2.1384, "step": 36850 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019907340233315125, "loss": 2.1707, "step": 36855 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019907315128793153, "loss": 2.1351, "step": 36860 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019907290020886663, "loss": 2.0433, "step": 36865 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019907264909595662, "loss": 2.1821, "step": 36870 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.0001990723979492016, "loss": 2.2645, "step": 36875 }, { "epoch": 0.09, "grad_norm": 2.328125, "learning_rate": 0.00019907214676860165, "loss": 2.1461, "step": 36880 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019907189555415687, "loss": 2.13, "step": 36885 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.0001990716443058673, "loss": 2.2077, "step": 36890 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001990713930237331, "loss": 2.1052, "step": 36895 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.0001990711417077543, "loss": 2.2138, "step": 36900 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.000199070890357931, "loss": 2.2853, "step": 36905 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.0001990706389742633, "loss": 2.1339, "step": 36910 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019907038755675123, "loss": 2.1708, "step": 36915 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019907013610539494, "loss": 2.2115, "step": 36920 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019906988462019455, "loss": 2.1942, "step": 36925 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.00019906963310115002, "loss": 2.2267, "step": 36930 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019906938154826153, "loss": 2.0307, "step": 36935 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019906912996152916, "loss": 2.3066, "step": 36940 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019906887834095295, "loss": 2.1341, "step": 36945 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019906862668653304, "loss": 2.152, "step": 36950 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019906837499826947, "loss": 2.2488, "step": 36955 }, { "epoch": 0.09, "grad_norm": 1.6015625, "learning_rate": 0.00019906812327616236, "loss": 2.201, "step": 36960 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019906787152021175, "loss": 2.2015, "step": 36965 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019906761973041782, "loss": 2.1602, "step": 36970 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019906736790678054, "loss": 2.0002, "step": 36975 }, { "epoch": 0.09, "grad_norm": 4.4375, "learning_rate": 0.00019906711604930006, "loss": 2.0949, "step": 36980 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019906686415797648, "loss": 2.2753, "step": 36985 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019906661223280985, "loss": 2.2246, "step": 36990 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.0001990663602738003, "loss": 2.1532, "step": 36995 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019906610828094781, "loss": 2.1839, "step": 37000 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.0001990658562542526, "loss": 1.9506, "step": 37005 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019906560419371468, "loss": 2.0868, "step": 37010 }, { "epoch": 0.09, "grad_norm": 1.6640625, "learning_rate": 0.00019906535209933415, "loss": 2.3796, "step": 37015 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.0001990650999711111, "loss": 2.1994, "step": 37020 }, { "epoch": 0.09, "grad_norm": 1.6640625, "learning_rate": 0.00019906484780904561, "loss": 2.3281, "step": 37025 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.0001990645956131378, "loss": 2.3245, "step": 37030 }, { "epoch": 0.09, "grad_norm": 2.453125, "learning_rate": 0.0001990643433833877, "loss": 2.362, "step": 37035 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001990640911197954, "loss": 2.3178, "step": 37040 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019906383882236105, "loss": 2.2456, "step": 37045 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.00019906358649108468, "loss": 2.0767, "step": 37050 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.0001990633341259664, "loss": 2.2185, "step": 37055 }, { "epoch": 0.09, "grad_norm": 1.578125, "learning_rate": 0.0001990630817270063, "loss": 2.3245, "step": 37060 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019906282929420445, "loss": 2.202, "step": 37065 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.0001990625768275609, "loss": 2.1171, "step": 37070 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019906232432707582, "loss": 2.1286, "step": 37075 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019906207179274925, "loss": 2.283, "step": 37080 }, { "epoch": 0.09, "grad_norm": 1.6171875, "learning_rate": 0.00019906181922458126, "loss": 2.163, "step": 37085 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.00019906156662257197, "loss": 2.24, "step": 37090 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019906131398672147, "loss": 2.0407, "step": 37095 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001990610613170298, "loss": 2.2999, "step": 37100 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019906080861349708, "loss": 1.913, "step": 37105 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019906055587612338, "loss": 2.2176, "step": 37110 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019906030310490883, "loss": 2.0844, "step": 37115 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019906005029985346, "loss": 2.0868, "step": 37120 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.00019905979746095737, "loss": 1.9595, "step": 37125 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.0001990595445882207, "loss": 2.155, "step": 37130 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.00019905929168164346, "loss": 2.2277, "step": 37135 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019905903874122577, "loss": 2.1976, "step": 37140 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019905878576696772, "loss": 2.1792, "step": 37145 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.0001990585327588694, "loss": 2.2308, "step": 37150 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019905827971693088, "loss": 2.226, "step": 37155 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019905802664115226, "loss": 2.0568, "step": 37160 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019905777353153363, "loss": 1.8565, "step": 37165 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019905752038807504, "loss": 2.2113, "step": 37170 }, { "epoch": 0.09, "grad_norm": 1.703125, "learning_rate": 0.00019905726721077663, "loss": 2.0811, "step": 37175 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019905701399963843, "loss": 2.1105, "step": 37180 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001990567607546606, "loss": 2.1678, "step": 37185 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019905650747584317, "loss": 2.0412, "step": 37190 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.00019905625416318624, "loss": 2.2019, "step": 37195 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019905600081668987, "loss": 2.1871, "step": 37200 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.0001990557474363542, "loss": 2.1102, "step": 37205 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.0001990554940221793, "loss": 2.13, "step": 37210 }, { "epoch": 0.09, "grad_norm": 1.6171875, "learning_rate": 0.00019905524057416523, "loss": 2.0992, "step": 37215 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.0001990549870923121, "loss": 2.3697, "step": 37220 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019905473357661996, "loss": 2.1284, "step": 37225 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019905448002708896, "loss": 2.1219, "step": 37230 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019905422644371913, "loss": 2.255, "step": 37235 }, { "epoch": 0.09, "grad_norm": 1.640625, "learning_rate": 0.00019905397282651063, "loss": 2.1396, "step": 37240 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019905371917546344, "loss": 2.2364, "step": 37245 }, { "epoch": 0.09, "grad_norm": 2.3125, "learning_rate": 0.00019905346549057773, "loss": 2.0067, "step": 37250 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019905321177185353, "loss": 2.1128, "step": 37255 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.000199052958019291, "loss": 2.2314, "step": 37260 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019905270423289017, "loss": 2.2576, "step": 37265 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019905245041265115, "loss": 2.2107, "step": 37270 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.000199052196558574, "loss": 2.1619, "step": 37275 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019905194267065882, "loss": 2.1797, "step": 37280 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001990516887489057, "loss": 2.2927, "step": 37285 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019905143479331474, "loss": 2.457, "step": 37290 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.00019905118080388597, "loss": 2.1785, "step": 37295 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019905092678061957, "loss": 2.2568, "step": 37300 }, { "epoch": 0.09, "grad_norm": 2.46875, "learning_rate": 0.00019905067272351555, "loss": 2.2314, "step": 37305 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019905041863257404, "loss": 2.1576, "step": 37310 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019905016450779512, "loss": 2.035, "step": 37315 }, { "epoch": 0.09, "grad_norm": 1.5234375, "learning_rate": 0.00019904991034917882, "loss": 2.2474, "step": 37320 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019904965615672536, "loss": 2.2359, "step": 37325 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019904940193043465, "loss": 2.0702, "step": 37330 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019904914767030692, "loss": 2.109, "step": 37335 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.00019904889337634218, "loss": 1.9766, "step": 37340 }, { "epoch": 0.09, "grad_norm": 2.234375, "learning_rate": 0.00019904863904854058, "loss": 2.09, "step": 37345 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019904838468690213, "loss": 2.1942, "step": 37350 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019904813029142696, "loss": 2.1624, "step": 37355 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019904787586211517, "loss": 2.1361, "step": 37360 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.0001990476213989668, "loss": 2.2687, "step": 37365 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.000199047366901982, "loss": 2.3231, "step": 37370 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.0001990471123711608, "loss": 2.1397, "step": 37375 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019904685780650332, "loss": 2.2757, "step": 37380 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019904660320800964, "loss": 2.2481, "step": 37385 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.00019904634857567983, "loss": 2.2594, "step": 37390 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.000199046093909514, "loss": 2.0838, "step": 37395 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019904583920951223, "loss": 2.208, "step": 37400 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.0001990455844756746, "loss": 2.248, "step": 37405 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.0001990453297080012, "loss": 2.128, "step": 37410 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019904507490649213, "loss": 2.1156, "step": 37415 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019904482007114746, "loss": 2.0841, "step": 37420 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.0001990445652019673, "loss": 2.2195, "step": 37425 }, { "epoch": 0.09, "grad_norm": 2.3125, "learning_rate": 0.0001990443102989517, "loss": 2.3096, "step": 37430 }, { "epoch": 0.09, "grad_norm": 2.15625, "learning_rate": 0.00019904405536210078, "loss": 2.1844, "step": 37435 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.0001990438003914146, "loss": 2.1799, "step": 37440 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019904354538689324, "loss": 2.1812, "step": 37445 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019904329034853686, "loss": 2.3687, "step": 37450 }, { "epoch": 0.09, "grad_norm": 1.6640625, "learning_rate": 0.00019904303527634547, "loss": 2.0928, "step": 37455 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019904278017031919, "loss": 2.4202, "step": 37460 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019904252503045807, "loss": 2.2837, "step": 37465 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019904226985676226, "loss": 2.1257, "step": 37470 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.00019904201464923183, "loss": 2.2809, "step": 37475 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.0001990417594078668, "loss": 1.9462, "step": 37480 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019904150413266734, "loss": 2.019, "step": 37485 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.0001990412488236335, "loss": 2.3047, "step": 37490 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019904099348076537, "loss": 2.1902, "step": 37495 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019904073810406305, "loss": 2.0252, "step": 37500 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.0001990404826935266, "loss": 2.2584, "step": 37505 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019904022724915617, "loss": 2.2371, "step": 37510 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019903997177095177, "loss": 2.2677, "step": 37515 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.0001990397162589135, "loss": 2.1279, "step": 37520 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.0001990394607130415, "loss": 2.1911, "step": 37525 }, { "epoch": 0.09, "grad_norm": 2.34375, "learning_rate": 0.0001990392051333358, "loss": 2.1237, "step": 37530 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019903894951979654, "loss": 2.1089, "step": 37535 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019903869387242375, "loss": 2.3209, "step": 37540 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019903843819121754, "loss": 2.2692, "step": 37545 }, { "epoch": 0.09, "grad_norm": 2.5, "learning_rate": 0.00019903818247617804, "loss": 2.2902, "step": 37550 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.00019903792672730527, "loss": 2.3259, "step": 37555 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019903767094459938, "loss": 2.081, "step": 37560 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.0001990374151280604, "loss": 1.9777, "step": 37565 }, { "epoch": 0.09, "grad_norm": 1.5546875, "learning_rate": 0.00019903715927768845, "loss": 2.1938, "step": 37570 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.0001990369033934836, "loss": 2.1659, "step": 37575 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019903664747544596, "loss": 2.3096, "step": 37580 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019903639152357561, "loss": 2.1593, "step": 37585 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.0001990361355378726, "loss": 2.1972, "step": 37590 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.0001990358795183371, "loss": 2.1562, "step": 37595 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.0001990356234649691, "loss": 2.1434, "step": 37600 }, { "epoch": 0.09, "grad_norm": 1.5625, "learning_rate": 0.00019903536737776874, "loss": 2.1552, "step": 37605 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019903511125673613, "loss": 2.2446, "step": 37610 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.00019903485510187134, "loss": 2.15, "step": 37615 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019903459891317443, "loss": 2.1723, "step": 37620 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019903434269064548, "loss": 2.1918, "step": 37625 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019903408643428462, "loss": 2.098, "step": 37630 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019903383014409194, "loss": 2.4157, "step": 37635 }, { "epoch": 0.09, "grad_norm": 2.734375, "learning_rate": 0.0001990335738200675, "loss": 2.0675, "step": 37640 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019903331746221138, "loss": 2.1696, "step": 37645 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.00019903306107052368, "loss": 2.1989, "step": 37650 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.0001990328046450045, "loss": 2.1818, "step": 37655 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.0001990325481856539, "loss": 2.2671, "step": 37660 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019903229169247202, "loss": 2.2645, "step": 37665 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019903203516545887, "loss": 2.1672, "step": 37670 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.0001990317786046146, "loss": 2.2276, "step": 37675 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019903152200993927, "loss": 2.1204, "step": 37680 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019903126538143301, "loss": 2.0965, "step": 37685 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.00019903100871909585, "loss": 2.2611, "step": 37690 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019903075202292792, "loss": 2.3139, "step": 37695 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019903049529292928, "loss": 2.1455, "step": 37700 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.0001990302385291, "loss": 2.1022, "step": 37705 }, { "epoch": 0.09, "grad_norm": 1.9140625, "learning_rate": 0.00019902998173144024, "loss": 2.2029, "step": 37710 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019902972489995002, "loss": 2.2534, "step": 37715 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019902946803462943, "loss": 2.0004, "step": 37720 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.0001990292111354786, "loss": 1.9638, "step": 37725 }, { "epoch": 0.09, "grad_norm": 3.40625, "learning_rate": 0.00019902895420249758, "loss": 2.1982, "step": 37730 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001990286972356865, "loss": 2.026, "step": 37735 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.00019902844023504538, "loss": 2.1254, "step": 37740 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019902818320057439, "loss": 2.2445, "step": 37745 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019902792613227358, "loss": 2.1161, "step": 37750 }, { "epoch": 0.09, "grad_norm": 1.609375, "learning_rate": 0.000199027669030143, "loss": 2.1927, "step": 37755 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.0001990274118941828, "loss": 2.1977, "step": 37760 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019902715472439302, "loss": 2.2208, "step": 37765 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.00019902689752077378, "loss": 2.1954, "step": 37770 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019902664028332513, "loss": 2.0407, "step": 37775 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019902638301204722, "loss": 2.4484, "step": 37780 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019902612570694007, "loss": 2.2031, "step": 37785 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019902586836800383, "loss": 2.167, "step": 37790 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019902561099523856, "loss": 2.2, "step": 37795 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.0001990253535886443, "loss": 2.332, "step": 37800 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019902509614822122, "loss": 2.1381, "step": 37805 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019902483867396937, "loss": 2.2106, "step": 37810 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019902458116588883, "loss": 2.4547, "step": 37815 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001990243236239797, "loss": 2.2041, "step": 37820 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019902406604824205, "loss": 2.3547, "step": 37825 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.000199023808438676, "loss": 2.2614, "step": 37830 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019902355079528164, "loss": 2.1984, "step": 37835 }, { "epoch": 0.09, "grad_norm": 1.6015625, "learning_rate": 0.000199023293118059, "loss": 1.9064, "step": 37840 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019902303540700824, "loss": 2.338, "step": 37845 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.0001990227776621294, "loss": 2.1516, "step": 37850 }, { "epoch": 0.09, "grad_norm": 1.515625, "learning_rate": 0.0001990225198834226, "loss": 2.2227, "step": 37855 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019902226207088787, "loss": 2.3644, "step": 37860 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019902200422452536, "loss": 2.1102, "step": 37865 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019902174634433517, "loss": 2.1567, "step": 37870 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.00019902148843031731, "loss": 2.1467, "step": 37875 }, { "epoch": 0.09, "grad_norm": 2.65625, "learning_rate": 0.00019902123048247194, "loss": 2.3715, "step": 37880 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.00019902097250079913, "loss": 2.0349, "step": 37885 }, { "epoch": 0.09, "grad_norm": 1.484375, "learning_rate": 0.00019902071448529892, "loss": 2.1632, "step": 37890 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019902045643597146, "loss": 2.256, "step": 37895 }, { "epoch": 0.09, "grad_norm": 1.9140625, "learning_rate": 0.0001990201983528168, "loss": 2.1199, "step": 37900 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019901994023583509, "loss": 2.1408, "step": 37905 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019901968208502636, "loss": 2.1271, "step": 37910 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.0001990194239003907, "loss": 2.2133, "step": 37915 }, { "epoch": 0.09, "grad_norm": 1.6171875, "learning_rate": 0.00019901916568192818, "loss": 1.9994, "step": 37920 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019901890742963894, "loss": 2.2848, "step": 37925 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019901864914352303, "loss": 2.1364, "step": 37930 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019901839082358058, "loss": 2.071, "step": 37935 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019901813246981165, "loss": 2.0845, "step": 37940 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019901787408221634, "loss": 2.2728, "step": 37945 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.0001990176156607947, "loss": 2.1133, "step": 37950 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019901735720554683, "loss": 2.2221, "step": 37955 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019901709871647288, "loss": 2.3171, "step": 37960 }, { "epoch": 0.09, "grad_norm": 1.34375, "learning_rate": 0.00019901684019357288, "loss": 2.0309, "step": 37965 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019901658163684694, "loss": 2.2102, "step": 37970 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.0001990163230462951, "loss": 2.3219, "step": 37975 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.00019901606442191753, "loss": 2.1485, "step": 37980 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019901580576371427, "loss": 2.231, "step": 37985 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.0001990155470716854, "loss": 2.2866, "step": 37990 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019901528834583102, "loss": 2.1051, "step": 37995 }, { "epoch": 0.09, "grad_norm": 1.4921875, "learning_rate": 0.00019901502958615127, "loss": 2.1972, "step": 38000 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019901477079264614, "loss": 2.2654, "step": 38005 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019901451196531578, "loss": 2.3177, "step": 38010 }, { "epoch": 0.09, "grad_norm": 2.53125, "learning_rate": 0.00019901425310416026, "loss": 2.1282, "step": 38015 }, { "epoch": 0.09, "grad_norm": 1.6328125, "learning_rate": 0.00019901399420917966, "loss": 2.1688, "step": 38020 }, { "epoch": 0.09, "grad_norm": 1.609375, "learning_rate": 0.00019901373528037413, "loss": 2.214, "step": 38025 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019901347631774368, "loss": 2.2049, "step": 38030 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019901321732128846, "loss": 2.3809, "step": 38035 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019901295829100852, "loss": 2.3278, "step": 38040 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019901269922690393, "loss": 2.2341, "step": 38045 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019901244012897484, "loss": 2.1037, "step": 38050 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019901218099722126, "loss": 2.139, "step": 38055 }, { "epoch": 0.09, "grad_norm": 1.6328125, "learning_rate": 0.00019901192183164338, "loss": 2.3245, "step": 38060 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001990116626322412, "loss": 2.2559, "step": 38065 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019901140339901482, "loss": 2.3592, "step": 38070 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.0001990111441319644, "loss": 2.0989, "step": 38075 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019901088483108997, "loss": 2.2995, "step": 38080 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001990106254963916, "loss": 2.2878, "step": 38085 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.0001990103661278694, "loss": 2.0751, "step": 38090 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019901010672552347, "loss": 2.2075, "step": 38095 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.0001990098472893539, "loss": 2.2233, "step": 38100 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019900958781936078, "loss": 2.1797, "step": 38105 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019900932831554418, "loss": 2.3023, "step": 38110 }, { "epoch": 0.09, "grad_norm": 1.609375, "learning_rate": 0.00019900906877790418, "loss": 2.1256, "step": 38115 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001990088092064409, "loss": 2.3272, "step": 38120 }, { "epoch": 0.09, "grad_norm": 1.6015625, "learning_rate": 0.0001990085496011544, "loss": 2.1341, "step": 38125 }, { "epoch": 0.09, "grad_norm": 1.6484375, "learning_rate": 0.0001990082899620448, "loss": 2.0841, "step": 38130 }, { "epoch": 0.09, "grad_norm": 1.5625, "learning_rate": 0.00019900803028911218, "loss": 2.0225, "step": 38135 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.00019900777058235662, "loss": 2.2913, "step": 38140 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.0001990075108417782, "loss": 2.2866, "step": 38145 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019900725106737703, "loss": 2.3166, "step": 38150 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019900699125915318, "loss": 2.1406, "step": 38155 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019900673141710673, "loss": 2.2271, "step": 38160 }, { "epoch": 0.09, "grad_norm": 1.6640625, "learning_rate": 0.0001990064715412378, "loss": 2.2862, "step": 38165 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.0001990062116315465, "loss": 2.2641, "step": 38170 }, { "epoch": 0.09, "grad_norm": 2.28125, "learning_rate": 0.00019900595168803283, "loss": 2.187, "step": 38175 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019900569171069695, "loss": 2.2893, "step": 38180 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019900543169953895, "loss": 2.0503, "step": 38185 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019900517165455885, "loss": 2.1577, "step": 38190 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.0001990049115757568, "loss": 2.2625, "step": 38195 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019900465146313292, "loss": 2.2488, "step": 38200 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019900439131668722, "loss": 2.0863, "step": 38205 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019900413113641983, "loss": 2.2674, "step": 38210 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019900387092233084, "loss": 2.0786, "step": 38215 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019900361067442033, "loss": 2.304, "step": 38220 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001990033503926884, "loss": 2.1065, "step": 38225 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.0001990030900771351, "loss": 2.3341, "step": 38230 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019900282972776055, "loss": 2.1667, "step": 38235 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.00019900256934456486, "loss": 2.1693, "step": 38240 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.0001990023089275481, "loss": 2.2472, "step": 38245 }, { "epoch": 0.09, "grad_norm": 2.296875, "learning_rate": 0.00019900204847671035, "loss": 2.2808, "step": 38250 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019900178799205168, "loss": 2.1384, "step": 38255 }, { "epoch": 0.09, "grad_norm": 2.4375, "learning_rate": 0.00019900152747357225, "loss": 2.3194, "step": 38260 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.0001990012669212721, "loss": 2.0996, "step": 38265 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.0001990010063351513, "loss": 2.2864, "step": 38270 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019900074571520995, "loss": 2.2077, "step": 38275 }, { "epoch": 0.09, "grad_norm": 1.6640625, "learning_rate": 0.00019900048506144816, "loss": 2.161, "step": 38280 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019900022437386602, "loss": 2.1664, "step": 38285 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.0001989999636524636, "loss": 2.4034, "step": 38290 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019899970289724098, "loss": 2.0858, "step": 38295 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.0001989994421081983, "loss": 2.1068, "step": 38300 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019899918128533556, "loss": 2.1066, "step": 38305 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.00019899892042865295, "loss": 2.2502, "step": 38310 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.0001989986595381505, "loss": 2.375, "step": 38315 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019899839861382831, "loss": 2.037, "step": 38320 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.0001989981376556865, "loss": 2.2624, "step": 38325 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019899787666372509, "loss": 2.0429, "step": 38330 }, { "epoch": 0.09, "grad_norm": 2.265625, "learning_rate": 0.00019899761563794423, "loss": 2.2628, "step": 38335 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019899735457834397, "loss": 2.0796, "step": 38340 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.0001989970934849245, "loss": 2.1024, "step": 38345 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019899683235768574, "loss": 2.1863, "step": 38350 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019899657119662787, "loss": 2.1161, "step": 38355 }, { "epoch": 0.09, "grad_norm": 1.5078125, "learning_rate": 0.000198996310001751, "loss": 2.1723, "step": 38360 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.0001989960487730552, "loss": 2.2611, "step": 38365 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019899578751054054, "loss": 2.1269, "step": 38370 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019899552621420715, "loss": 2.2321, "step": 38375 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019899526488405505, "loss": 2.2966, "step": 38380 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001989950035200844, "loss": 2.2001, "step": 38385 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019899474212229526, "loss": 2.112, "step": 38390 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019899448069068772, "loss": 1.9056, "step": 38395 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019899421922526186, "loss": 2.3093, "step": 38400 }, { "epoch": 0.09, "grad_norm": 1.4296875, "learning_rate": 0.00019899395772601778, "loss": 2.0626, "step": 38405 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.0001989936961929556, "loss": 2.1935, "step": 38410 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019899343462607535, "loss": 2.1135, "step": 38415 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019899317302537713, "loss": 2.1153, "step": 38420 }, { "epoch": 0.09, "grad_norm": 1.6171875, "learning_rate": 0.00019899291139086108, "loss": 2.2108, "step": 38425 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.0001989926497225272, "loss": 2.0931, "step": 38430 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019899238802037573, "loss": 2.0885, "step": 38435 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.0001989921262844066, "loss": 2.1757, "step": 38440 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019899186451461995, "loss": 2.314, "step": 38445 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.00019899160271101592, "loss": 2.21, "step": 38450 }, { "epoch": 0.09, "grad_norm": 2.15625, "learning_rate": 0.00019899134087359458, "loss": 2.1126, "step": 38455 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019899107900235596, "loss": 2.0664, "step": 38460 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.0001989908170973002, "loss": 2.1559, "step": 38465 }, { "epoch": 0.09, "grad_norm": 1.640625, "learning_rate": 0.0001989905551584274, "loss": 2.1846, "step": 38470 }, { "epoch": 0.09, "grad_norm": 1.703125, "learning_rate": 0.00019899029318573761, "loss": 2.1078, "step": 38475 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019899003117923095, "loss": 2.2008, "step": 38480 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.0001989897691389075, "loss": 2.0552, "step": 38485 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019898950706476737, "loss": 2.3036, "step": 38490 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019898924495681062, "loss": 2.1238, "step": 38495 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019898898281503732, "loss": 2.1231, "step": 38500 }, { "epoch": 0.09, "grad_norm": 1.5625, "learning_rate": 0.00019898872063944762, "loss": 2.0358, "step": 38505 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019898845843004158, "loss": 2.2061, "step": 38510 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.00019898819618681924, "loss": 2.0843, "step": 38515 }, { "epoch": 0.09, "grad_norm": 2.28125, "learning_rate": 0.00019898793390978078, "loss": 2.0707, "step": 38520 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.00019898767159892624, "loss": 2.3146, "step": 38525 }, { "epoch": 0.09, "grad_norm": 2.46875, "learning_rate": 0.00019898740925425573, "loss": 2.0725, "step": 38530 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.0001989871468757693, "loss": 2.3306, "step": 38535 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019898688446346707, "loss": 2.1995, "step": 38540 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.00019898662201734912, "loss": 2.2171, "step": 38545 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019898635953741555, "loss": 2.1492, "step": 38550 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019898609702366645, "loss": 2.1001, "step": 38555 }, { "epoch": 0.09, "grad_norm": 1.6484375, "learning_rate": 0.00019898583447610193, "loss": 2.4006, "step": 38560 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.000198985571894722, "loss": 2.1967, "step": 38565 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019898530927952684, "loss": 2.2313, "step": 38570 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.0001989850466305165, "loss": 2.2195, "step": 38575 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.00019898478394769106, "loss": 2.2665, "step": 38580 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019898452123105062, "loss": 2.14, "step": 38585 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.0001989842584805953, "loss": 2.1309, "step": 38590 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019898399569632513, "loss": 2.1976, "step": 38595 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019898373287824023, "loss": 2.1722, "step": 38600 }, { "epoch": 0.09, "grad_norm": 1.40625, "learning_rate": 0.0001989834700263407, "loss": 2.2183, "step": 38605 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019898320714062666, "loss": 2.1535, "step": 38610 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.00019898294422109812, "loss": 2.2306, "step": 38615 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.0001989826812677552, "loss": 2.2701, "step": 38620 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019898241828059804, "loss": 2.1802, "step": 38625 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019898215525962666, "loss": 2.1736, "step": 38630 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019898189220484121, "loss": 2.2008, "step": 38635 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019898162911624172, "loss": 2.08, "step": 38640 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019898136599382833, "loss": 2.2269, "step": 38645 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.0001989811028376011, "loss": 2.271, "step": 38650 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019898083964756013, "loss": 2.1775, "step": 38655 }, { "epoch": 0.09, "grad_norm": 1.390625, "learning_rate": 0.0001989805764237055, "loss": 2.1368, "step": 38660 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019898031316603734, "loss": 2.2518, "step": 38665 }, { "epoch": 0.09, "grad_norm": 2.234375, "learning_rate": 0.00019898004987455568, "loss": 2.1146, "step": 38670 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019897978654926065, "loss": 2.1757, "step": 38675 }, { "epoch": 0.09, "grad_norm": 1.9140625, "learning_rate": 0.00019897952319015234, "loss": 2.0185, "step": 38680 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019897925979723083, "loss": 2.3544, "step": 38685 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019897899637049622, "loss": 2.055, "step": 38690 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019897873290994854, "loss": 2.2201, "step": 38695 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.000198978469415588, "loss": 2.2505, "step": 38700 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019897820588741456, "loss": 2.193, "step": 38705 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019897794232542838, "loss": 2.2445, "step": 38710 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019897767872962955, "loss": 2.1395, "step": 38715 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019897741510001816, "loss": 2.1799, "step": 38720 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.0001989771514365943, "loss": 2.3768, "step": 38725 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019897688773935803, "loss": 2.3375, "step": 38730 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019897662400830945, "loss": 2.1194, "step": 38735 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019897636024344867, "loss": 2.0845, "step": 38740 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019897609644477576, "loss": 2.2367, "step": 38745 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019897583261229085, "loss": 2.2068, "step": 38750 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.00019897556874599398, "loss": 2.3119, "step": 38755 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019897530484588524, "loss": 2.3841, "step": 38760 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019897504091196479, "loss": 2.2044, "step": 38765 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019897477694423262, "loss": 2.1319, "step": 38770 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.00019897451294268893, "loss": 2.1447, "step": 38775 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.00019897424890733372, "loss": 2.1439, "step": 38780 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.0001989739848381671, "loss": 2.0604, "step": 38785 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.0001989737207351892, "loss": 2.0961, "step": 38790 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019897345659840006, "loss": 2.1441, "step": 38795 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019897319242779976, "loss": 2.2995, "step": 38800 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019897292822338848, "loss": 2.1987, "step": 38805 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019897266398516623, "loss": 2.0735, "step": 38810 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.0001989723997131331, "loss": 2.059, "step": 38815 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019897213540728925, "loss": 2.2468, "step": 38820 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001989718710676347, "loss": 2.199, "step": 38825 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019897160669416956, "loss": 2.1141, "step": 38830 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019897134228689394, "loss": 2.201, "step": 38835 }, { "epoch": 0.09, "grad_norm": 1.3671875, "learning_rate": 0.00019897107784580791, "loss": 2.0623, "step": 38840 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019897081337091156, "loss": 2.0356, "step": 38845 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019897054886220498, "loss": 1.9666, "step": 38850 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019897028431968824, "loss": 2.2403, "step": 38855 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.0001989700197433615, "loss": 2.0959, "step": 38860 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.0001989697551332248, "loss": 2.163, "step": 38865 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019896949048927822, "loss": 2.2193, "step": 38870 }, { "epoch": 0.09, "grad_norm": 2.3125, "learning_rate": 0.0001989692258115219, "loss": 2.2418, "step": 38875 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019896896109995583, "loss": 2.3708, "step": 38880 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019896869635458022, "loss": 2.1853, "step": 38885 }, { "epoch": 0.09, "grad_norm": 1.578125, "learning_rate": 0.0001989684315753951, "loss": 2.2143, "step": 38890 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.0001989681667624006, "loss": 2.1697, "step": 38895 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019896790191559674, "loss": 2.0239, "step": 38900 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019896763703498364, "loss": 2.0848, "step": 38905 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019896737212056142, "loss": 2.0393, "step": 38910 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019896710717233015, "loss": 2.1873, "step": 38915 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019896684219028994, "loss": 2.2687, "step": 38920 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019896657717444083, "loss": 2.2861, "step": 38925 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019896631212478293, "loss": 2.219, "step": 38930 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.0001989660470413164, "loss": 2.0975, "step": 38935 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019896578192404123, "loss": 2.304, "step": 38940 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019896551677295757, "loss": 2.1589, "step": 38945 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019896525158806546, "loss": 2.0781, "step": 38950 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019896498636936508, "loss": 2.2934, "step": 38955 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019896472111685643, "loss": 2.2164, "step": 38960 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019896445583053964, "loss": 2.1397, "step": 38965 }, { "epoch": 0.09, "grad_norm": 2.15625, "learning_rate": 0.0001989641905104148, "loss": 2.2642, "step": 38970 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019896392515648202, "loss": 2.1823, "step": 38975 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019896365976874135, "loss": 2.0584, "step": 38980 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019896339434719292, "loss": 2.0437, "step": 38985 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019896312889183676, "loss": 2.1012, "step": 38990 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019896286340267302, "loss": 2.0887, "step": 38995 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019896259787970176, "loss": 2.2466, "step": 39000 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.0001989623323229231, "loss": 2.1212, "step": 39005 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019896206673233711, "loss": 2.179, "step": 39010 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019896180110794387, "loss": 2.073, "step": 39015 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.00019896153544974348, "loss": 2.1229, "step": 39020 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019896126975773605, "loss": 2.1754, "step": 39025 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019896100403192166, "loss": 2.1022, "step": 39030 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.0001989607382723004, "loss": 2.199, "step": 39035 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019896047247887233, "loss": 2.3092, "step": 39040 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019896020665163757, "loss": 2.0895, "step": 39045 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019895994079059627, "loss": 2.0882, "step": 39050 }, { "epoch": 0.09, "grad_norm": 1.6015625, "learning_rate": 0.00019895967489574838, "loss": 2.1642, "step": 39055 }, { "epoch": 0.09, "grad_norm": 2.46875, "learning_rate": 0.0001989594089670941, "loss": 2.1375, "step": 39060 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.0001989591430046335, "loss": 2.189, "step": 39065 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019895887700836667, "loss": 2.0661, "step": 39070 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019895861097829368, "loss": 2.1474, "step": 39075 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.0001989583449144146, "loss": 2.2894, "step": 39080 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019895807881672962, "loss": 2.0962, "step": 39085 }, { "epoch": 0.09, "grad_norm": 1.640625, "learning_rate": 0.00019895781268523872, "loss": 2.1638, "step": 39090 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019895754651994203, "loss": 2.1436, "step": 39095 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019895728032083966, "loss": 2.1339, "step": 39100 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.0001989570140879317, "loss": 2.1749, "step": 39105 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019895674782121822, "loss": 2.3029, "step": 39110 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019895648152069931, "loss": 2.3575, "step": 39115 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.00019895621518637512, "loss": 2.3556, "step": 39120 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019895594881824563, "loss": 2.2095, "step": 39125 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019895568241631102, "loss": 2.181, "step": 39130 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019895541598057137, "loss": 2.2254, "step": 39135 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.00019895514951102675, "loss": 2.4118, "step": 39140 }, { "epoch": 0.09, "grad_norm": 2.109375, "learning_rate": 0.00019895488300767725, "loss": 2.279, "step": 39145 }, { "epoch": 0.09, "grad_norm": 1.6484375, "learning_rate": 0.00019895461647052292, "loss": 1.9965, "step": 39150 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019895434989956396, "loss": 2.3752, "step": 39155 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019895408329480038, "loss": 2.1761, "step": 39160 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.0001989538166562323, "loss": 2.2487, "step": 39165 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019895354998385978, "loss": 2.2768, "step": 39170 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019895328327768294, "loss": 2.3112, "step": 39175 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019895301653770188, "loss": 2.1368, "step": 39180 }, { "epoch": 0.09, "grad_norm": 1.6015625, "learning_rate": 0.00019895274976391666, "loss": 2.128, "step": 39185 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001989524829563274, "loss": 2.3328, "step": 39190 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019895221611493418, "loss": 2.1571, "step": 39195 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.00019895194923973705, "loss": 2.2264, "step": 39200 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.0001989516823307362, "loss": 2.177, "step": 39205 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.0001989514153879316, "loss": 2.213, "step": 39210 }, { "epoch": 0.09, "grad_norm": 2.78125, "learning_rate": 0.00019895114841132343, "loss": 2.1012, "step": 39215 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019895088140091177, "loss": 2.1668, "step": 39220 }, { "epoch": 0.09, "grad_norm": 1.5, "learning_rate": 0.00019895061435669666, "loss": 2.1666, "step": 39225 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019895034727867824, "loss": 2.2961, "step": 39230 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001989500801668566, "loss": 2.0154, "step": 39235 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.00019894981302123182, "loss": 2.1598, "step": 39240 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019894954584180396, "loss": 2.2018, "step": 39245 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019894927862857314, "loss": 2.2294, "step": 39250 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.0001989490113815395, "loss": 2.1733, "step": 39255 }, { "epoch": 0.09, "grad_norm": 1.609375, "learning_rate": 0.00019894874410070304, "loss": 1.9801, "step": 39260 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.0001989484767860639, "loss": 2.0247, "step": 39265 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019894820943762217, "loss": 2.16, "step": 39270 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019894794205537795, "loss": 2.1767, "step": 39275 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019894767463933132, "loss": 2.2087, "step": 39280 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019894740718948235, "loss": 2.2209, "step": 39285 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019894713970583115, "loss": 2.1484, "step": 39290 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.0001989468721883778, "loss": 2.1894, "step": 39295 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019894660463712244, "loss": 2.1339, "step": 39300 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.0001989463370520651, "loss": 1.9622, "step": 39305 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019894606943320593, "loss": 2.2483, "step": 39310 }, { "epoch": 0.09, "grad_norm": 2.890625, "learning_rate": 0.00019894580178054497, "loss": 2.327, "step": 39315 }, { "epoch": 0.09, "grad_norm": 2.34375, "learning_rate": 0.00019894553409408232, "loss": 2.2151, "step": 39320 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019894526637381806, "loss": 2.0934, "step": 39325 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019894499861975235, "loss": 2.1138, "step": 39330 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019894473083188521, "loss": 2.2414, "step": 39335 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019894446301021675, "loss": 2.1904, "step": 39340 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.0001989441951547471, "loss": 2.2269, "step": 39345 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019894392726547627, "loss": 2.3261, "step": 39350 }, { "epoch": 0.09, "grad_norm": 1.9140625, "learning_rate": 0.00019894365934240441, "loss": 2.2053, "step": 39355 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019894339138553163, "loss": 2.3906, "step": 39360 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019894312339485798, "loss": 2.3542, "step": 39365 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019894285537038354, "loss": 2.1119, "step": 39370 }, { "epoch": 0.09, "grad_norm": 2.234375, "learning_rate": 0.00019894258731210847, "loss": 2.2051, "step": 39375 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.0001989423192200328, "loss": 2.2144, "step": 39380 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.00019894205109415665, "loss": 2.2192, "step": 39385 }, { "epoch": 0.09, "grad_norm": 2.65625, "learning_rate": 0.00019894178293448006, "loss": 2.1465, "step": 39390 }, { "epoch": 0.09, "grad_norm": 2.203125, "learning_rate": 0.0001989415147410032, "loss": 2.2379, "step": 39395 }, { "epoch": 0.09, "grad_norm": 2.15625, "learning_rate": 0.0001989412465137261, "loss": 2.1438, "step": 39400 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.0001989409782526489, "loss": 2.2931, "step": 39405 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019894070995777168, "loss": 2.1579, "step": 39410 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.00019894044162909448, "loss": 1.853, "step": 39415 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019894017326661746, "loss": 2.209, "step": 39420 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019893990487034063, "loss": 2.2743, "step": 39425 }, { "epoch": 0.09, "grad_norm": 2.625, "learning_rate": 0.0001989396364402642, "loss": 2.2511, "step": 39430 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.00019893936797638816, "loss": 2.3798, "step": 39435 }, { "epoch": 0.09, "grad_norm": 1.640625, "learning_rate": 0.00019893909947871265, "loss": 2.0042, "step": 39440 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019893883094723776, "loss": 2.0739, "step": 39445 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.00019893856238196355, "loss": 2.213, "step": 39450 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019893829378289012, "loss": 2.2715, "step": 39455 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.0001989380251500176, "loss": 2.1404, "step": 39460 }, { "epoch": 0.09, "grad_norm": 1.6328125, "learning_rate": 0.00019893775648334607, "loss": 2.1834, "step": 39465 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019893748778287558, "loss": 2.1377, "step": 39470 }, { "epoch": 0.09, "grad_norm": 1.9296875, "learning_rate": 0.00019893721904860626, "loss": 2.1637, "step": 39475 }, { "epoch": 0.09, "grad_norm": 1.6953125, "learning_rate": 0.00019893695028053819, "loss": 2.3672, "step": 39480 }, { "epoch": 0.09, "grad_norm": 1.9140625, "learning_rate": 0.00019893668147867146, "loss": 2.154, "step": 39485 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019893641264300617, "loss": 2.0629, "step": 39490 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.00019893614377354242, "loss": 2.1046, "step": 39495 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019893587487028027, "loss": 2.3075, "step": 39500 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019893560593321982, "loss": 2.1033, "step": 39505 }, { "epoch": 0.09, "grad_norm": 2.46875, "learning_rate": 0.0001989353369623612, "loss": 2.1536, "step": 39510 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.00019893506795770445, "loss": 2.3105, "step": 39515 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019893479891924972, "loss": 2.2481, "step": 39520 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019893452984699704, "loss": 2.1281, "step": 39525 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019893426074094652, "loss": 2.1022, "step": 39530 }, { "epoch": 0.09, "grad_norm": 1.6640625, "learning_rate": 0.00019893399160109828, "loss": 2.1392, "step": 39535 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.0001989337224274524, "loss": 2.2465, "step": 39540 }, { "epoch": 0.09, "grad_norm": 2.40625, "learning_rate": 0.00019893345322000898, "loss": 2.1487, "step": 39545 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019893318397876808, "loss": 2.1764, "step": 39550 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.0001989329147037298, "loss": 2.4001, "step": 39555 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019893264539489424, "loss": 2.2134, "step": 39560 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.0001989323760522615, "loss": 2.18, "step": 39565 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.0001989321066758317, "loss": 1.8982, "step": 39570 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019893183726560484, "loss": 2.2477, "step": 39575 }, { "epoch": 0.09, "grad_norm": 1.5234375, "learning_rate": 0.0001989315678215811, "loss": 2.2532, "step": 39580 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019893129834376057, "loss": 2.2489, "step": 39585 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019893102883214328, "loss": 2.1942, "step": 39590 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.00019893075928672935, "loss": 2.2793, "step": 39595 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019893048970751892, "loss": 1.9566, "step": 39600 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.000198930220094512, "loss": 2.1037, "step": 39605 }, { "epoch": 0.09, "grad_norm": 2.703125, "learning_rate": 0.00019892995044770871, "loss": 2.1335, "step": 39610 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.0001989296807671092, "loss": 2.1117, "step": 39615 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019892941105271347, "loss": 2.232, "step": 39620 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001989291413045217, "loss": 2.109, "step": 39625 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.00019892887152253393, "loss": 2.1269, "step": 39630 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019892860170675025, "loss": 2.2358, "step": 39635 }, { "epoch": 0.09, "grad_norm": 1.734375, "learning_rate": 0.00019892833185717078, "loss": 2.0829, "step": 39640 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.00019892806197379558, "loss": 2.3415, "step": 39645 }, { "epoch": 0.09, "grad_norm": 2.15625, "learning_rate": 0.00019892779205662477, "loss": 2.2733, "step": 39650 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.00019892752210565843, "loss": 1.8752, "step": 39655 }, { "epoch": 0.09, "grad_norm": 1.625, "learning_rate": 0.0001989272521208967, "loss": 2.1261, "step": 39660 }, { "epoch": 0.09, "grad_norm": 1.609375, "learning_rate": 0.00019892698210233957, "loss": 2.3267, "step": 39665 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.00019892671204998718, "loss": 2.2289, "step": 39670 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019892644196383967, "loss": 2.1496, "step": 39675 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.0001989261718438971, "loss": 2.1044, "step": 39680 }, { "epoch": 0.09, "grad_norm": 2.28125, "learning_rate": 0.0001989259016901595, "loss": 2.0941, "step": 39685 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019892563150262707, "loss": 2.341, "step": 39690 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019892536128129985, "loss": 2.1969, "step": 39695 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019892509102617788, "loss": 2.0778, "step": 39700 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.00019892482073726136, "loss": 2.2353, "step": 39705 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.0001989245504145503, "loss": 2.2493, "step": 39710 }, { "epoch": 0.09, "grad_norm": 2.359375, "learning_rate": 0.00019892428005804482, "loss": 2.1712, "step": 39715 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019892400966774502, "loss": 2.1289, "step": 39720 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.000198923739243651, "loss": 2.1346, "step": 39725 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001989234687857628, "loss": 2.18, "step": 39730 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019892319829408058, "loss": 1.93, "step": 39735 }, { "epoch": 0.09, "grad_norm": 2.1875, "learning_rate": 0.00019892292776860438, "loss": 2.2697, "step": 39740 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.00019892265720933432, "loss": 2.1871, "step": 39745 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019892238661627047, "loss": 2.1862, "step": 39750 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.00019892211598941298, "loss": 2.1773, "step": 39755 }, { "epoch": 0.09, "grad_norm": 1.5859375, "learning_rate": 0.0001989218453287619, "loss": 2.2181, "step": 39760 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.00019892157463431728, "loss": 1.9954, "step": 39765 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019892130390607927, "loss": 2.2397, "step": 39770 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019892103314404796, "loss": 2.1198, "step": 39775 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.00019892076234822342, "loss": 2.2197, "step": 39780 }, { "epoch": 0.09, "grad_norm": 1.96875, "learning_rate": 0.00019892049151860576, "loss": 2.3294, "step": 39785 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.0001989202206551951, "loss": 2.1089, "step": 39790 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.00019891994975799145, "loss": 2.1654, "step": 39795 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019891967882699495, "loss": 2.0158, "step": 39800 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.00019891940786220574, "loss": 2.2133, "step": 39805 }, { "epoch": 0.09, "grad_norm": 1.59375, "learning_rate": 0.00019891913686362383, "loss": 2.0148, "step": 39810 }, { "epoch": 0.09, "grad_norm": 1.9375, "learning_rate": 0.00019891886583124936, "loss": 2.0142, "step": 39815 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.0001989185947650824, "loss": 2.1099, "step": 39820 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019891832366512305, "loss": 2.0388, "step": 39825 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019891805253137142, "loss": 2.2269, "step": 39830 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.0001989177813638276, "loss": 2.4028, "step": 39835 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.00019891751016249166, "loss": 1.9753, "step": 39840 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019891723892736369, "loss": 2.1744, "step": 39845 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019891696765844384, "loss": 2.1866, "step": 39850 }, { "epoch": 0.09, "grad_norm": 1.5546875, "learning_rate": 0.0001989166963557321, "loss": 2.3569, "step": 39855 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019891642501922868, "loss": 2.1054, "step": 39860 }, { "epoch": 0.09, "grad_norm": 1.234375, "learning_rate": 0.00019891615364893357, "loss": 1.9053, "step": 39865 }, { "epoch": 0.09, "grad_norm": 1.6796875, "learning_rate": 0.00019891588224484692, "loss": 2.1085, "step": 39870 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019891561080696884, "loss": 2.3714, "step": 39875 }, { "epoch": 0.09, "grad_norm": 1.9609375, "learning_rate": 0.00019891533933529936, "loss": 2.2984, "step": 39880 }, { "epoch": 0.09, "grad_norm": 1.8359375, "learning_rate": 0.00019891506782983862, "loss": 2.0856, "step": 39885 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.00019891479629058668, "loss": 2.1967, "step": 39890 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019891452471754368, "loss": 2.2077, "step": 39895 }, { "epoch": 0.09, "grad_norm": 5.46875, "learning_rate": 0.00019891425311070968, "loss": 2.24, "step": 39900 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019891398147008475, "loss": 2.2235, "step": 39905 }, { "epoch": 0.09, "grad_norm": 1.5, "learning_rate": 0.00019891370979566904, "loss": 1.9969, "step": 39910 }, { "epoch": 0.09, "grad_norm": 2.015625, "learning_rate": 0.0001989134380874626, "loss": 2.0905, "step": 39915 }, { "epoch": 0.09, "grad_norm": 1.515625, "learning_rate": 0.00019891316634546553, "loss": 1.8886, "step": 39920 }, { "epoch": 0.09, "grad_norm": 2.828125, "learning_rate": 0.00019891289456967795, "loss": 1.9132, "step": 39925 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019891262276009993, "loss": 2.0979, "step": 39930 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019891235091673153, "loss": 2.2671, "step": 39935 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.0001989120790395729, "loss": 2.2797, "step": 39940 }, { "epoch": 0.09, "grad_norm": 2.640625, "learning_rate": 0.00019891180712862413, "loss": 2.1354, "step": 39945 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019891153518388527, "loss": 2.3367, "step": 39950 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.00019891126320535643, "loss": 2.1676, "step": 39955 }, { "epoch": 0.09, "grad_norm": 1.8203125, "learning_rate": 0.0001989109911930377, "loss": 2.0239, "step": 39960 }, { "epoch": 0.09, "grad_norm": 1.7109375, "learning_rate": 0.0001989107191469292, "loss": 2.1563, "step": 39965 }, { "epoch": 0.09, "grad_norm": 1.453125, "learning_rate": 0.00019891044706703102, "loss": 2.2871, "step": 39970 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.0001989101749533432, "loss": 2.3459, "step": 39975 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.0001989099028058659, "loss": 2.1593, "step": 39980 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001989096306245992, "loss": 2.17, "step": 39985 }, { "epoch": 0.09, "grad_norm": 1.5859375, "learning_rate": 0.00019890935840954314, "loss": 2.1453, "step": 39990 }, { "epoch": 0.09, "grad_norm": 1.84375, "learning_rate": 0.00019890908616069784, "loss": 2.1441, "step": 39995 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019890881387806344, "loss": 2.131, "step": 40000 }, { "epoch": 0.09, "grad_norm": 1.734375, "learning_rate": 0.00019890854156163996, "loss": 2.2151, "step": 40005 }, { "epoch": 0.09, "grad_norm": 1.453125, "learning_rate": 0.00019890826921142756, "loss": 1.9274, "step": 40010 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019890799682742628, "loss": 2.1728, "step": 40015 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019890772440963624, "loss": 2.1269, "step": 40020 }, { "epoch": 0.09, "grad_norm": 1.953125, "learning_rate": 0.0001989074519580575, "loss": 2.2465, "step": 40025 }, { "epoch": 0.09, "grad_norm": 1.90625, "learning_rate": 0.00019890717947269022, "loss": 2.1884, "step": 40030 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019890690695353443, "loss": 2.1624, "step": 40035 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019890663440059027, "loss": 2.2227, "step": 40040 }, { "epoch": 0.09, "grad_norm": 1.890625, "learning_rate": 0.00019890636181385781, "loss": 2.1422, "step": 40045 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019890608919333715, "loss": 2.1172, "step": 40050 }, { "epoch": 0.09, "grad_norm": 2.171875, "learning_rate": 0.00019890581653902832, "loss": 2.2531, "step": 40055 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.00019890554385093155, "loss": 2.2439, "step": 40060 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.0001989052711290468, "loss": 2.1382, "step": 40065 }, { "epoch": 0.09, "grad_norm": 1.6171875, "learning_rate": 0.00019890499837337423, "loss": 2.112, "step": 40070 }, { "epoch": 0.09, "grad_norm": 1.796875, "learning_rate": 0.0001989047255839139, "loss": 2.3726, "step": 40075 }, { "epoch": 0.09, "grad_norm": 2.0, "learning_rate": 0.00019890445276066597, "loss": 2.2734, "step": 40080 }, { "epoch": 0.09, "grad_norm": 2.28125, "learning_rate": 0.00019890417990363042, "loss": 2.2904, "step": 40085 }, { "epoch": 0.09, "grad_norm": 1.65625, "learning_rate": 0.00019890390701280745, "loss": 2.1098, "step": 40090 }, { "epoch": 0.09, "grad_norm": 1.875, "learning_rate": 0.00019890363408819713, "loss": 2.2423, "step": 40095 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001989033611297995, "loss": 2.22, "step": 40100 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.0001989030881376147, "loss": 2.3849, "step": 40105 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019890281511164277, "loss": 2.1674, "step": 40110 }, { "epoch": 0.09, "grad_norm": 2.125, "learning_rate": 0.0001989025420518839, "loss": 2.3507, "step": 40115 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.0001989022689583381, "loss": 2.0815, "step": 40120 }, { "epoch": 0.09, "grad_norm": 1.7421875, "learning_rate": 0.0001989019958310055, "loss": 2.1755, "step": 40125 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.0001989017226698862, "loss": 2.162, "step": 40130 }, { "epoch": 0.09, "grad_norm": 1.7265625, "learning_rate": 0.00019890144947498026, "loss": 2.1027, "step": 40135 }, { "epoch": 0.09, "grad_norm": 2.3125, "learning_rate": 0.0001989011762462878, "loss": 2.197, "step": 40140 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.00019890090298380888, "loss": 2.2912, "step": 40145 }, { "epoch": 0.09, "grad_norm": 2.25, "learning_rate": 0.00019890062968754365, "loss": 2.0312, "step": 40150 }, { "epoch": 0.09, "grad_norm": 1.9453125, "learning_rate": 0.00019890035635749216, "loss": 2.282, "step": 40155 }, { "epoch": 0.09, "grad_norm": 1.765625, "learning_rate": 0.00019890008299365452, "loss": 2.3214, "step": 40160 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.00019889980959603078, "loss": 2.1412, "step": 40165 }, { "epoch": 0.09, "grad_norm": 2.21875, "learning_rate": 0.00019889953616462113, "loss": 1.9238, "step": 40170 }, { "epoch": 0.09, "grad_norm": 1.7890625, "learning_rate": 0.00019889926269942558, "loss": 2.1111, "step": 40175 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.00019889898920044422, "loss": 2.2632, "step": 40180 }, { "epoch": 0.09, "grad_norm": 1.9921875, "learning_rate": 0.00019889871566767723, "loss": 2.1906, "step": 40185 }, { "epoch": 0.09, "grad_norm": 1.671875, "learning_rate": 0.0001988984421011246, "loss": 2.2982, "step": 40190 }, { "epoch": 0.09, "grad_norm": 1.8125, "learning_rate": 0.00019889816850078646, "loss": 2.1275, "step": 40195 }, { "epoch": 0.09, "grad_norm": 1.75, "learning_rate": 0.00019889789486666294, "loss": 2.225, "step": 40200 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.0001988976211987541, "loss": 2.1972, "step": 40205 }, { "epoch": 0.09, "grad_norm": 2.046875, "learning_rate": 0.00019889734749706003, "loss": 2.159, "step": 40210 }, { "epoch": 0.09, "grad_norm": 1.703125, "learning_rate": 0.00019889707376158087, "loss": 2.2034, "step": 40215 }, { "epoch": 0.09, "grad_norm": 1.6875, "learning_rate": 0.00019889679999231664, "loss": 2.3939, "step": 40220 }, { "epoch": 0.09, "grad_norm": 2.15625, "learning_rate": 0.00019889652618926745, "loss": 2.1912, "step": 40225 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 0.00019889625235243347, "loss": 2.1956, "step": 40230 }, { "epoch": 0.09, "grad_norm": 2.0625, "learning_rate": 0.00019889597848181468, "loss": 2.3685, "step": 40235 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019889570457741127, "loss": 2.194, "step": 40240 }, { "epoch": 0.09, "grad_norm": 1.8828125, "learning_rate": 0.00019889543063922327, "loss": 2.2218, "step": 40245 }, { "epoch": 0.09, "grad_norm": 1.828125, "learning_rate": 0.0001988951566672508, "loss": 2.1868, "step": 40250 }, { "epoch": 0.09, "grad_norm": 2.140625, "learning_rate": 0.00019889488266149397, "loss": 2.2218, "step": 40255 }, { "epoch": 0.09, "grad_norm": 1.5546875, "learning_rate": 0.00019889460862195283, "loss": 2.2167, "step": 40260 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.0001988943345486275, "loss": 2.2721, "step": 40265 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001988940604415181, "loss": 2.1063, "step": 40270 }, { "epoch": 0.09, "grad_norm": 1.921875, "learning_rate": 0.00019889378630062466, "loss": 2.1272, "step": 40275 }, { "epoch": 0.09, "grad_norm": 1.7578125, "learning_rate": 0.00019889351212594734, "loss": 2.0942, "step": 40280 }, { "epoch": 0.09, "grad_norm": 1.71875, "learning_rate": 0.0001988932379174862, "loss": 2.1947, "step": 40285 }, { "epoch": 0.09, "grad_norm": 2.09375, "learning_rate": 0.0001988929636752413, "loss": 2.2113, "step": 40290 }, { "epoch": 0.09, "grad_norm": 1.8671875, "learning_rate": 0.0001988926893992128, "loss": 2.1496, "step": 40295 }, { "epoch": 0.09, "grad_norm": 1.578125, "learning_rate": 0.00019889241508940075, "loss": 2.1031, "step": 40300 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019889214074580528, "loss": 2.2662, "step": 40305 }, { "epoch": 0.09, "grad_norm": 2.328125, "learning_rate": 0.00019889186636842644, "loss": 2.1991, "step": 40310 }, { "epoch": 0.09, "grad_norm": 1.5703125, "learning_rate": 0.00019889159195726435, "loss": 2.3222, "step": 40315 }, { "epoch": 0.09, "grad_norm": 2.078125, "learning_rate": 0.00019889131751231912, "loss": 2.3525, "step": 40320 }, { "epoch": 0.09, "grad_norm": 2.03125, "learning_rate": 0.0001988910430335908, "loss": 2.1873, "step": 40325 }, { "epoch": 0.09, "grad_norm": 1.9765625, "learning_rate": 0.0001988907685210795, "loss": 2.2123, "step": 40330 }, { "epoch": 0.09, "grad_norm": 1.8046875, "learning_rate": 0.00019889049397478533, "loss": 2.021, "step": 40335 }, { "epoch": 0.09, "grad_norm": 1.7734375, "learning_rate": 0.0001988902193947084, "loss": 2.192, "step": 40340 }, { "epoch": 0.09, "grad_norm": 1.8515625, "learning_rate": 0.00019888994478084872, "loss": 1.9728, "step": 40345 }, { "epoch": 0.09, "grad_norm": 1.8984375, "learning_rate": 0.0001988896701332065, "loss": 2.3809, "step": 40350 }, { "epoch": 0.09, "grad_norm": 1.703125, "learning_rate": 0.00019888939545178173, "loss": 2.2228, "step": 40355 }, { "epoch": 0.09, "grad_norm": 1.984375, "learning_rate": 0.00019888912073657457, "loss": 2.0897, "step": 40360 }, { "epoch": 0.09, "grad_norm": 1.859375, "learning_rate": 0.00019888884598758512, "loss": 2.1284, "step": 40365 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019888857120481339, "loss": 2.2318, "step": 40370 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019888829638825954, "loss": 2.2907, "step": 40375 }, { "epoch": 0.1, "grad_norm": 2.40625, "learning_rate": 0.00019888802153792366, "loss": 2.3642, "step": 40380 }, { "epoch": 0.1, "grad_norm": 2.296875, "learning_rate": 0.00019888774665380586, "loss": 2.2097, "step": 40385 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001988874717359062, "loss": 2.2991, "step": 40390 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.00019888719678422482, "loss": 2.0055, "step": 40395 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.00019888692179876173, "loss": 2.2332, "step": 40400 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.0001988866467795171, "loss": 2.0461, "step": 40405 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.00019888637172649098, "loss": 2.1593, "step": 40410 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019888609663968349, "loss": 2.1846, "step": 40415 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019888582151909474, "loss": 2.1475, "step": 40420 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019888554636472477, "loss": 2.2386, "step": 40425 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019888527117657373, "loss": 2.2637, "step": 40430 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019888499595464165, "loss": 2.2508, "step": 40435 }, { "epoch": 0.1, "grad_norm": 1.484375, "learning_rate": 0.0001988847206989287, "loss": 2.0472, "step": 40440 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019888444540943492, "loss": 1.9568, "step": 40445 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.0001988841700861604, "loss": 1.9334, "step": 40450 }, { "epoch": 0.1, "grad_norm": 1.5859375, "learning_rate": 0.00019888389472910527, "loss": 2.1622, "step": 40455 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019888361933826962, "loss": 2.2013, "step": 40460 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.00019888334391365354, "loss": 2.1678, "step": 40465 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019888306845525707, "loss": 2.105, "step": 40470 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019888279296308042, "loss": 2.2321, "step": 40475 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019888251743712355, "loss": 2.3392, "step": 40480 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019888224187738663, "loss": 1.9933, "step": 40485 }, { "epoch": 0.1, "grad_norm": 1.609375, "learning_rate": 0.00019888196628386977, "loss": 2.0735, "step": 40490 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019888169065657304, "loss": 2.0627, "step": 40495 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.0001988814149954965, "loss": 2.1776, "step": 40500 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019888113930064029, "loss": 2.2455, "step": 40505 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.00019888086357200448, "loss": 2.3871, "step": 40510 }, { "epoch": 0.1, "grad_norm": 1.6171875, "learning_rate": 0.0001988805878095892, "loss": 2.081, "step": 40515 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.0001988803120133945, "loss": 2.2798, "step": 40520 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019888003618342048, "loss": 2.2482, "step": 40525 }, { "epoch": 0.1, "grad_norm": 1.609375, "learning_rate": 0.00019887976031966726, "loss": 2.1739, "step": 40530 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019887948442213492, "loss": 2.2307, "step": 40535 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.00019887920849082357, "loss": 2.1862, "step": 40540 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.00019887893252573327, "loss": 2.1418, "step": 40545 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019887865652686414, "loss": 2.1801, "step": 40550 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019887838049421626, "loss": 2.1919, "step": 40555 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019887810442778973, "loss": 2.0506, "step": 40560 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019887782832758466, "loss": 2.2331, "step": 40565 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019887755219360115, "loss": 2.3301, "step": 40570 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019887727602583923, "loss": 2.0829, "step": 40575 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019887699982429905, "loss": 2.0433, "step": 40580 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.0001988767235889807, "loss": 2.1881, "step": 40585 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019887644731988427, "loss": 2.0402, "step": 40590 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019887617101700986, "loss": 2.2798, "step": 40595 }, { "epoch": 0.1, "grad_norm": 1.671875, "learning_rate": 0.00019887589468035755, "loss": 2.2266, "step": 40600 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019887561830992745, "loss": 2.2944, "step": 40605 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.0001988753419057196, "loss": 2.145, "step": 40610 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.0001988750654677342, "loss": 2.3226, "step": 40615 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019887478899597127, "loss": 2.2847, "step": 40620 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.0001988745124904309, "loss": 2.3412, "step": 40625 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019887423595111323, "loss": 2.2062, "step": 40630 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.0001988739593780183, "loss": 2.3945, "step": 40635 }, { "epoch": 0.1, "grad_norm": 2.390625, "learning_rate": 0.00019887368277114625, "loss": 2.1545, "step": 40640 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019887340613049714, "loss": 2.1323, "step": 40645 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.00019887312945607107, "loss": 2.3051, "step": 40650 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019887285274786817, "loss": 2.3532, "step": 40655 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019887257600588852, "loss": 2.2609, "step": 40660 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019887229923013218, "loss": 2.2034, "step": 40665 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.0001988720224205993, "loss": 2.1948, "step": 40670 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019887174557728994, "loss": 2.0867, "step": 40675 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019887146870020418, "loss": 2.0645, "step": 40680 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019887119178934213, "loss": 2.2129, "step": 40685 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019887091484470387, "loss": 2.2435, "step": 40690 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019887063786628954, "loss": 2.2423, "step": 40695 }, { "epoch": 0.1, "grad_norm": 1.5859375, "learning_rate": 0.00019887036085409923, "loss": 2.1249, "step": 40700 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019887008380813299, "loss": 2.0809, "step": 40705 }, { "epoch": 0.1, "grad_norm": 2.375, "learning_rate": 0.00019886980672839091, "loss": 2.0381, "step": 40710 }, { "epoch": 0.1, "grad_norm": 1.7578125, "learning_rate": 0.00019886952961487313, "loss": 2.2748, "step": 40715 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019886925246757976, "loss": 2.0737, "step": 40720 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.0001988689752865108, "loss": 2.32, "step": 40725 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019886869807166644, "loss": 2.2106, "step": 40730 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019886842082304673, "loss": 2.3875, "step": 40735 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.00019886814354065177, "loss": 2.0862, "step": 40740 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.0001988678662244817, "loss": 2.2165, "step": 40745 }, { "epoch": 0.1, "grad_norm": 1.625, "learning_rate": 0.00019886758887453651, "loss": 2.1848, "step": 40750 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019886731149081638, "loss": 2.2334, "step": 40755 }, { "epoch": 0.1, "grad_norm": 1.7578125, "learning_rate": 0.0001988670340733214, "loss": 2.2858, "step": 40760 }, { "epoch": 0.1, "grad_norm": 1.453125, "learning_rate": 0.00019886675662205165, "loss": 2.167, "step": 40765 }, { "epoch": 0.1, "grad_norm": 1.65625, "learning_rate": 0.0001988664791370072, "loss": 2.3325, "step": 40770 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.0001988662016181882, "loss": 2.1359, "step": 40775 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.0001988659240655947, "loss": 2.1652, "step": 40780 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019886564647922681, "loss": 2.1674, "step": 40785 }, { "epoch": 0.1, "grad_norm": 2.25, "learning_rate": 0.0001988653688590846, "loss": 1.8896, "step": 40790 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.0001988650912051682, "loss": 2.066, "step": 40795 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019886481351747768, "loss": 2.0663, "step": 40800 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019886453579601317, "loss": 2.1628, "step": 40805 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019886425804077474, "loss": 2.3225, "step": 40810 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019886398025176246, "loss": 2.2947, "step": 40815 }, { "epoch": 0.1, "grad_norm": 1.484375, "learning_rate": 0.00019886370242897647, "loss": 2.1734, "step": 40820 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019886342457241685, "loss": 2.2184, "step": 40825 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.0001988631466820837, "loss": 2.09, "step": 40830 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019886286875797708, "loss": 2.2415, "step": 40835 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019886259080009714, "loss": 2.1873, "step": 40840 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019886231280844392, "loss": 2.2204, "step": 40845 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019886203478301754, "loss": 2.2505, "step": 40850 }, { "epoch": 0.1, "grad_norm": 2.53125, "learning_rate": 0.0001988617567238181, "loss": 2.1494, "step": 40855 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019886147863084572, "loss": 2.5088, "step": 40860 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019886120050410043, "loss": 2.2935, "step": 40865 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019886092234358235, "loss": 1.9861, "step": 40870 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019886064414929164, "loss": 2.2552, "step": 40875 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.0001988603659212283, "loss": 2.0901, "step": 40880 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019886008765939247, "loss": 1.9992, "step": 40885 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.00019885980936378426, "loss": 2.1371, "step": 40890 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.00019885953103440372, "loss": 2.1479, "step": 40895 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.000198859252671251, "loss": 2.1739, "step": 40900 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019885897427432615, "loss": 2.2472, "step": 40905 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019885869584362927, "loss": 2.1568, "step": 40910 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.0001988584173791605, "loss": 1.9591, "step": 40915 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.00019885813888091987, "loss": 2.0373, "step": 40920 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019885786034890752, "loss": 2.1582, "step": 40925 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019885758178312353, "loss": 2.3056, "step": 40930 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019885730318356802, "loss": 1.9965, "step": 40935 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019885702455024103, "loss": 2.353, "step": 40940 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019885674588314268, "loss": 2.2862, "step": 40945 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.00019885646718227311, "loss": 2.2295, "step": 40950 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019885618844763233, "loss": 2.2728, "step": 40955 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019885590967922055, "loss": 2.2132, "step": 40960 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019885563087703774, "loss": 2.2578, "step": 40965 }, { "epoch": 0.1, "grad_norm": 1.5390625, "learning_rate": 0.0001988553520410841, "loss": 2.1476, "step": 40970 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019885507317135963, "loss": 2.1831, "step": 40975 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.0001988547942678645, "loss": 2.0689, "step": 40980 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019885451533059877, "loss": 2.1314, "step": 40985 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019885423635956255, "loss": 2.1977, "step": 40990 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.0001988539573547559, "loss": 2.2628, "step": 40995 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.000198853678316179, "loss": 2.0529, "step": 41000 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019885339924383187, "loss": 2.0788, "step": 41005 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.0001988531201377146, "loss": 2.1192, "step": 41010 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.00019885284099782738, "loss": 2.1251, "step": 41015 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019885256182417015, "loss": 2.1333, "step": 41020 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019885228261674315, "loss": 1.9556, "step": 41025 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.0001988520033755464, "loss": 2.2395, "step": 41030 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019885172410058, "loss": 2.0966, "step": 41035 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.0001988514447918441, "loss": 2.0918, "step": 41040 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019885116544933872, "loss": 2.1199, "step": 41045 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.000198850886073064, "loss": 2.2048, "step": 41050 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019885060666302, "loss": 2.225, "step": 41055 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.00019885032721920687, "loss": 2.1675, "step": 41060 }, { "epoch": 0.1, "grad_norm": 1.671875, "learning_rate": 0.00019885004774162468, "loss": 2.2246, "step": 41065 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001988497682302735, "loss": 2.1495, "step": 41070 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019884948868515343, "loss": 2.1152, "step": 41075 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.0001988492091062646, "loss": 2.1785, "step": 41080 }, { "epoch": 0.1, "grad_norm": 1.625, "learning_rate": 0.00019884892949360711, "loss": 1.9961, "step": 41085 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.000198848649847181, "loss": 2.2055, "step": 41090 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.0001988483701669864, "loss": 2.1782, "step": 41095 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019884809045302344, "loss": 2.1242, "step": 41100 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019884781070529214, "loss": 2.1832, "step": 41105 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019884753092379266, "loss": 2.2665, "step": 41110 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019884725110852508, "loss": 2.4336, "step": 41115 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019884697125948945, "loss": 2.1471, "step": 41120 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019884669137668594, "loss": 2.404, "step": 41125 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019884641146011457, "loss": 2.2348, "step": 41130 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.0001988461315097755, "loss": 2.4149, "step": 41135 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019884585152566878, "loss": 2.2454, "step": 41140 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019884557150779455, "loss": 2.1748, "step": 41145 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019884529145615284, "loss": 2.1804, "step": 41150 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019884501137074382, "loss": 2.2586, "step": 41155 }, { "epoch": 0.1, "grad_norm": 1.65625, "learning_rate": 0.00019884473125156754, "loss": 2.2758, "step": 41160 }, { "epoch": 0.1, "grad_norm": 1.46875, "learning_rate": 0.00019884445109862413, "loss": 2.1524, "step": 41165 }, { "epoch": 0.1, "grad_norm": 1.640625, "learning_rate": 0.00019884417091191363, "loss": 2.0734, "step": 41170 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.0001988438906914362, "loss": 2.2983, "step": 41175 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019884361043719188, "loss": 2.1835, "step": 41180 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019884333014918076, "loss": 2.2411, "step": 41185 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019884304982740303, "loss": 1.9731, "step": 41190 }, { "epoch": 0.1, "grad_norm": 1.5, "learning_rate": 0.0001988427694718587, "loss": 2.1534, "step": 41195 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019884248908254786, "loss": 2.3236, "step": 41200 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019884220865947066, "loss": 2.1821, "step": 41205 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019884192820262718, "loss": 2.0692, "step": 41210 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019884164771201746, "loss": 2.2982, "step": 41215 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019884136718764168, "loss": 2.2028, "step": 41220 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.00019884108662949988, "loss": 2.0161, "step": 41225 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019884080603759218, "loss": 2.3301, "step": 41230 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.00019884052541191863, "loss": 2.1629, "step": 41235 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019884024475247942, "loss": 2.2317, "step": 41240 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019883996405927457, "loss": 2.1796, "step": 41245 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.0001988396833323042, "loss": 2.0042, "step": 41250 }, { "epoch": 0.1, "grad_norm": 1.515625, "learning_rate": 0.0001988394025715684, "loss": 2.226, "step": 41255 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.00019883912177706724, "loss": 2.1064, "step": 41260 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019883884094880087, "loss": 2.1385, "step": 41265 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019883856008676934, "loss": 2.1115, "step": 41270 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001988382791909728, "loss": 2.3615, "step": 41275 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.0001988379982614113, "loss": 2.2931, "step": 41280 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.00019883771729808494, "loss": 2.2566, "step": 41285 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.0001988374363009938, "loss": 2.0625, "step": 41290 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.00019883715527013805, "loss": 2.167, "step": 41295 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.0001988368742055177, "loss": 1.9432, "step": 41300 }, { "epoch": 0.1, "grad_norm": 1.578125, "learning_rate": 0.00019883659310713288, "loss": 2.1712, "step": 41305 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.0001988363119749837, "loss": 2.2176, "step": 41310 }, { "epoch": 0.1, "grad_norm": 2.296875, "learning_rate": 0.00019883603080907025, "loss": 2.1881, "step": 41315 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.0001988357496093926, "loss": 2.1637, "step": 41320 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.0001988354683759509, "loss": 2.1291, "step": 41325 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019883518710874515, "loss": 2.0624, "step": 41330 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.00019883490580777555, "loss": 2.1411, "step": 41335 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019883462447304213, "loss": 2.2432, "step": 41340 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019883434310454505, "loss": 2.1181, "step": 41345 }, { "epoch": 0.1, "grad_norm": 1.2890625, "learning_rate": 0.00019883406170228434, "loss": 2.0419, "step": 41350 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019883378026626012, "loss": 2.2255, "step": 41355 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019883349879647248, "loss": 2.2924, "step": 41360 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019883321729292154, "loss": 2.2547, "step": 41365 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.0001988329357556074, "loss": 2.082, "step": 41370 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.0001988326541845301, "loss": 2.242, "step": 41375 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019883237257968978, "loss": 2.193, "step": 41380 }, { "epoch": 0.1, "grad_norm": 1.640625, "learning_rate": 0.00019883209094108655, "loss": 2.0248, "step": 41385 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019883180926872046, "loss": 2.1329, "step": 41390 }, { "epoch": 0.1, "grad_norm": 2.328125, "learning_rate": 0.00019883152756259166, "loss": 2.3389, "step": 41395 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.00019883124582270018, "loss": 2.2386, "step": 41400 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019883096404904618, "loss": 2.6235, "step": 41405 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019883068224162972, "loss": 2.2231, "step": 41410 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.0001988304004004509, "loss": 2.0498, "step": 41415 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.00019883011852550985, "loss": 2.3347, "step": 41420 }, { "epoch": 0.1, "grad_norm": 2.21875, "learning_rate": 0.00019882983661680662, "loss": 2.2382, "step": 41425 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019882955467434132, "loss": 2.0595, "step": 41430 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019882927269811407, "loss": 2.2706, "step": 41435 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019882899068812495, "loss": 2.0513, "step": 41440 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019882870864437404, "loss": 2.1585, "step": 41445 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019882842656686144, "loss": 2.1981, "step": 41450 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.0001988281444555873, "loss": 1.9294, "step": 41455 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019882786231055163, "loss": 2.2491, "step": 41460 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.0001988275801317546, "loss": 2.1506, "step": 41465 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019882729791919626, "loss": 2.2138, "step": 41470 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.0001988270156728767, "loss": 2.1969, "step": 41475 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019882673339279607, "loss": 2.1279, "step": 41480 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019882645107895445, "loss": 2.1026, "step": 41485 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019882616873135187, "loss": 2.0497, "step": 41490 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.00019882588634998852, "loss": 2.0086, "step": 41495 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019882560393486443, "loss": 2.2597, "step": 41500 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.00019882532148597973, "loss": 2.019, "step": 41505 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.00019882503900333455, "loss": 2.2517, "step": 41510 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019882475648692888, "loss": 2.1759, "step": 41515 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019882447393676291, "loss": 2.1786, "step": 41520 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.0001988241913528367, "loss": 2.2078, "step": 41525 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.00019882390873515038, "loss": 2.2657, "step": 41530 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.000198823626083704, "loss": 2.1979, "step": 41535 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019882334339849767, "loss": 2.0573, "step": 41540 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.0001988230606795315, "loss": 2.428, "step": 41545 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.00019882277792680558, "loss": 2.06, "step": 41550 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019882249514032, "loss": 2.326, "step": 41555 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019882221232007488, "loss": 2.2523, "step": 41560 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.0001988219294660703, "loss": 2.0948, "step": 41565 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019882164657830634, "loss": 2.2053, "step": 41570 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.0001988213636567831, "loss": 2.0079, "step": 41575 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019882108070150075, "loss": 2.1558, "step": 41580 }, { "epoch": 0.1, "grad_norm": 1.59375, "learning_rate": 0.00019882079771245926, "loss": 2.0746, "step": 41585 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019882051468965884, "loss": 2.3007, "step": 41590 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.0001988202316330995, "loss": 2.268, "step": 41595 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001988199485427814, "loss": 2.1361, "step": 41600 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019881966541870463, "loss": 2.2462, "step": 41605 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019881938226086923, "loss": 2.0532, "step": 41610 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019881909906927536, "loss": 2.0611, "step": 41615 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.0001988188158439231, "loss": 2.283, "step": 41620 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019881853258481253, "loss": 2.1468, "step": 41625 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019881824929194377, "loss": 2.2309, "step": 41630 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.00019881796596531688, "loss": 1.9476, "step": 41635 }, { "epoch": 0.1, "grad_norm": 1.609375, "learning_rate": 0.00019881768260493197, "loss": 2.1981, "step": 41640 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.0001988173992107892, "loss": 2.2013, "step": 41645 }, { "epoch": 0.1, "grad_norm": 1.640625, "learning_rate": 0.0001988171157828886, "loss": 2.2714, "step": 41650 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019881683232123026, "loss": 1.9847, "step": 41655 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.0001988165488258143, "loss": 2.0904, "step": 41660 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019881626529664082, "loss": 2.2051, "step": 41665 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.0001988159817337099, "loss": 2.1424, "step": 41670 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.0001988156981370217, "loss": 1.8781, "step": 41675 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.0001988154145065762, "loss": 2.2859, "step": 41680 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019881513084237358, "loss": 2.0698, "step": 41685 }, { "epoch": 0.1, "grad_norm": 1.65625, "learning_rate": 0.00019881484714441393, "loss": 2.2545, "step": 41690 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019881456341269734, "loss": 2.0338, "step": 41695 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019881427964722387, "loss": 2.297, "step": 41700 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.0001988139958479937, "loss": 2.2094, "step": 41705 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019881371201500685, "loss": 2.0614, "step": 41710 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019881342814826342, "loss": 2.1533, "step": 41715 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019881314424776355, "loss": 2.021, "step": 41720 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019881286031350737, "loss": 2.1232, "step": 41725 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019881257634549485, "loss": 2.1637, "step": 41730 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.0001988122923437262, "loss": 2.0667, "step": 41735 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019881200830820146, "loss": 2.3621, "step": 41740 }, { "epoch": 0.1, "grad_norm": 1.6875, "learning_rate": 0.00019881172423892078, "loss": 2.1383, "step": 41745 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019881144013588416, "loss": 2.3222, "step": 41750 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001988111559990918, "loss": 2.2294, "step": 41755 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001988108718285438, "loss": 2.1531, "step": 41760 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019881058762424015, "loss": 2.0523, "step": 41765 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.00019881030338618106, "loss": 2.2962, "step": 41770 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019881001911436653, "loss": 2.1735, "step": 41775 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019880973480879672, "loss": 2.2705, "step": 41780 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019880945046947172, "loss": 2.0924, "step": 41785 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019880916609639162, "loss": 2.262, "step": 41790 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.0001988088816895565, "loss": 2.204, "step": 41795 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.0001988085972489665, "loss": 2.3828, "step": 41800 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019880831277462168, "loss": 2.2443, "step": 41805 }, { "epoch": 0.1, "grad_norm": 1.578125, "learning_rate": 0.00019880802826652215, "loss": 2.1488, "step": 41810 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.000198807743724668, "loss": 2.0361, "step": 41815 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019880745914905936, "loss": 2.1334, "step": 41820 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019880717453969626, "loss": 2.1624, "step": 41825 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019880688989657886, "loss": 2.2257, "step": 41830 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019880660521970722, "loss": 2.0947, "step": 41835 }, { "epoch": 0.1, "grad_norm": 2.34375, "learning_rate": 0.00019880632050908145, "loss": 2.0676, "step": 41840 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019880603576470166, "loss": 2.2028, "step": 41845 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019880575098656794, "loss": 2.3622, "step": 41850 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019880546617468038, "loss": 2.1681, "step": 41855 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019880518132903908, "loss": 2.147, "step": 41860 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019880489644964416, "loss": 2.0842, "step": 41865 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019880461153649567, "loss": 2.0644, "step": 41870 }, { "epoch": 0.1, "grad_norm": 1.625, "learning_rate": 0.0001988043265895937, "loss": 2.1491, "step": 41875 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019880404160893845, "loss": 2.1335, "step": 41880 }, { "epoch": 0.1, "grad_norm": 1.6171875, "learning_rate": 0.0001988037565945299, "loss": 2.128, "step": 41885 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.0001988034715463682, "loss": 2.3433, "step": 41890 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.00019880318646445347, "loss": 2.1696, "step": 41895 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019880290134878577, "loss": 2.3149, "step": 41900 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.0001988026161993652, "loss": 1.9986, "step": 41905 }, { "epoch": 0.1, "grad_norm": 1.7578125, "learning_rate": 0.00019880233101619185, "loss": 2.2167, "step": 41910 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001988020457992658, "loss": 2.3562, "step": 41915 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019880176054858727, "loss": 1.9201, "step": 41920 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.00019880147526415623, "loss": 2.2693, "step": 41925 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.0001988011899459728, "loss": 2.2077, "step": 41930 }, { "epoch": 0.1, "grad_norm": 1.6171875, "learning_rate": 0.00019880090459403708, "loss": 2.2118, "step": 41935 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019880061920834918, "loss": 2.2269, "step": 41940 }, { "epoch": 0.1, "grad_norm": 2.21875, "learning_rate": 0.00019880033378890923, "loss": 2.1555, "step": 41945 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.00019880004833571727, "loss": 2.2815, "step": 41950 }, { "epoch": 0.1, "grad_norm": 1.578125, "learning_rate": 0.00019879976284877345, "loss": 2.2094, "step": 41955 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.0001987994773280778, "loss": 2.0305, "step": 41960 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019879919177363047, "loss": 2.2869, "step": 41965 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019879890618543152, "loss": 2.2888, "step": 41970 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019879862056348114, "loss": 2.1501, "step": 41975 }, { "epoch": 0.1, "grad_norm": 2.65625, "learning_rate": 0.0001987983349077793, "loss": 1.9968, "step": 41980 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019879804921832617, "loss": 2.2486, "step": 41985 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019879776349512185, "loss": 2.4123, "step": 41990 }, { "epoch": 0.1, "grad_norm": 1.5859375, "learning_rate": 0.00019879747773816642, "loss": 2.3244, "step": 41995 }, { "epoch": 0.1, "grad_norm": 1.5625, "learning_rate": 0.00019879719194745995, "loss": 2.0974, "step": 42000 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.0001987969061230026, "loss": 2.3263, "step": 42005 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019879662026479444, "loss": 2.0678, "step": 42010 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019879633437283554, "loss": 2.131, "step": 42015 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.00019879604844712603, "loss": 2.0782, "step": 42020 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.000198795762487666, "loss": 2.2217, "step": 42025 }, { "epoch": 0.1, "grad_norm": 1.5546875, "learning_rate": 0.00019879547649445553, "loss": 1.859, "step": 42030 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019879519046749476, "loss": 2.2918, "step": 42035 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.00019879490440678375, "loss": 2.1002, "step": 42040 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.0001987946183123226, "loss": 2.2285, "step": 42045 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.0001987943321841114, "loss": 2.0033, "step": 42050 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019879404602215028, "loss": 2.2736, "step": 42055 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019879375982643932, "loss": 2.1986, "step": 42060 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019879347359697867, "loss": 2.3536, "step": 42065 }, { "epoch": 0.1, "grad_norm": 1.3828125, "learning_rate": 0.00019879318733376832, "loss": 2.2892, "step": 42070 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019879290103680844, "loss": 2.1149, "step": 42075 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001987926147060991, "loss": 2.1028, "step": 42080 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019879232834164046, "loss": 2.2254, "step": 42085 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.0001987920419434325, "loss": 2.2551, "step": 42090 }, { "epoch": 0.1, "grad_norm": 3.0625, "learning_rate": 0.00019879175551147543, "loss": 2.149, "step": 42095 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.0001987914690457693, "loss": 2.0229, "step": 42100 }, { "epoch": 0.1, "grad_norm": 1.7578125, "learning_rate": 0.00019879118254631422, "loss": 2.0946, "step": 42105 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019879089601311026, "loss": 2.1252, "step": 42110 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019879060944615756, "loss": 2.2412, "step": 42115 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019879032284545616, "loss": 2.2483, "step": 42120 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019879003621100624, "loss": 2.2132, "step": 42125 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019878974954280784, "loss": 2.0921, "step": 42130 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019878946284086104, "loss": 1.9846, "step": 42135 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019878917610516598, "loss": 2.2896, "step": 42140 }, { "epoch": 0.1, "grad_norm": 2.25, "learning_rate": 0.00019878888933572278, "loss": 1.9995, "step": 42145 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019878860253253145, "loss": 2.102, "step": 42150 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019878831569559218, "loss": 2.0951, "step": 42155 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.000198788028824905, "loss": 2.1471, "step": 42160 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019878774192047007, "loss": 2.314, "step": 42165 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019878745498228746, "loss": 2.1478, "step": 42170 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019878716801035725, "loss": 2.1181, "step": 42175 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019878688100467953, "loss": 2.2363, "step": 42180 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019878659396525443, "loss": 2.057, "step": 42185 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019878630689208206, "loss": 2.0434, "step": 42190 }, { "epoch": 0.1, "grad_norm": 2.328125, "learning_rate": 0.00019878601978516247, "loss": 2.0127, "step": 42195 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019878573264449579, "loss": 2.2485, "step": 42200 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019878544547008212, "loss": 2.0941, "step": 42205 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019878515826192157, "loss": 2.1691, "step": 42210 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019878487102001417, "loss": 2.1922, "step": 42215 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.0001987845837443601, "loss": 2.226, "step": 42220 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019878429643495942, "loss": 2.1951, "step": 42225 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.00019878400909181224, "loss": 2.1468, "step": 42230 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.0001987837217149186, "loss": 2.1523, "step": 42235 }, { "epoch": 0.1, "grad_norm": 1.578125, "learning_rate": 0.0001987834343042787, "loss": 2.2357, "step": 42240 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019878314685989258, "loss": 2.1143, "step": 42245 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019878285938176036, "loss": 2.1941, "step": 42250 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.0001987825718698821, "loss": 2.1418, "step": 42255 }, { "epoch": 0.1, "grad_norm": 2.65625, "learning_rate": 0.00019878228432425793, "loss": 2.2023, "step": 42260 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019878199674488794, "loss": 2.2513, "step": 42265 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019878170913177224, "loss": 2.0665, "step": 42270 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.0001987814214849109, "loss": 2.098, "step": 42275 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019878113380430404, "loss": 2.2981, "step": 42280 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019878084608995175, "loss": 2.09, "step": 42285 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019878055834185413, "loss": 2.2524, "step": 42290 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.0001987802705600113, "loss": 2.2815, "step": 42295 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.0001987799827444233, "loss": 2.1432, "step": 42300 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019877969489509028, "loss": 2.2497, "step": 42305 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.00019877940701201234, "loss": 2.1639, "step": 42310 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019877911909518955, "loss": 2.1999, "step": 42315 }, { "epoch": 0.1, "grad_norm": 2.5625, "learning_rate": 0.00019877883114462203, "loss": 2.2536, "step": 42320 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019877854316030987, "loss": 2.2728, "step": 42325 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019877825514225318, "loss": 2.2524, "step": 42330 }, { "epoch": 0.1, "grad_norm": 4.5625, "learning_rate": 0.00019877796709045202, "loss": 2.2615, "step": 42335 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019877767900490652, "loss": 2.2286, "step": 42340 }, { "epoch": 0.1, "grad_norm": 1.7578125, "learning_rate": 0.0001987773908856168, "loss": 2.2823, "step": 42345 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.0001987771027325829, "loss": 2.0702, "step": 42350 }, { "epoch": 0.1, "grad_norm": 2.328125, "learning_rate": 0.00019877681454580497, "loss": 2.0473, "step": 42355 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019877652632528306, "loss": 2.2048, "step": 42360 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019877623807101734, "loss": 2.0853, "step": 42365 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019877594978300782, "loss": 2.2542, "step": 42370 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.0001987756614612547, "loss": 2.1683, "step": 42375 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.00019877537310575798, "loss": 2.1546, "step": 42380 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.0001987750847165178, "loss": 1.9705, "step": 42385 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001987747962935343, "loss": 2.2191, "step": 42390 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.0001987745078368075, "loss": 2.2026, "step": 42395 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.00019877421934633754, "loss": 2.2883, "step": 42400 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019877393082212453, "loss": 2.1982, "step": 42405 }, { "epoch": 0.1, "grad_norm": 1.53125, "learning_rate": 0.00019877364226416851, "loss": 2.1749, "step": 42410 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019877335367246966, "loss": 2.1387, "step": 42415 }, { "epoch": 0.1, "grad_norm": 1.5390625, "learning_rate": 0.00019877306504702806, "loss": 2.1024, "step": 42420 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019877277638784377, "loss": 2.0369, "step": 42425 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.0001987724876949169, "loss": 2.3323, "step": 42430 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019877219896824756, "loss": 2.1646, "step": 42435 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019877191020783584, "loss": 2.1761, "step": 42440 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.0001987716214136819, "loss": 2.1607, "step": 42445 }, { "epoch": 0.1, "grad_norm": 1.640625, "learning_rate": 0.00019877133258578572, "loss": 2.2192, "step": 42450 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019877104372414744, "loss": 2.4, "step": 42455 }, { "epoch": 0.1, "grad_norm": 1.546875, "learning_rate": 0.00019877075482876725, "loss": 2.1381, "step": 42460 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019877046589964514, "loss": 2.2435, "step": 42465 }, { "epoch": 0.1, "grad_norm": 1.4453125, "learning_rate": 0.00019877017693678125, "loss": 1.9338, "step": 42470 }, { "epoch": 0.1, "grad_norm": 2.421875, "learning_rate": 0.00019876988794017569, "loss": 2.2561, "step": 42475 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019876959890982853, "loss": 2.2193, "step": 42480 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.0001987693098457399, "loss": 2.1518, "step": 42485 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.00019876902074790985, "loss": 2.3145, "step": 42490 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019876873161633855, "loss": 2.2604, "step": 42495 }, { "epoch": 0.1, "grad_norm": 1.5859375, "learning_rate": 0.00019876844245102603, "loss": 2.2074, "step": 42500 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019876815325197244, "loss": 2.1523, "step": 42505 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019876786401917788, "loss": 2.197, "step": 42510 }, { "epoch": 0.1, "grad_norm": 1.671875, "learning_rate": 0.00019876757475264237, "loss": 2.1615, "step": 42515 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.0001987672854523661, "loss": 2.1618, "step": 42520 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019876699611834912, "loss": 2.1322, "step": 42525 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019876670675059155, "loss": 2.2693, "step": 42530 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019876641734909348, "loss": 2.3013, "step": 42535 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.000198766127913855, "loss": 2.0287, "step": 42540 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019876583844487624, "loss": 2.3276, "step": 42545 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019876554894215727, "loss": 2.0063, "step": 42550 }, { "epoch": 0.1, "grad_norm": 1.625, "learning_rate": 0.0001987652594056982, "loss": 2.2245, "step": 42555 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.0001987649698354991, "loss": 2.0759, "step": 42560 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019876468023156014, "loss": 2.1144, "step": 42565 }, { "epoch": 0.1, "grad_norm": 1.6171875, "learning_rate": 0.00019876439059388133, "loss": 2.0941, "step": 42570 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019876410092246283, "loss": 2.1256, "step": 42575 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019876381121730475, "loss": 2.1414, "step": 42580 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.0001987635214784071, "loss": 1.9238, "step": 42585 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019876323170577012, "loss": 2.2482, "step": 42590 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019876294189939375, "loss": 2.2408, "step": 42595 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.00019876265205927821, "loss": 2.0006, "step": 42600 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019876236218542356, "loss": 2.2494, "step": 42605 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019876207227782989, "loss": 2.1048, "step": 42610 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.0001987617823364973, "loss": 2.1811, "step": 42615 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.0001987614923614259, "loss": 2.3156, "step": 42620 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.00019876120235261577, "loss": 2.1182, "step": 42625 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.000198760912310067, "loss": 2.3579, "step": 42630 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.00019876062223377975, "loss": 2.33, "step": 42635 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019876033212375412, "loss": 2.2837, "step": 42640 }, { "epoch": 0.1, "grad_norm": 1.671875, "learning_rate": 0.0001987600419799901, "loss": 2.2489, "step": 42645 }, { "epoch": 0.1, "grad_norm": 1.6875, "learning_rate": 0.00019875975180248787, "loss": 2.2671, "step": 42650 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019875946159124755, "loss": 2.2173, "step": 42655 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.0001987591713462692, "loss": 2.1383, "step": 42660 }, { "epoch": 0.1, "grad_norm": 1.6015625, "learning_rate": 0.00019875888106755287, "loss": 2.2872, "step": 42665 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019875859075509875, "loss": 2.1238, "step": 42670 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019875830040890692, "loss": 2.2398, "step": 42675 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019875801002897748, "loss": 2.1935, "step": 42680 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019875771961531046, "loss": 2.1103, "step": 42685 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019875742916790606, "loss": 2.2278, "step": 42690 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019875713868676428, "loss": 2.0759, "step": 42695 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001987568481718853, "loss": 2.35, "step": 42700 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.0001987565576232692, "loss": 2.1781, "step": 42705 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.00019875626704091606, "loss": 2.2017, "step": 42710 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.000198755976424826, "loss": 2.0817, "step": 42715 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.00019875568577499907, "loss": 2.0933, "step": 42720 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019875539509143543, "loss": 2.1648, "step": 42725 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019875510437413518, "loss": 2.1826, "step": 42730 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019875481362309835, "loss": 2.0648, "step": 42735 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019875452283832509, "loss": 2.3055, "step": 42740 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.00019875423201981554, "loss": 2.2942, "step": 42745 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019875394116756971, "loss": 2.1949, "step": 42750 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019875365028158777, "loss": 2.1558, "step": 42755 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001987533593618698, "loss": 2.2209, "step": 42760 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019875306840841584, "loss": 2.3395, "step": 42765 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019875277742122607, "loss": 2.2602, "step": 42770 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019875248640030058, "loss": 2.0841, "step": 42775 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019875219534563941, "loss": 2.1703, "step": 42780 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019875190425724273, "loss": 2.1849, "step": 42785 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.0001987516131351106, "loss": 2.1396, "step": 42790 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019875132197924312, "loss": 2.2095, "step": 42795 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.0001987510307896404, "loss": 2.0775, "step": 42800 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019875073956630255, "loss": 2.0583, "step": 42805 }, { "epoch": 0.1, "grad_norm": 2.28125, "learning_rate": 0.00019875044830922964, "loss": 2.1182, "step": 42810 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.0001987501570184218, "loss": 2.0602, "step": 42815 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.0001987498656938791, "loss": 1.9764, "step": 42820 }, { "epoch": 0.1, "grad_norm": 1.5546875, "learning_rate": 0.00019874957433560166, "loss": 2.2028, "step": 42825 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019874928294358958, "loss": 2.1131, "step": 42830 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019874899151784296, "loss": 1.9991, "step": 42835 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.00019874870005836187, "loss": 2.1963, "step": 42840 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019874840856514645, "loss": 2.3312, "step": 42845 }, { "epoch": 0.1, "grad_norm": 2.90625, "learning_rate": 0.0001987481170381968, "loss": 2.1116, "step": 42850 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.000198747825477513, "loss": 2.2505, "step": 42855 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.0001987475338830951, "loss": 2.206, "step": 42860 }, { "epoch": 0.1, "grad_norm": 1.65625, "learning_rate": 0.0001987472422549433, "loss": 2.2626, "step": 42865 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019874695059305764, "loss": 2.2096, "step": 42870 }, { "epoch": 0.1, "grad_norm": 1.6875, "learning_rate": 0.0001987466588974382, "loss": 2.2451, "step": 42875 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019874636716808516, "loss": 2.0848, "step": 42880 }, { "epoch": 0.1, "grad_norm": 2.21875, "learning_rate": 0.00019874607540499857, "loss": 2.3222, "step": 42885 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.0001987457836081785, "loss": 2.3103, "step": 42890 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019874549177762506, "loss": 2.1475, "step": 42895 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019874519991333842, "loss": 2.1112, "step": 42900 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.0001987449080153186, "loss": 2.2852, "step": 42905 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019874461608356575, "loss": 2.2537, "step": 42910 }, { "epoch": 0.1, "grad_norm": 2.40625, "learning_rate": 0.00019874432411807992, "loss": 2.1941, "step": 42915 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019874403211886125, "loss": 2.2763, "step": 42920 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019874374008590982, "loss": 2.1962, "step": 42925 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019874344801922575, "loss": 2.3423, "step": 42930 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019874315591880915, "loss": 2.0738, "step": 42935 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019874286378466005, "loss": 2.0976, "step": 42940 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019874257161677864, "loss": 2.1232, "step": 42945 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019874227941516496, "loss": 2.4439, "step": 42950 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019874198717981912, "loss": 2.1402, "step": 42955 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019874169491074123, "loss": 2.349, "step": 42960 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019874140260793137, "loss": 2.1994, "step": 42965 }, { "epoch": 0.1, "grad_norm": 1.5546875, "learning_rate": 0.00019874111027138968, "loss": 2.2608, "step": 42970 }, { "epoch": 0.1, "grad_norm": 1.671875, "learning_rate": 0.00019874081790111623, "loss": 2.2514, "step": 42975 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019874052549711114, "loss": 2.1999, "step": 42980 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019874023305937444, "loss": 2.0834, "step": 42985 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019873994058790633, "loss": 2.1686, "step": 42990 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019873964808270685, "loss": 2.1932, "step": 42995 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019873935554377614, "loss": 2.1479, "step": 43000 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019873906297111427, "loss": 2.2029, "step": 43005 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019873877036472132, "loss": 2.3535, "step": 43010 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019873847772459744, "loss": 2.0431, "step": 43015 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.0001987381850507427, "loss": 2.2376, "step": 43020 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019873789234315718, "loss": 2.3278, "step": 43025 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.00019873759960184104, "loss": 2.1596, "step": 43030 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019873730682679432, "loss": 2.3392, "step": 43035 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019873701401801718, "loss": 2.2203, "step": 43040 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019873672117550967, "loss": 2.2233, "step": 43045 }, { "epoch": 0.1, "grad_norm": 2.46875, "learning_rate": 0.0001987364282992719, "loss": 2.0757, "step": 43050 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019873613538930396, "loss": 2.2467, "step": 43055 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019873584244560597, "loss": 2.1726, "step": 43060 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019873554946817806, "loss": 2.3454, "step": 43065 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019873525645702027, "loss": 2.0254, "step": 43070 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.0001987349634121327, "loss": 2.2588, "step": 43075 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019873467033351553, "loss": 2.1117, "step": 43080 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019873437722116877, "loss": 2.2527, "step": 43085 }, { "epoch": 0.1, "grad_norm": 2.453125, "learning_rate": 0.00019873408407509256, "loss": 2.0624, "step": 43090 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.00019873379089528702, "loss": 2.084, "step": 43095 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019873349768175218, "loss": 2.1078, "step": 43100 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019873320443448823, "loss": 2.2195, "step": 43105 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.0001987329111534952, "loss": 2.2199, "step": 43110 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.00019873261783877322, "loss": 2.2118, "step": 43115 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.0001987323244903224, "loss": 2.2845, "step": 43120 }, { "epoch": 0.1, "grad_norm": 2.296875, "learning_rate": 0.00019873203110814283, "loss": 2.1451, "step": 43125 }, { "epoch": 0.1, "grad_norm": 3.0, "learning_rate": 0.0001987317376922346, "loss": 2.0928, "step": 43130 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.0001987314442425978, "loss": 2.0877, "step": 43135 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.00019873115075923258, "loss": 2.1873, "step": 43140 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019873085724213897, "loss": 2.0255, "step": 43145 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.00019873056369131714, "loss": 2.2054, "step": 43150 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019873027010676716, "loss": 2.325, "step": 43155 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.0001987299764884891, "loss": 2.1585, "step": 43160 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019872968283648313, "loss": 2.1008, "step": 43165 }, { "epoch": 0.1, "grad_norm": 1.609375, "learning_rate": 0.00019872938915074928, "loss": 2.1892, "step": 43170 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.0001987290954312877, "loss": 2.1896, "step": 43175 }, { "epoch": 0.1, "grad_norm": 2.53125, "learning_rate": 0.00019872880167809843, "loss": 2.1613, "step": 43180 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019872850789118168, "loss": 2.3231, "step": 43185 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.0001987282140705374, "loss": 2.2392, "step": 43190 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019872792021616582, "loss": 2.1173, "step": 43195 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.000198727626328067, "loss": 2.0103, "step": 43200 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.000198727332406241, "loss": 2.1301, "step": 43205 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.000198727038450688, "loss": 2.1601, "step": 43210 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.000198726744461408, "loss": 2.0556, "step": 43215 }, { "epoch": 0.1, "grad_norm": 2.578125, "learning_rate": 0.00019872645043840116, "loss": 2.2311, "step": 43220 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019872615638166763, "loss": 2.0903, "step": 43225 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.0001987258622912074, "loss": 2.1921, "step": 43230 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019872556816702064, "loss": 2.2582, "step": 43235 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.00019872527400910744, "loss": 2.2, "step": 43240 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.0001987249798174679, "loss": 2.1431, "step": 43245 }, { "epoch": 0.1, "grad_norm": 2.28125, "learning_rate": 0.0001987246855921021, "loss": 2.1297, "step": 43250 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019872439133301016, "loss": 2.0394, "step": 43255 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019872409704019218, "loss": 2.2827, "step": 43260 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019872380271364825, "loss": 2.179, "step": 43265 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.0001987235083533785, "loss": 2.1182, "step": 43270 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.000198723213959383, "loss": 2.2247, "step": 43275 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.00019872291953166186, "loss": 2.1506, "step": 43280 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.00019872262507021518, "loss": 1.9279, "step": 43285 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.00019872233057504306, "loss": 2.1814, "step": 43290 }, { "epoch": 0.1, "grad_norm": 1.625, "learning_rate": 0.00019872203604614561, "loss": 2.1974, "step": 43295 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.0001987217414835229, "loss": 2.1471, "step": 43300 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019872144688717506, "loss": 2.1764, "step": 43305 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.0001987211522571022, "loss": 2.1759, "step": 43310 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.0001987208575933044, "loss": 2.2411, "step": 43315 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019872056289578174, "loss": 2.2415, "step": 43320 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019872026816453438, "loss": 2.3732, "step": 43325 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.00019871997339956235, "loss": 2.0828, "step": 43330 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019871967860086583, "loss": 2.2664, "step": 43335 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019871938376844484, "loss": 2.2973, "step": 43340 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019871908890229954, "loss": 2.2109, "step": 43345 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019871879400242998, "loss": 2.1201, "step": 43350 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.0001987184990688363, "loss": 2.1602, "step": 43355 }, { "epoch": 0.1, "grad_norm": 2.25, "learning_rate": 0.00019871820410151864, "loss": 2.2466, "step": 43360 }, { "epoch": 0.1, "grad_norm": 1.5234375, "learning_rate": 0.00019871790910047697, "loss": 2.0879, "step": 43365 }, { "epoch": 0.1, "grad_norm": 1.9765625, "learning_rate": 0.00019871761406571153, "loss": 2.1287, "step": 43370 }, { "epoch": 0.1, "grad_norm": 2.3125, "learning_rate": 0.00019871731899722233, "loss": 2.218, "step": 43375 }, { "epoch": 0.1, "grad_norm": 1.5390625, "learning_rate": 0.00019871702389500954, "loss": 2.1164, "step": 43380 }, { "epoch": 0.1, "grad_norm": 2.328125, "learning_rate": 0.0001987167287590732, "loss": 1.9207, "step": 43385 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019871643358941344, "loss": 2.0896, "step": 43390 }, { "epoch": 0.1, "grad_norm": 1.5234375, "learning_rate": 0.00019871613838603035, "loss": 2.0607, "step": 43395 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.00019871584314892405, "loss": 2.4041, "step": 43400 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019871554787809461, "loss": 2.1204, "step": 43405 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019871525257354216, "loss": 1.9947, "step": 43410 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.00019871495723526676, "loss": 2.1489, "step": 43415 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019871466186326857, "loss": 2.3304, "step": 43420 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019871436645754767, "loss": 2.2246, "step": 43425 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019871407101810414, "loss": 2.3593, "step": 43430 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001987137755449381, "loss": 2.3342, "step": 43435 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.00019871348003804964, "loss": 2.1996, "step": 43440 }, { "epoch": 0.1, "grad_norm": 2.3125, "learning_rate": 0.00019871318449743886, "loss": 2.2515, "step": 43445 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.00019871288892310584, "loss": 2.066, "step": 43450 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019871259331505074, "loss": 2.1763, "step": 43455 }, { "epoch": 0.1, "grad_norm": 2.484375, "learning_rate": 0.00019871229767327365, "loss": 2.1436, "step": 43460 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.00019871200199777462, "loss": 2.2347, "step": 43465 }, { "epoch": 0.1, "grad_norm": 2.3125, "learning_rate": 0.00019871170628855378, "loss": 2.0567, "step": 43470 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019871141054561122, "loss": 2.209, "step": 43475 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019871111476894705, "loss": 2.2567, "step": 43480 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.0001987108189585614, "loss": 2.0887, "step": 43485 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019871052311445431, "loss": 2.3145, "step": 43490 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019871022723662595, "loss": 2.1514, "step": 43495 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019870993132507637, "loss": 2.1976, "step": 43500 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.0001987096353798057, "loss": 2.1591, "step": 43505 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.00019870933940081402, "loss": 2.2846, "step": 43510 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.00019870904338810145, "loss": 2.1333, "step": 43515 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.00019870874734166804, "loss": 2.1185, "step": 43520 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.000198708451261514, "loss": 2.1498, "step": 43525 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.0001987081551476393, "loss": 2.537, "step": 43530 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019870785900004414, "loss": 2.1047, "step": 43535 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.0001987075628187286, "loss": 2.1697, "step": 43540 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.0001987072666036927, "loss": 2.1598, "step": 43545 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019870697035493664, "loss": 2.1482, "step": 43550 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019870667407246052, "loss": 2.0462, "step": 43555 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.00019870637775626438, "loss": 2.1591, "step": 43560 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019870608140634834, "loss": 2.37, "step": 43565 }, { "epoch": 0.1, "grad_norm": 2.28125, "learning_rate": 0.00019870578502271253, "loss": 2.1547, "step": 43570 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019870548860535702, "loss": 1.9917, "step": 43575 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019870519215428196, "loss": 2.3993, "step": 43580 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.00019870489566948738, "loss": 2.0658, "step": 43585 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019870459915097342, "loss": 2.0894, "step": 43590 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.0001987043025987402, "loss": 2.1506, "step": 43595 }, { "epoch": 0.1, "grad_norm": 1.6796875, "learning_rate": 0.0001987040060127878, "loss": 2.2567, "step": 43600 }, { "epoch": 0.1, "grad_norm": 2.4375, "learning_rate": 0.00019870370939311632, "loss": 2.1423, "step": 43605 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019870341273972586, "loss": 2.3947, "step": 43610 }, { "epoch": 0.1, "grad_norm": 1.6875, "learning_rate": 0.0001987031160526165, "loss": 2.2818, "step": 43615 }, { "epoch": 0.1, "grad_norm": 1.3046875, "learning_rate": 0.00019870281933178838, "loss": 2.0002, "step": 43620 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.0001987025225772416, "loss": 2.233, "step": 43625 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.00019870222578897623, "loss": 2.2806, "step": 43630 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.0001987019289669924, "loss": 2.3384, "step": 43635 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.0001987016321112902, "loss": 2.0597, "step": 43640 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019870133522186973, "loss": 2.2712, "step": 43645 }, { "epoch": 0.1, "grad_norm": 2.28125, "learning_rate": 0.0001987010382987311, "loss": 2.1789, "step": 43650 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.0001987007413418744, "loss": 2.2467, "step": 43655 }, { "epoch": 0.1, "grad_norm": 1.921875, "learning_rate": 0.00019870044435129974, "loss": 2.1916, "step": 43660 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.0001987001473270072, "loss": 2.1036, "step": 43665 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.00019869985026899694, "loss": 2.0109, "step": 43670 }, { "epoch": 0.1, "grad_norm": 1.65625, "learning_rate": 0.00019869955317726898, "loss": 2.3171, "step": 43675 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019869925605182348, "loss": 2.3704, "step": 43680 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019869895889266053, "loss": 2.121, "step": 43685 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019869866169978023, "loss": 2.131, "step": 43690 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019869836447318267, "loss": 2.2823, "step": 43695 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.00019869806721286795, "loss": 2.0994, "step": 43700 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.0001986977699188362, "loss": 2.3438, "step": 43705 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.00019869747259108745, "loss": 2.2449, "step": 43710 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.00019869717522962192, "loss": 2.1163, "step": 43715 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.0001986968778344396, "loss": 1.9907, "step": 43720 }, { "epoch": 0.1, "grad_norm": 1.734375, "learning_rate": 0.00019869658040554066, "loss": 2.1549, "step": 43725 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019869628294292516, "loss": 2.1341, "step": 43730 }, { "epoch": 0.1, "grad_norm": 3.0625, "learning_rate": 0.00019869598544659326, "loss": 2.1663, "step": 43735 }, { "epoch": 0.1, "grad_norm": 1.71875, "learning_rate": 0.00019869568791654497, "loss": 2.075, "step": 43740 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019869539035278046, "loss": 2.2444, "step": 43745 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019869509275529984, "loss": 2.1994, "step": 43750 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019869479512410316, "loss": 2.188, "step": 43755 }, { "epoch": 0.1, "grad_norm": 1.7890625, "learning_rate": 0.00019869449745919053, "loss": 2.0306, "step": 43760 }, { "epoch": 0.1, "grad_norm": 1.7578125, "learning_rate": 0.00019869419976056211, "loss": 2.1845, "step": 43765 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019869390202821794, "loss": 2.2279, "step": 43770 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019869360426215816, "loss": 2.3924, "step": 43775 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019869330646238282, "loss": 2.2119, "step": 43780 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.0001986930086288921, "loss": 2.2301, "step": 43785 }, { "epoch": 0.1, "grad_norm": 1.6328125, "learning_rate": 0.00019869271076168604, "loss": 2.2332, "step": 43790 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019869241286076474, "loss": 2.1607, "step": 43795 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019869211492612836, "loss": 2.1342, "step": 43800 }, { "epoch": 0.1, "grad_norm": 2.4375, "learning_rate": 0.00019869181695777695, "loss": 2.3004, "step": 43805 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.0001986915189557106, "loss": 2.2477, "step": 43810 }, { "epoch": 0.1, "grad_norm": 2.421875, "learning_rate": 0.00019869122091992946, "loss": 2.071, "step": 43815 }, { "epoch": 0.1, "grad_norm": 1.9921875, "learning_rate": 0.00019869092285043361, "loss": 2.2143, "step": 43820 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019869062474722315, "loss": 2.1778, "step": 43825 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019869032661029817, "loss": 2.202, "step": 43830 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019869002843965881, "loss": 2.2879, "step": 43835 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.0001986897302353051, "loss": 2.1314, "step": 43840 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.0001986894319972372, "loss": 2.1778, "step": 43845 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019868913372545524, "loss": 2.3063, "step": 43850 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.0001986888354199593, "loss": 2.1469, "step": 43855 }, { "epoch": 0.1, "grad_norm": 1.7109375, "learning_rate": 0.0001986885370807494, "loss": 2.33, "step": 43860 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.0001986882387078257, "loss": 2.0806, "step": 43865 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.00019868794030118836, "loss": 2.1506, "step": 43870 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019868764186083742, "loss": 2.1345, "step": 43875 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019868734338677297, "loss": 2.0693, "step": 43880 }, { "epoch": 0.1, "grad_norm": 1.5, "learning_rate": 0.00019868704487899515, "loss": 2.2601, "step": 43885 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019868674633750403, "loss": 2.4083, "step": 43890 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019868644776229973, "loss": 2.1147, "step": 43895 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019868614915338238, "loss": 1.9413, "step": 43900 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.000198685850510752, "loss": 2.1952, "step": 43905 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.0001986855518344088, "loss": 2.3801, "step": 43910 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.0001986852531243528, "loss": 2.3132, "step": 43915 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.0001986849543805841, "loss": 2.2461, "step": 43920 }, { "epoch": 0.1, "grad_norm": 1.6171875, "learning_rate": 0.00019868465560310286, "loss": 2.014, "step": 43925 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019868435679190917, "loss": 2.0962, "step": 43930 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019868405794700307, "loss": 2.139, "step": 43935 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019868375906838474, "loss": 2.0594, "step": 43940 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019868346015605425, "loss": 2.0385, "step": 43945 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019868316121001166, "loss": 2.1348, "step": 43950 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019868286223025713, "loss": 2.2966, "step": 43955 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019868256321679074, "loss": 1.9308, "step": 43960 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.0001986822641696126, "loss": 2.1007, "step": 43965 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.00019868196508872286, "loss": 2.2434, "step": 43970 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019868166597412153, "loss": 2.1536, "step": 43975 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019868136682580875, "loss": 2.2093, "step": 43980 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.00019868106764378462, "loss": 2.3987, "step": 43985 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019868076842804926, "loss": 1.9663, "step": 43990 }, { "epoch": 0.1, "grad_norm": 1.5078125, "learning_rate": 0.00019868046917860275, "loss": 2.0855, "step": 43995 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.0001986801698954452, "loss": 2.0637, "step": 44000 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019867987057857673, "loss": 1.8614, "step": 44005 }, { "epoch": 0.1, "grad_norm": 3.046875, "learning_rate": 0.00019867957122799744, "loss": 2.2112, "step": 44010 }, { "epoch": 0.1, "grad_norm": 1.984375, "learning_rate": 0.00019867927184370737, "loss": 2.1706, "step": 44015 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.00019867897242570673, "loss": 1.9598, "step": 44020 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019867867297399552, "loss": 2.274, "step": 44025 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.0001986783734885739, "loss": 2.096, "step": 44030 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019867807396944197, "loss": 2.1419, "step": 44035 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019867777441659977, "loss": 2.2272, "step": 44040 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.0001986774748300475, "loss": 2.2569, "step": 44045 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.0001986771752097852, "loss": 2.0826, "step": 44050 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.000198676875555813, "loss": 2.1867, "step": 44055 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019867657586813097, "loss": 2.0404, "step": 44060 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019867627614673924, "loss": 2.2504, "step": 44065 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019867597639163794, "loss": 2.2729, "step": 44070 }, { "epoch": 0.1, "grad_norm": 2.0, "learning_rate": 0.0001986756766028271, "loss": 2.2404, "step": 44075 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019867537678030684, "loss": 2.3219, "step": 44080 }, { "epoch": 0.1, "grad_norm": 3.390625, "learning_rate": 0.00019867507692407731, "loss": 2.1802, "step": 44085 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019867477703413856, "loss": 2.1205, "step": 44090 }, { "epoch": 0.1, "grad_norm": 1.609375, "learning_rate": 0.00019867447711049074, "loss": 1.9827, "step": 44095 }, { "epoch": 0.1, "grad_norm": 1.8828125, "learning_rate": 0.00019867417715313393, "loss": 2.2393, "step": 44100 }, { "epoch": 0.1, "grad_norm": 1.5078125, "learning_rate": 0.00019867387716206825, "loss": 2.066, "step": 44105 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.00019867357713729375, "loss": 2.2321, "step": 44110 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.0001986732770788106, "loss": 2.1919, "step": 44115 }, { "epoch": 0.1, "grad_norm": 1.8359375, "learning_rate": 0.00019867297698661883, "loss": 2.3372, "step": 44120 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.0001986726768607186, "loss": 2.3717, "step": 44125 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019867237670110997, "loss": 2.2774, "step": 44130 }, { "epoch": 0.1, "grad_norm": 2.3125, "learning_rate": 0.0001986720765077931, "loss": 2.3532, "step": 44135 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019867177628076803, "loss": 2.1592, "step": 44140 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.0001986714760200349, "loss": 2.1993, "step": 44145 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.0001986711757255938, "loss": 2.1151, "step": 44150 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.0001986708753974449, "loss": 2.2864, "step": 44155 }, { "epoch": 0.1, "grad_norm": 1.4375, "learning_rate": 0.00019867057503558818, "loss": 1.9358, "step": 44160 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.0001986702746400238, "loss": 2.1528, "step": 44165 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.0001986699742107519, "loss": 2.0845, "step": 44170 }, { "epoch": 0.1, "grad_norm": 1.96875, "learning_rate": 0.0001986696737477725, "loss": 2.2339, "step": 44175 }, { "epoch": 0.1, "grad_norm": 2.171875, "learning_rate": 0.00019866937325108578, "loss": 2.284, "step": 44180 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.0001986690727206918, "loss": 2.0614, "step": 44185 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.0001986687721565907, "loss": 2.0985, "step": 44190 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019866847155878252, "loss": 2.2715, "step": 44195 }, { "epoch": 0.1, "grad_norm": 2.703125, "learning_rate": 0.00019866817092726745, "loss": 1.9152, "step": 44200 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019866787026204548, "loss": 2.0435, "step": 44205 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.00019866756956311683, "loss": 2.0304, "step": 44210 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.0001986672688304815, "loss": 2.4362, "step": 44215 }, { "epoch": 0.1, "grad_norm": 1.828125, "learning_rate": 0.00019866696806413966, "loss": 2.1062, "step": 44220 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019866666726409143, "loss": 2.2613, "step": 44225 }, { "epoch": 0.1, "grad_norm": 1.8203125, "learning_rate": 0.00019866636643033685, "loss": 2.1492, "step": 44230 }, { "epoch": 0.1, "grad_norm": 2.59375, "learning_rate": 0.00019866606556287605, "loss": 2.1194, "step": 44235 }, { "epoch": 0.1, "grad_norm": 2.390625, "learning_rate": 0.0001986657646617091, "loss": 2.0927, "step": 44240 }, { "epoch": 0.1, "grad_norm": 1.65625, "learning_rate": 0.0001986654637268362, "loss": 2.3468, "step": 44245 }, { "epoch": 0.1, "grad_norm": 2.265625, "learning_rate": 0.00019866516275825736, "loss": 2.2364, "step": 44250 }, { "epoch": 0.1, "grad_norm": 2.25, "learning_rate": 0.00019866486175597271, "loss": 2.3744, "step": 44255 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019866456071998236, "loss": 2.0896, "step": 44260 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.0001986642596502864, "loss": 2.3193, "step": 44265 }, { "epoch": 0.1, "grad_norm": 1.9375, "learning_rate": 0.00019866395854688492, "loss": 2.1109, "step": 44270 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019866365740977808, "loss": 2.3115, "step": 44275 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.00019866335623896592, "loss": 2.292, "step": 44280 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.0001986630550344486, "loss": 2.1753, "step": 44285 }, { "epoch": 0.1, "grad_norm": 2.203125, "learning_rate": 0.00019866275379622615, "loss": 2.1513, "step": 44290 }, { "epoch": 0.1, "grad_norm": 1.859375, "learning_rate": 0.00019866245252429872, "loss": 2.1572, "step": 44295 }, { "epoch": 0.1, "grad_norm": 2.453125, "learning_rate": 0.00019866215121866642, "loss": 2.1483, "step": 44300 }, { "epoch": 0.1, "grad_norm": 1.7265625, "learning_rate": 0.00019866184987932933, "loss": 2.1022, "step": 44305 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019866154850628758, "loss": 2.2685, "step": 44310 }, { "epoch": 0.1, "grad_norm": 2.515625, "learning_rate": 0.0001986612470995412, "loss": 2.2782, "step": 44315 }, { "epoch": 0.1, "grad_norm": 2.234375, "learning_rate": 0.00019866094565909045, "loss": 2.1327, "step": 44320 }, { "epoch": 0.1, "grad_norm": 2.046875, "learning_rate": 0.00019866064418493524, "loss": 2.2553, "step": 44325 }, { "epoch": 0.1, "grad_norm": 1.4765625, "learning_rate": 0.00019866034267707581, "loss": 2.3288, "step": 44330 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019866004113551223, "loss": 2.104, "step": 44335 }, { "epoch": 0.1, "grad_norm": 1.875, "learning_rate": 0.00019865973956024452, "loss": 2.2357, "step": 44340 }, { "epoch": 0.1, "grad_norm": 1.796875, "learning_rate": 0.0001986594379512729, "loss": 2.1526, "step": 44345 }, { "epoch": 0.1, "grad_norm": 1.6953125, "learning_rate": 0.00019865913630859745, "loss": 2.0905, "step": 44350 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019865883463221823, "loss": 2.1508, "step": 44355 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019865853292213537, "loss": 2.1201, "step": 44360 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019865823117834895, "loss": 2.0502, "step": 44365 }, { "epoch": 0.1, "grad_norm": 1.515625, "learning_rate": 0.00019865792940085907, "loss": 2.0716, "step": 44370 }, { "epoch": 0.1, "grad_norm": 1.8671875, "learning_rate": 0.0001986576275896659, "loss": 2.2099, "step": 44375 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.00019865732574476946, "loss": 2.1594, "step": 44380 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019865702386616992, "loss": 2.1683, "step": 44385 }, { "epoch": 0.1, "grad_norm": 1.8046875, "learning_rate": 0.00019865672195386734, "loss": 2.2462, "step": 44390 }, { "epoch": 0.1, "grad_norm": 1.953125, "learning_rate": 0.0001986564200078618, "loss": 2.1622, "step": 44395 }, { "epoch": 0.1, "grad_norm": 1.9453125, "learning_rate": 0.00019865611802815348, "loss": 2.1101, "step": 44400 }, { "epoch": 0.1, "grad_norm": 1.9140625, "learning_rate": 0.00019865581601474243, "loss": 2.1882, "step": 44405 }, { "epoch": 0.1, "grad_norm": 1.8515625, "learning_rate": 0.00019865551396762877, "loss": 2.3222, "step": 44410 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019865521188681257, "loss": 2.2742, "step": 44415 }, { "epoch": 0.1, "grad_norm": 1.5859375, "learning_rate": 0.00019865490977229397, "loss": 2.0954, "step": 44420 }, { "epoch": 0.1, "grad_norm": 1.5625, "learning_rate": 0.00019865460762407309, "loss": 2.0181, "step": 44425 }, { "epoch": 0.1, "grad_norm": 1.890625, "learning_rate": 0.00019865430544215, "loss": 2.0524, "step": 44430 }, { "epoch": 0.1, "grad_norm": 1.6171875, "learning_rate": 0.00019865400322652477, "loss": 2.2021, "step": 44435 }, { "epoch": 0.1, "grad_norm": 2.46875, "learning_rate": 0.0001986537009771976, "loss": 2.3275, "step": 44440 }, { "epoch": 0.1, "grad_norm": 2.078125, "learning_rate": 0.00019865339869416847, "loss": 2.2375, "step": 44445 }, { "epoch": 0.1, "grad_norm": 2.1875, "learning_rate": 0.0001986530963774376, "loss": 1.9357, "step": 44450 }, { "epoch": 0.1, "grad_norm": 1.5, "learning_rate": 0.00019865279402700502, "loss": 2.0722, "step": 44455 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019865249164287087, "loss": 2.1141, "step": 44460 }, { "epoch": 0.1, "grad_norm": 2.140625, "learning_rate": 0.00019865218922503525, "loss": 2.1228, "step": 44465 }, { "epoch": 0.1, "grad_norm": 1.5703125, "learning_rate": 0.00019865188677349824, "loss": 2.2431, "step": 44470 }, { "epoch": 0.1, "grad_norm": 1.78125, "learning_rate": 0.00019865158428825997, "loss": 2.1416, "step": 44475 }, { "epoch": 0.1, "grad_norm": 2.359375, "learning_rate": 0.00019865128176932051, "loss": 2.2629, "step": 44480 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019865097921667997, "loss": 2.1765, "step": 44485 }, { "epoch": 0.1, "grad_norm": 2.03125, "learning_rate": 0.00019865067663033846, "loss": 2.3237, "step": 44490 }, { "epoch": 0.1, "grad_norm": 3.171875, "learning_rate": 0.00019865037401029613, "loss": 2.2415, "step": 44495 }, { "epoch": 0.1, "grad_norm": 1.90625, "learning_rate": 0.00019865007135655302, "loss": 2.0458, "step": 44500 }, { "epoch": 0.1, "grad_norm": 1.8125, "learning_rate": 0.00019864976866910928, "loss": 2.2516, "step": 44505 }, { "epoch": 0.1, "grad_norm": 1.703125, "learning_rate": 0.00019864946594796496, "loss": 2.1164, "step": 44510 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019864916319312022, "loss": 2.2006, "step": 44515 }, { "epoch": 0.1, "grad_norm": 2.09375, "learning_rate": 0.00019864886040457514, "loss": 2.1558, "step": 44520 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.0001986485575823298, "loss": 2.2193, "step": 44525 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019864825472638433, "loss": 2.2792, "step": 44530 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019864795183673883, "loss": 2.1618, "step": 44535 }, { "epoch": 0.1, "grad_norm": 1.9609375, "learning_rate": 0.00019864764891339338, "loss": 2.0265, "step": 44540 }, { "epoch": 0.1, "grad_norm": 3.03125, "learning_rate": 0.00019864734595634813, "loss": 2.3159, "step": 44545 }, { "epoch": 0.1, "grad_norm": 1.765625, "learning_rate": 0.00019864704296560315, "loss": 2.1411, "step": 44550 }, { "epoch": 0.1, "grad_norm": 1.84375, "learning_rate": 0.00019864673994115855, "loss": 2.2159, "step": 44555 }, { "epoch": 0.1, "grad_norm": 2.125, "learning_rate": 0.0001986464368830144, "loss": 2.1141, "step": 44560 }, { "epoch": 0.1, "grad_norm": 2.015625, "learning_rate": 0.0001986461337911709, "loss": 2.2422, "step": 44565 }, { "epoch": 0.1, "grad_norm": 1.9296875, "learning_rate": 0.00019864583066562806, "loss": 2.234, "step": 44570 }, { "epoch": 0.1, "grad_norm": 1.6484375, "learning_rate": 0.00019864552750638602, "loss": 2.1209, "step": 44575 }, { "epoch": 0.1, "grad_norm": 2.15625, "learning_rate": 0.00019864522431344486, "loss": 2.2635, "step": 44580 }, { "epoch": 0.1, "grad_norm": 2.109375, "learning_rate": 0.00019864492108680472, "loss": 2.1858, "step": 44585 }, { "epoch": 0.1, "grad_norm": 1.7734375, "learning_rate": 0.00019864461782646568, "loss": 2.0096, "step": 44590 }, { "epoch": 0.1, "grad_norm": 1.6640625, "learning_rate": 0.00019864431453242787, "loss": 2.2162, "step": 44595 }, { "epoch": 0.1, "grad_norm": 2.0625, "learning_rate": 0.00019864401120469135, "loss": 2.0886, "step": 44600 }, { "epoch": 0.1, "grad_norm": 1.7421875, "learning_rate": 0.0001986437078432563, "loss": 2.3658, "step": 44605 }, { "epoch": 0.1, "grad_norm": 1.8984375, "learning_rate": 0.00019864340444812267, "loss": 2.1079, "step": 44610 }, { "epoch": 0.1, "grad_norm": 1.75, "learning_rate": 0.00019864310101929073, "loss": 2.0208, "step": 44615 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019864279755676053, "loss": 2.1929, "step": 44620 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019864249406053214, "loss": 2.1773, "step": 44625 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019864219053060566, "loss": 2.1228, "step": 44630 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.00019864188696698125, "loss": 2.0614, "step": 44635 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019864158336965897, "loss": 2.0777, "step": 44640 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.00019864127973863896, "loss": 2.2621, "step": 44645 }, { "epoch": 0.11, "grad_norm": 1.609375, "learning_rate": 0.00019864097607392127, "loss": 2.1571, "step": 44650 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019864067237550603, "loss": 2.2019, "step": 44655 }, { "epoch": 0.11, "grad_norm": 1.7109375, "learning_rate": 0.00019864036864339338, "loss": 2.4212, "step": 44660 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019864006487758335, "loss": 2.2169, "step": 44665 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019863976107807612, "loss": 2.2586, "step": 44670 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019863945724487173, "loss": 2.183, "step": 44675 }, { "epoch": 0.11, "grad_norm": 1.515625, "learning_rate": 0.00019863915337797034, "loss": 2.0799, "step": 44680 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019863884947737202, "loss": 2.3551, "step": 44685 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.00019863854554307686, "loss": 2.2284, "step": 44690 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019863824157508503, "loss": 2.3252, "step": 44695 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.0001986379375733965, "loss": 2.3866, "step": 44700 }, { "epoch": 0.11, "grad_norm": 1.3828125, "learning_rate": 0.00019863763353801156, "loss": 2.2345, "step": 44705 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019863732946893015, "loss": 2.3164, "step": 44710 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019863702536615247, "loss": 2.253, "step": 44715 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.0001986367212296786, "loss": 2.2554, "step": 44720 }, { "epoch": 0.11, "grad_norm": 2.328125, "learning_rate": 0.00019863641705950862, "loss": 2.3602, "step": 44725 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019863611285564266, "loss": 2.0989, "step": 44730 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.0001986358086180808, "loss": 2.2301, "step": 44735 }, { "epoch": 0.11, "grad_norm": 1.5546875, "learning_rate": 0.0001986355043468232, "loss": 2.0108, "step": 44740 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.00019863520004186985, "loss": 1.9053, "step": 44745 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019863489570322098, "loss": 2.2588, "step": 44750 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019863459133087662, "loss": 2.2192, "step": 44755 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.0001986342869248369, "loss": 2.2658, "step": 44760 }, { "epoch": 0.11, "grad_norm": 1.625, "learning_rate": 0.00019863398248510194, "loss": 2.3481, "step": 44765 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.0001986336780116718, "loss": 2.3571, "step": 44770 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.0001986333735045466, "loss": 2.05, "step": 44775 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019863306896372647, "loss": 2.0894, "step": 44780 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.0001986327643892115, "loss": 2.2397, "step": 44785 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019863245978100178, "loss": 2.1246, "step": 44790 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019863215513909742, "loss": 2.0481, "step": 44795 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019863185046349854, "loss": 2.0498, "step": 44800 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.0001986315457542052, "loss": 2.1107, "step": 44805 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.00019863124101121755, "loss": 2.0501, "step": 44810 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019863093623453569, "loss": 2.218, "step": 44815 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001986306314241597, "loss": 2.143, "step": 44820 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.0001986303265800897, "loss": 2.1596, "step": 44825 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.0001986300217023258, "loss": 2.0257, "step": 44830 }, { "epoch": 0.11, "grad_norm": 1.5078125, "learning_rate": 0.0001986297167908681, "loss": 2.1302, "step": 44835 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019862941184571668, "loss": 1.9779, "step": 44840 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.00019862910686687166, "loss": 2.1354, "step": 44845 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.00019862880185433319, "loss": 2.2058, "step": 44850 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.0001986284968081013, "loss": 2.0002, "step": 44855 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019862819172817612, "loss": 2.2692, "step": 44860 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019862788661455778, "loss": 2.2819, "step": 44865 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019862758146724636, "loss": 2.1028, "step": 44870 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019862727628624195, "loss": 2.1959, "step": 44875 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019862697107154471, "loss": 2.1494, "step": 44880 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019862666582315467, "loss": 2.1059, "step": 44885 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.000198626360541072, "loss": 2.045, "step": 44890 }, { "epoch": 0.11, "grad_norm": 2.546875, "learning_rate": 0.00019862605522529678, "loss": 2.1177, "step": 44895 }, { "epoch": 0.11, "grad_norm": 1.453125, "learning_rate": 0.0001986257498758291, "loss": 2.0377, "step": 44900 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.0001986254444926691, "loss": 2.3329, "step": 44905 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001986251390758168, "loss": 2.159, "step": 44910 }, { "epoch": 0.11, "grad_norm": 1.8515625, "learning_rate": 0.0001986248336252724, "loss": 2.2181, "step": 44915 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019862452814103598, "loss": 2.2268, "step": 44920 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.0001986242226231076, "loss": 2.3266, "step": 44925 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019862391707148742, "loss": 2.3172, "step": 44930 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.0001986236114861755, "loss": 2.1561, "step": 44935 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.000198623305867172, "loss": 2.267, "step": 44940 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.00019862300021447697, "loss": 2.0548, "step": 44945 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019862269452809055, "loss": 2.1611, "step": 44950 }, { "epoch": 0.11, "grad_norm": 1.4921875, "learning_rate": 0.0001986223888080128, "loss": 2.297, "step": 44955 }, { "epoch": 0.11, "grad_norm": 2.3125, "learning_rate": 0.00019862208305424387, "loss": 2.1958, "step": 44960 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.00019862177726678385, "loss": 2.175, "step": 44965 }, { "epoch": 0.11, "grad_norm": 2.328125, "learning_rate": 0.00019862147144563284, "loss": 2.1415, "step": 44970 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019862116559079095, "loss": 2.272, "step": 44975 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.0001986208597022583, "loss": 2.1474, "step": 44980 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019862055378003492, "loss": 2.2223, "step": 44985 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019862024782412103, "loss": 2.3473, "step": 44990 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019861994183451663, "loss": 2.1642, "step": 44995 }, { "epoch": 0.11, "grad_norm": 1.40625, "learning_rate": 0.0001986196358112219, "loss": 2.1065, "step": 45000 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.0001986193297542369, "loss": 2.283, "step": 45005 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019861902366356176, "loss": 2.3484, "step": 45010 }, { "epoch": 0.11, "grad_norm": 2.546875, "learning_rate": 0.00019861871753919656, "loss": 2.06, "step": 45015 }, { "epoch": 0.11, "grad_norm": 2.390625, "learning_rate": 0.0001986184113811414, "loss": 2.1475, "step": 45020 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019861810518939643, "loss": 2.1649, "step": 45025 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019861779896396172, "loss": 2.1305, "step": 45030 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019861749270483738, "loss": 2.2451, "step": 45035 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.0001986171864120235, "loss": 2.1276, "step": 45040 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019861688008552023, "loss": 2.2429, "step": 45045 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019861657372532765, "loss": 2.1878, "step": 45050 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.00019861626733144584, "loss": 2.2906, "step": 45055 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.0001986159609038749, "loss": 2.171, "step": 45060 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.000198615654442615, "loss": 2.1823, "step": 45065 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.0001986153479476662, "loss": 2.293, "step": 45070 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.0001986150414190286, "loss": 2.1549, "step": 45075 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019861473485670232, "loss": 2.1838, "step": 45080 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019861442826068744, "loss": 2.1958, "step": 45085 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001986141216309841, "loss": 2.0663, "step": 45090 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019861381496759237, "loss": 2.1647, "step": 45095 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019861350827051237, "loss": 2.3441, "step": 45100 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019861320153974422, "loss": 2.458, "step": 45105 }, { "epoch": 0.11, "grad_norm": 1.703125, "learning_rate": 0.00019861289477528802, "loss": 2.2507, "step": 45110 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001986125879771439, "loss": 2.1374, "step": 45115 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019861228114531184, "loss": 2.1277, "step": 45120 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.0001986119742797921, "loss": 2.1326, "step": 45125 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019861166738058473, "loss": 2.1328, "step": 45130 }, { "epoch": 0.11, "grad_norm": 1.640625, "learning_rate": 0.0001986113604476898, "loss": 2.1761, "step": 45135 }, { "epoch": 0.11, "grad_norm": 1.5859375, "learning_rate": 0.00019861105348110741, "loss": 2.3339, "step": 45140 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.00019861074648083775, "loss": 2.2085, "step": 45145 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019861043944688085, "loss": 2.2468, "step": 45150 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019861013237923683, "loss": 2.0286, "step": 45155 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.0001986098252779058, "loss": 2.3192, "step": 45160 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.0001986095181428879, "loss": 2.27, "step": 45165 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019860921097418315, "loss": 2.3308, "step": 45170 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.00019860890377179172, "loss": 2.2961, "step": 45175 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.0001986085965357137, "loss": 2.218, "step": 45180 }, { "epoch": 0.11, "grad_norm": 1.46875, "learning_rate": 0.00019860828926594924, "loss": 2.3124, "step": 45185 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019860798196249835, "loss": 2.1644, "step": 45190 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.0001986076746253612, "loss": 2.1402, "step": 45195 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.00019860736725453787, "loss": 2.2509, "step": 45200 }, { "epoch": 0.11, "grad_norm": 1.65625, "learning_rate": 0.00019860705985002847, "loss": 2.296, "step": 45205 }, { "epoch": 0.11, "grad_norm": 1.6484375, "learning_rate": 0.00019860675241183313, "loss": 2.141, "step": 45210 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019860644493995192, "loss": 2.0074, "step": 45215 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.000198606137434385, "loss": 2.2528, "step": 45220 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.00019860582989513238, "loss": 2.2989, "step": 45225 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.00019860552232219426, "loss": 1.9008, "step": 45230 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019860521471557068, "loss": 1.9469, "step": 45235 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019860490707526178, "loss": 1.9733, "step": 45240 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019860459940126765, "loss": 2.1936, "step": 45245 }, { "epoch": 0.11, "grad_norm": 1.59375, "learning_rate": 0.0001986042916935884, "loss": 2.1399, "step": 45250 }, { "epoch": 0.11, "grad_norm": 2.3125, "learning_rate": 0.00019860398395222414, "loss": 2.2343, "step": 45255 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019860367617717496, "loss": 2.238, "step": 45260 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.000198603368368441, "loss": 2.3158, "step": 45265 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019860306052602234, "loss": 2.2779, "step": 45270 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019860275264991907, "loss": 2.2726, "step": 45275 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001986024447401313, "loss": 2.1083, "step": 45280 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.0001986021367966592, "loss": 2.0897, "step": 45285 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.00019860182881950277, "loss": 2.2738, "step": 45290 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.0001986015208086622, "loss": 2.0651, "step": 45295 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.00019860121276413752, "loss": 2.3048, "step": 45300 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001986009046859289, "loss": 2.171, "step": 45305 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019860059657403643, "loss": 2.2095, "step": 45310 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.0001986002884284602, "loss": 2.3258, "step": 45315 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019859998024920032, "loss": 2.2711, "step": 45320 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.0001985996720362569, "loss": 2.1371, "step": 45325 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.00019859936378963005, "loss": 2.1877, "step": 45330 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.00019859905550931987, "loss": 2.189, "step": 45335 }, { "epoch": 0.11, "grad_norm": 1.8515625, "learning_rate": 0.00019859874719532647, "loss": 2.2224, "step": 45340 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.00019859843884764992, "loss": 2.2702, "step": 45345 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019859813046629038, "loss": 2.314, "step": 45350 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019859782205124794, "loss": 2.2077, "step": 45355 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.0001985975136025227, "loss": 2.2694, "step": 45360 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.00019859720512011472, "loss": 2.0933, "step": 45365 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019859689660402417, "loss": 2.2309, "step": 45370 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019859658805425115, "loss": 2.1382, "step": 45375 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001985962794707957, "loss": 1.9825, "step": 45380 }, { "epoch": 0.11, "grad_norm": 1.703125, "learning_rate": 0.00019859597085365802, "loss": 2.1006, "step": 45385 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019859566220283817, "loss": 2.1284, "step": 45390 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.0001985953535183362, "loss": 2.3759, "step": 45395 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.0001985950448001523, "loss": 2.3061, "step": 45400 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019859473604828657, "loss": 2.3661, "step": 45405 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019859442726273907, "loss": 2.3664, "step": 45410 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019859411844350993, "loss": 2.103, "step": 45415 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.00019859380959059923, "loss": 2.2295, "step": 45420 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.00019859350070400712, "loss": 2.0802, "step": 45425 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019859319178373367, "loss": 2.1923, "step": 45430 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.000198592882829779, "loss": 2.268, "step": 45435 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.0001985925738421432, "loss": 2.1131, "step": 45440 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.0001985922648208264, "loss": 2.0701, "step": 45445 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.00019859195576582873, "loss": 2.1382, "step": 45450 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.0001985916466771502, "loss": 2.1818, "step": 45455 }, { "epoch": 0.11, "grad_norm": 1.5390625, "learning_rate": 0.00019859133755479103, "loss": 2.2306, "step": 45460 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.00019859102839875123, "loss": 2.113, "step": 45465 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019859071920903096, "loss": 2.1717, "step": 45470 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.0001985904099856303, "loss": 2.3283, "step": 45475 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.00019859010072854937, "loss": 2.2435, "step": 45480 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.0001985897914377883, "loss": 2.2059, "step": 45485 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019858948211334715, "loss": 2.0836, "step": 45490 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019858917275522605, "loss": 2.0022, "step": 45495 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.0001985888633634251, "loss": 2.2237, "step": 45500 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.0001985885539379444, "loss": 2.2711, "step": 45505 }, { "epoch": 0.11, "grad_norm": 1.6015625, "learning_rate": 0.00019858824447878406, "loss": 2.1595, "step": 45510 }, { "epoch": 0.11, "grad_norm": 1.5859375, "learning_rate": 0.0001985879349859442, "loss": 2.0126, "step": 45515 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001985876254594249, "loss": 2.2179, "step": 45520 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.0001985873158992263, "loss": 2.0422, "step": 45525 }, { "epoch": 0.11, "grad_norm": 1.7109375, "learning_rate": 0.00019858700630534848, "loss": 2.1681, "step": 45530 }, { "epoch": 0.11, "grad_norm": 2.796875, "learning_rate": 0.00019858669667779152, "loss": 2.13, "step": 45535 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019858638701655558, "loss": 2.221, "step": 45540 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019858607732164076, "loss": 2.1512, "step": 45545 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019858576759304713, "loss": 2.1432, "step": 45550 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019858545783077482, "loss": 2.1363, "step": 45555 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.0001985851480348239, "loss": 2.2415, "step": 45560 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.00019858483820519457, "loss": 2.1603, "step": 45565 }, { "epoch": 0.11, "grad_norm": 2.3125, "learning_rate": 0.00019858452834188678, "loss": 2.1934, "step": 45570 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.0001985842184449008, "loss": 2.124, "step": 45575 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019858390851423664, "loss": 2.2194, "step": 45580 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019858359854989441, "loss": 1.8785, "step": 45585 }, { "epoch": 0.11, "grad_norm": 1.6328125, "learning_rate": 0.00019858328855187427, "loss": 2.0393, "step": 45590 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019858297852017625, "loss": 2.3415, "step": 45595 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.00019858266845480054, "loss": 2.2915, "step": 45600 }, { "epoch": 0.11, "grad_norm": 3.453125, "learning_rate": 0.0001985823583557472, "loss": 2.188, "step": 45605 }, { "epoch": 0.11, "grad_norm": 1.5390625, "learning_rate": 0.0001985820482230163, "loss": 2.1292, "step": 45610 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.000198581738056608, "loss": 2.3384, "step": 45615 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019858142785652243, "loss": 2.1956, "step": 45620 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019858111762275962, "loss": 2.2616, "step": 45625 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.0001985808073553197, "loss": 2.1927, "step": 45630 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.0001985804970542028, "loss": 2.2103, "step": 45635 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019858018671940904, "loss": 2.1686, "step": 45640 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019857987635093846, "loss": 2.275, "step": 45645 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019857956594879125, "loss": 2.2611, "step": 45650 }, { "epoch": 0.11, "grad_norm": 1.5, "learning_rate": 0.00019857925551296744, "loss": 2.0746, "step": 45655 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.00019857894504346716, "loss": 1.9838, "step": 45660 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019857863454029055, "loss": 2.203, "step": 45665 }, { "epoch": 0.11, "grad_norm": 1.5, "learning_rate": 0.00019857832400343767, "loss": 2.2172, "step": 45670 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019857801343290868, "loss": 2.1143, "step": 45675 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.0001985777028287036, "loss": 2.0003, "step": 45680 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019857739219082264, "loss": 1.9731, "step": 45685 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019857708151926583, "loss": 2.2508, "step": 45690 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019857677081403332, "loss": 2.358, "step": 45695 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019857646007512516, "loss": 2.121, "step": 45700 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019857614930254153, "loss": 2.0005, "step": 45705 }, { "epoch": 0.11, "grad_norm": 1.546875, "learning_rate": 0.0001985758384962825, "loss": 2.1768, "step": 45710 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.00019857552765634813, "loss": 2.1751, "step": 45715 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001985752167827386, "loss": 2.0635, "step": 45720 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019857490587545402, "loss": 2.159, "step": 45725 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.0001985745949344944, "loss": 2.1859, "step": 45730 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019857428395985997, "loss": 2.1103, "step": 45735 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019857397295155074, "loss": 2.2395, "step": 45740 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019857366190956688, "loss": 2.3639, "step": 45745 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019857335083390846, "loss": 2.309, "step": 45750 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019857303972457558, "loss": 2.2509, "step": 45755 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.0001985727285815684, "loss": 2.2348, "step": 45760 }, { "epoch": 0.11, "grad_norm": 2.375, "learning_rate": 0.00019857241740488694, "loss": 2.1819, "step": 45765 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019857210619453138, "loss": 2.271, "step": 45770 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.0001985717949505018, "loss": 2.2287, "step": 45775 }, { "epoch": 0.11, "grad_norm": 2.359375, "learning_rate": 0.00019857148367279833, "loss": 2.0892, "step": 45780 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019857117236142101, "loss": 2.0163, "step": 45785 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.00019857086101636998, "loss": 2.2267, "step": 45790 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019857054963764542, "loss": 2.2626, "step": 45795 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019857023822524733, "loss": 2.0753, "step": 45800 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.00019856992677917584, "loss": 2.0402, "step": 45805 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019856961529943112, "loss": 2.1926, "step": 45810 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019856930378601322, "loss": 2.2537, "step": 45815 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019856899223892225, "loss": 2.2714, "step": 45820 }, { "epoch": 0.11, "grad_norm": 1.421875, "learning_rate": 0.00019856868065815832, "loss": 1.9832, "step": 45825 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.00019856836904372154, "loss": 2.1661, "step": 45830 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019856805739561204, "loss": 2.3203, "step": 45835 }, { "epoch": 0.11, "grad_norm": 2.3125, "learning_rate": 0.00019856774571382988, "loss": 2.2545, "step": 45840 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019856743399837519, "loss": 2.2161, "step": 45845 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.0001985671222492481, "loss": 1.9939, "step": 45850 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019856681046644868, "loss": 2.3192, "step": 45855 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019856649864997703, "loss": 2.0148, "step": 45860 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019856618679983332, "loss": 2.2375, "step": 45865 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019856587491601756, "loss": 2.2718, "step": 45870 }, { "epoch": 0.11, "grad_norm": 1.5625, "learning_rate": 0.00019856556299852995, "loss": 2.2123, "step": 45875 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.00019856525104737055, "loss": 2.0182, "step": 45880 }, { "epoch": 0.11, "grad_norm": 2.328125, "learning_rate": 0.00019856493906253949, "loss": 2.2168, "step": 45885 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001985646270440368, "loss": 1.9636, "step": 45890 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.0001985643149918627, "loss": 2.1741, "step": 45895 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019856400290601723, "loss": 2.2369, "step": 45900 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.0001985636907865005, "loss": 2.0762, "step": 45905 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019856337863331263, "loss": 2.1591, "step": 45910 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019856306644645372, "loss": 2.1471, "step": 45915 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019856275422592388, "loss": 2.2085, "step": 45920 }, { "epoch": 0.11, "grad_norm": 2.53125, "learning_rate": 0.00019856244197172322, "loss": 2.252, "step": 45925 }, { "epoch": 0.11, "grad_norm": 4.1875, "learning_rate": 0.00019856212968385185, "loss": 2.3398, "step": 45930 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019856181736230982, "loss": 2.1394, "step": 45935 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019856150500709735, "loss": 2.0664, "step": 45940 }, { "epoch": 0.11, "grad_norm": 2.34375, "learning_rate": 0.00019856119261821444, "loss": 2.2122, "step": 45945 }, { "epoch": 0.11, "grad_norm": 2.625, "learning_rate": 0.00019856088019566125, "loss": 2.2553, "step": 45950 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.0001985605677394379, "loss": 2.229, "step": 45955 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019856025524954442, "loss": 2.1764, "step": 45960 }, { "epoch": 0.11, "grad_norm": 4.84375, "learning_rate": 0.00019855994272598103, "loss": 2.1984, "step": 45965 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019855963016874776, "loss": 2.0496, "step": 45970 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019855931757784473, "loss": 2.1688, "step": 45975 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019855900495327199, "loss": 2.2594, "step": 45980 }, { "epoch": 0.11, "grad_norm": 2.515625, "learning_rate": 0.00019855869229502978, "loss": 2.2103, "step": 45985 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.0001985583796031181, "loss": 2.2798, "step": 45990 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.0001985580668775371, "loss": 2.352, "step": 45995 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.0001985577541182869, "loss": 2.223, "step": 46000 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019855744132536756, "loss": 2.0763, "step": 46005 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.0001985571284987792, "loss": 2.2756, "step": 46010 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019855681563852196, "loss": 2.2631, "step": 46015 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019855650274459591, "loss": 2.1599, "step": 46020 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019855618981700116, "loss": 2.1754, "step": 46025 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019855587685573784, "loss": 2.0773, "step": 46030 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019855556386080605, "loss": 2.3236, "step": 46035 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.0001985552508322059, "loss": 2.3111, "step": 46040 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019855493776993748, "loss": 2.2621, "step": 46045 }, { "epoch": 0.11, "grad_norm": 1.6015625, "learning_rate": 0.0001985546246740009, "loss": 2.118, "step": 46050 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.00019855431154439628, "loss": 2.2454, "step": 46055 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019855399838112373, "loss": 2.1727, "step": 46060 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019855368518418334, "loss": 2.0237, "step": 46065 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.0001985533719535752, "loss": 2.1234, "step": 46070 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019855305868929945, "loss": 2.1335, "step": 46075 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019855274539135623, "loss": 2.2292, "step": 46080 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019855243205974554, "loss": 2.2757, "step": 46085 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.0001985521186944676, "loss": 1.8549, "step": 46090 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019855180529552245, "loss": 1.976, "step": 46095 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.0001985514918629102, "loss": 2.1435, "step": 46100 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.000198551178396631, "loss": 2.1791, "step": 46105 }, { "epoch": 0.11, "grad_norm": 1.5703125, "learning_rate": 0.0001985508648966849, "loss": 2.1228, "step": 46110 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.0001985505513630721, "loss": 2.2426, "step": 46115 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019855023779579258, "loss": 2.2002, "step": 46120 }, { "epoch": 0.11, "grad_norm": 2.5, "learning_rate": 0.00019854992419484653, "loss": 2.0984, "step": 46125 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019854961056023406, "loss": 2.1682, "step": 46130 }, { "epoch": 0.11, "grad_norm": 1.625, "learning_rate": 0.00019854929689195523, "loss": 2.3413, "step": 46135 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.0001985489831900102, "loss": 2.2537, "step": 46140 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019854866945439903, "loss": 2.1982, "step": 46145 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019854835568512184, "loss": 2.3082, "step": 46150 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019854804188217874, "loss": 2.1121, "step": 46155 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019854772804556986, "loss": 2.2416, "step": 46160 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.0001985474141752953, "loss": 2.1379, "step": 46165 }, { "epoch": 0.11, "grad_norm": 1.625, "learning_rate": 0.0001985471002713551, "loss": 2.2056, "step": 46170 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.0001985467863337495, "loss": 2.3207, "step": 46175 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019854647236247847, "loss": 2.1854, "step": 46180 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019854615835754221, "loss": 2.3626, "step": 46185 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019854584431894078, "loss": 1.9376, "step": 46190 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019854553024667432, "loss": 2.2442, "step": 46195 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.0001985452161407429, "loss": 2.1576, "step": 46200 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.00019854490200114667, "loss": 2.1315, "step": 46205 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019854458782788567, "loss": 2.1564, "step": 46210 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019854427362096008, "loss": 2.1882, "step": 46215 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.00019854395938037001, "loss": 2.0505, "step": 46220 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.0001985436451061155, "loss": 2.1656, "step": 46225 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019854333079819672, "loss": 2.1732, "step": 46230 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.0001985430164566137, "loss": 2.266, "step": 46235 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019854270208136666, "loss": 2.256, "step": 46240 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019854238767245562, "loss": 2.2281, "step": 46245 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.0001985420732298807, "loss": 2.3542, "step": 46250 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.000198541758753642, "loss": 2.158, "step": 46255 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019854144424373972, "loss": 2.0643, "step": 46260 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019854112970017383, "loss": 2.1721, "step": 46265 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019854081512294452, "loss": 2.238, "step": 46270 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.0001985405005120519, "loss": 2.014, "step": 46275 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.00019854018586749604, "loss": 2.3704, "step": 46280 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019853987118927707, "loss": 2.1728, "step": 46285 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019853955647739508, "loss": 2.3328, "step": 46290 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.0001985392417318502, "loss": 2.233, "step": 46295 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019853892695264252, "loss": 2.1953, "step": 46300 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.0001985386121397722, "loss": 2.1456, "step": 46305 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.00019853829729323926, "loss": 2.2598, "step": 46310 }, { "epoch": 0.11, "grad_norm": 2.34375, "learning_rate": 0.00019853798241304386, "loss": 2.1358, "step": 46315 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019853766749918607, "loss": 2.1556, "step": 46320 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.00019853735255166607, "loss": 2.2128, "step": 46325 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.0001985370375704839, "loss": 2.1752, "step": 46330 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019853672255563972, "loss": 2.1303, "step": 46335 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019853640750713356, "loss": 2.3409, "step": 46340 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.0001985360924249656, "loss": 2.2606, "step": 46345 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.00019853577730913596, "loss": 2.2711, "step": 46350 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019853546215964466, "loss": 2.1768, "step": 46355 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.00019853514697649186, "loss": 2.1362, "step": 46360 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001985348317596777, "loss": 2.0848, "step": 46365 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.00019853451650920226, "loss": 2.2451, "step": 46370 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.00019853420122506558, "loss": 2.1331, "step": 46375 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019853388590726788, "loss": 2.1605, "step": 46380 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001985335705558092, "loss": 2.2464, "step": 46385 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019853325517068968, "loss": 2.2194, "step": 46390 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.00019853293975190942, "loss": 2.1548, "step": 46395 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.0001985326242994685, "loss": 2.0709, "step": 46400 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019853230881336707, "loss": 2.2455, "step": 46405 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.0001985319932936052, "loss": 2.3073, "step": 46410 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.000198531677740183, "loss": 2.1235, "step": 46415 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019853136215310059, "loss": 2.1829, "step": 46420 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019853104653235813, "loss": 2.3708, "step": 46425 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019853073087795564, "loss": 2.1689, "step": 46430 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019853041518989327, "loss": 2.2872, "step": 46435 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.00019853009946817112, "loss": 2.2462, "step": 46440 }, { "epoch": 0.11, "grad_norm": 1.65625, "learning_rate": 0.0001985297837127893, "loss": 2.1332, "step": 46445 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019852946792374795, "loss": 2.1651, "step": 46450 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.0001985291521010471, "loss": 2.2524, "step": 46455 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019852883624468692, "loss": 2.2312, "step": 46460 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.0001985285203546675, "loss": 2.4212, "step": 46465 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.000198528204430989, "loss": 2.1377, "step": 46470 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.0001985278884736514, "loss": 2.178, "step": 46475 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019852757248265491, "loss": 2.2583, "step": 46480 }, { "epoch": 0.11, "grad_norm": 1.5703125, "learning_rate": 0.00019852725645799963, "loss": 2.049, "step": 46485 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019852694039968567, "loss": 2.1536, "step": 46490 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001985266243077131, "loss": 2.3179, "step": 46495 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019852630818208204, "loss": 2.1456, "step": 46500 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.0001985259920227926, "loss": 1.8515, "step": 46505 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019852567582984493, "loss": 2.2717, "step": 46510 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019852535960323907, "loss": 2.331, "step": 46515 }, { "epoch": 0.11, "grad_norm": 2.40625, "learning_rate": 0.0001985250433429752, "loss": 2.2769, "step": 46520 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019852472704905334, "loss": 2.2284, "step": 46525 }, { "epoch": 0.11, "grad_norm": 1.6484375, "learning_rate": 0.00019852441072147365, "loss": 2.3315, "step": 46530 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019852409436023626, "loss": 2.1518, "step": 46535 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019852377796534125, "loss": 2.357, "step": 46540 }, { "epoch": 0.11, "grad_norm": 2.296875, "learning_rate": 0.00019852346153678872, "loss": 2.0684, "step": 46545 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.0001985231450745788, "loss": 2.1824, "step": 46550 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019852282857871155, "loss": 2.4429, "step": 46555 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019852251204918717, "loss": 2.1847, "step": 46560 }, { "epoch": 0.11, "grad_norm": 1.5703125, "learning_rate": 0.00019852219548600571, "loss": 2.0482, "step": 46565 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019852187888916723, "loss": 2.2733, "step": 46570 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019852156225867195, "loss": 2.0565, "step": 46575 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019852124559451985, "loss": 2.1959, "step": 46580 }, { "epoch": 0.11, "grad_norm": 1.6328125, "learning_rate": 0.00019852092889671114, "loss": 2.2495, "step": 46585 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019852061216524592, "loss": 2.1748, "step": 46590 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019852029540012422, "loss": 2.2727, "step": 46595 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019851997860134625, "loss": 2.0375, "step": 46600 }, { "epoch": 0.11, "grad_norm": 1.5625, "learning_rate": 0.00019851966176891202, "loss": 2.3537, "step": 46605 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019851934490282172, "loss": 2.2339, "step": 46610 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019851902800307542, "loss": 2.246, "step": 46615 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.00019851871106967323, "loss": 2.2921, "step": 46620 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019851839410261527, "loss": 1.9673, "step": 46625 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019851807710190164, "loss": 2.2874, "step": 46630 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019851776006753246, "loss": 2.1529, "step": 46635 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001985174429995078, "loss": 2.1537, "step": 46640 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019851712589782778, "loss": 2.1031, "step": 46645 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019851680876249253, "loss": 1.9845, "step": 46650 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.0001985164915935022, "loss": 2.1128, "step": 46655 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.0001985161743908568, "loss": 2.1693, "step": 46660 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.0001985158571545565, "loss": 2.2712, "step": 46665 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.0001985155398846014, "loss": 2.2321, "step": 46670 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.0001985152225809916, "loss": 2.0953, "step": 46675 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019851490524372723, "loss": 2.0957, "step": 46680 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019851458787280838, "loss": 2.2559, "step": 46685 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019851427046823513, "loss": 2.0384, "step": 46690 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019851395303000766, "loss": 2.2342, "step": 46695 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.000198513635558126, "loss": 2.0415, "step": 46700 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.00019851331805259033, "loss": 2.278, "step": 46705 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.0001985130005134007, "loss": 2.1639, "step": 46710 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019851268294055724, "loss": 2.0752, "step": 46715 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019851236533406007, "loss": 2.0556, "step": 46720 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.0001985120476939093, "loss": 2.1132, "step": 46725 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.000198511730020105, "loss": 2.0581, "step": 46730 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019851141231264733, "loss": 2.0455, "step": 46735 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019851109457153635, "loss": 2.2372, "step": 46740 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019851077679677225, "loss": 2.1435, "step": 46745 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019851045898835502, "loss": 2.3342, "step": 46750 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019851014114628487, "loss": 2.2385, "step": 46755 }, { "epoch": 0.11, "grad_norm": 2.390625, "learning_rate": 0.00019850982327056184, "loss": 2.1974, "step": 46760 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001985095053611861, "loss": 2.2007, "step": 46765 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.0001985091874181577, "loss": 2.1651, "step": 46770 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019850886944147679, "loss": 2.2775, "step": 46775 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019850855143114344, "loss": 2.0522, "step": 46780 }, { "epoch": 0.11, "grad_norm": 1.7109375, "learning_rate": 0.00019850823338715782, "loss": 2.0658, "step": 46785 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.00019850791530951997, "loss": 2.1841, "step": 46790 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.00019850759719823002, "loss": 2.2696, "step": 46795 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019850727905328813, "loss": 2.201, "step": 46800 }, { "epoch": 0.11, "grad_norm": 2.296875, "learning_rate": 0.00019850696087469433, "loss": 2.2229, "step": 46805 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.00019850664266244877, "loss": 2.2045, "step": 46810 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019850632441655158, "loss": 2.2931, "step": 46815 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.0001985060061370028, "loss": 2.2852, "step": 46820 }, { "epoch": 0.11, "grad_norm": 2.34375, "learning_rate": 0.00019850568782380263, "loss": 2.123, "step": 46825 }, { "epoch": 0.11, "grad_norm": 1.640625, "learning_rate": 0.00019850536947695112, "loss": 2.0709, "step": 46830 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.00019850505109644836, "loss": 2.2116, "step": 46835 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.0001985047326822945, "loss": 2.1464, "step": 46840 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019850441423448965, "loss": 2.22, "step": 46845 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019850409575303393, "loss": 2.3719, "step": 46850 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019850377723792736, "loss": 2.2947, "step": 46855 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019850345868917016, "loss": 2.0784, "step": 46860 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.0001985031401067624, "loss": 2.211, "step": 46865 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019850282149070413, "loss": 2.0591, "step": 46870 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019850250284099557, "loss": 2.1139, "step": 46875 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.0001985021841576367, "loss": 1.9784, "step": 46880 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019850186544062773, "loss": 2.2258, "step": 46885 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.00019850154668996876, "loss": 2.0833, "step": 46890 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.00019850122790565985, "loss": 2.198, "step": 46895 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019850090908770117, "loss": 2.4382, "step": 46900 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019850059023609275, "loss": 2.1297, "step": 46905 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019850027135083476, "loss": 1.884, "step": 46910 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019849995243192728, "loss": 2.0548, "step": 46915 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.00019849963347937045, "loss": 2.217, "step": 46920 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019849931449316435, "loss": 2.3414, "step": 46925 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019849899547330909, "loss": 2.0884, "step": 46930 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019849867641980477, "loss": 2.0406, "step": 46935 }, { "epoch": 0.11, "grad_norm": 2.28125, "learning_rate": 0.00019849835733265153, "loss": 2.306, "step": 46940 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019849803821184948, "loss": 2.2289, "step": 46945 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.0001984977190573987, "loss": 2.0617, "step": 46950 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019849739986929933, "loss": 2.2206, "step": 46955 }, { "epoch": 0.11, "grad_norm": 2.78125, "learning_rate": 0.00019849708064755147, "loss": 2.2595, "step": 46960 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019849676139215518, "loss": 2.2077, "step": 46965 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019849644210311062, "loss": 2.1585, "step": 46970 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.0001984961227804179, "loss": 2.2504, "step": 46975 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019849580342407713, "loss": 2.1334, "step": 46980 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.0001984954840340884, "loss": 2.156, "step": 46985 }, { "epoch": 0.11, "grad_norm": 1.546875, "learning_rate": 0.00019849516461045182, "loss": 2.1955, "step": 46990 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001984948451531675, "loss": 2.224, "step": 46995 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019849452566223557, "loss": 1.9791, "step": 47000 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.0001984942061376561, "loss": 2.1175, "step": 47005 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019849388657942925, "loss": 2.2699, "step": 47010 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.00019849356698755508, "loss": 2.2215, "step": 47015 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019849324736203374, "loss": 2.2024, "step": 47020 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019849292770286533, "loss": 2.2306, "step": 47025 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019849260801004992, "loss": 2.3263, "step": 47030 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019849228828358766, "loss": 2.2224, "step": 47035 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019849196852347864, "loss": 2.267, "step": 47040 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019849164872972298, "loss": 2.1482, "step": 47045 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.0001984913289023208, "loss": 2.1468, "step": 47050 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001984910090412722, "loss": 2.2876, "step": 47055 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019849068914657724, "loss": 2.2936, "step": 47060 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.00019849036921823615, "loss": 2.3343, "step": 47065 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001984900492562489, "loss": 2.1207, "step": 47070 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.0001984897292606157, "loss": 2.2473, "step": 47075 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019848940923133658, "loss": 2.1977, "step": 47080 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019848908916841174, "loss": 2.1285, "step": 47085 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.0001984887690718412, "loss": 2.343, "step": 47090 }, { "epoch": 0.11, "grad_norm": 2.296875, "learning_rate": 0.00019848844894162511, "loss": 2.2171, "step": 47095 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019848812877776363, "loss": 2.2909, "step": 47100 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001984878085802568, "loss": 2.1959, "step": 47105 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.0001984874883491047, "loss": 2.1025, "step": 47110 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.00019848716808430755, "loss": 2.2423, "step": 47115 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019848684778586538, "loss": 2.2838, "step": 47120 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.0001984865274537783, "loss": 2.3285, "step": 47125 }, { "epoch": 0.11, "grad_norm": 1.71875, "learning_rate": 0.00019848620708804644, "loss": 2.2368, "step": 47130 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.0001984858866886699, "loss": 2.1099, "step": 47135 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.0001984855662556488, "loss": 2.2022, "step": 47140 }, { "epoch": 0.11, "grad_norm": 2.296875, "learning_rate": 0.00019848524578898325, "loss": 2.0573, "step": 47145 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019848492528867336, "loss": 2.4019, "step": 47150 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019848460475471922, "loss": 2.2974, "step": 47155 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019848428418712099, "loss": 2.3789, "step": 47160 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019848396358587868, "loss": 2.4349, "step": 47165 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.0001984836429509925, "loss": 2.3235, "step": 47170 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019848332228246252, "loss": 2.2028, "step": 47175 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019848300158028885, "loss": 2.1092, "step": 47180 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019848268084447158, "loss": 2.2247, "step": 47185 }, { "epoch": 0.11, "grad_norm": 2.484375, "learning_rate": 0.00019848236007501087, "loss": 2.2732, "step": 47190 }, { "epoch": 0.11, "grad_norm": 1.8515625, "learning_rate": 0.00019848203927190679, "loss": 2.1414, "step": 47195 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019848171843515945, "loss": 2.1761, "step": 47200 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019848139756476897, "loss": 2.4198, "step": 47205 }, { "epoch": 0.11, "grad_norm": 1.8515625, "learning_rate": 0.00019848107666073546, "loss": 2.1392, "step": 47210 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.00019848075572305904, "loss": 2.3308, "step": 47215 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.0001984804347517398, "loss": 2.3102, "step": 47220 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019848011374677785, "loss": 2.3107, "step": 47225 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019847979270817332, "loss": 2.1617, "step": 47230 }, { "epoch": 0.11, "grad_norm": 1.859375, "learning_rate": 0.0001984794716359263, "loss": 2.1241, "step": 47235 }, { "epoch": 0.11, "grad_norm": 2.59375, "learning_rate": 0.00019847915053003692, "loss": 2.1011, "step": 47240 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019847882939050523, "loss": 2.2321, "step": 47245 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019847850821733145, "loss": 2.2555, "step": 47250 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019847818701051559, "loss": 2.2186, "step": 47255 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.0001984778657700578, "loss": 2.217, "step": 47260 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019847754449595816, "loss": 2.2707, "step": 47265 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019847722318821684, "loss": 2.0801, "step": 47270 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.0001984769018468339, "loss": 2.3159, "step": 47275 }, { "epoch": 0.11, "grad_norm": 1.5703125, "learning_rate": 0.00019847658047180948, "loss": 2.0272, "step": 47280 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019847625906314366, "loss": 2.3162, "step": 47285 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.0001984759376208366, "loss": 2.1878, "step": 47290 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.00019847561614488832, "loss": 2.2162, "step": 47295 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.000198475294635299, "loss": 2.0507, "step": 47300 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.00019847497309206875, "loss": 2.2825, "step": 47305 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001984746515151976, "loss": 2.159, "step": 47310 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.0001984743299046858, "loss": 2.2182, "step": 47315 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019847400826053337, "loss": 2.2376, "step": 47320 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.0001984736865827404, "loss": 2.2576, "step": 47325 }, { "epoch": 0.11, "grad_norm": 1.65625, "learning_rate": 0.00019847336487130709, "loss": 2.1351, "step": 47330 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019847304312623341, "loss": 2.1028, "step": 47335 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.0001984727213475196, "loss": 2.3843, "step": 47340 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019847239953516574, "loss": 2.1416, "step": 47345 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.0001984720776891719, "loss": 2.0129, "step": 47350 }, { "epoch": 0.11, "grad_norm": 1.7421875, "learning_rate": 0.00019847175580953822, "loss": 2.2546, "step": 47355 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.0001984714338962648, "loss": 2.2276, "step": 47360 }, { "epoch": 0.11, "grad_norm": 1.59375, "learning_rate": 0.00019847111194935176, "loss": 2.1675, "step": 47365 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019847078996879919, "loss": 2.2364, "step": 47370 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.0001984704679546072, "loss": 2.2815, "step": 47375 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019847014590677593, "loss": 2.2107, "step": 47380 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019846982382530545, "loss": 2.1733, "step": 47385 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019846950171019593, "loss": 1.9673, "step": 47390 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.00019846917956144742, "loss": 2.2481, "step": 47395 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019846885737906007, "loss": 2.3867, "step": 47400 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019846853516303394, "loss": 2.1024, "step": 47405 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.0001984682129133692, "loss": 2.1451, "step": 47410 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019846789063006592, "loss": 2.1326, "step": 47415 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019846756831312423, "loss": 2.2455, "step": 47420 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.00019846724596254422, "loss": 2.1078, "step": 47425 }, { "epoch": 0.11, "grad_norm": 2.296875, "learning_rate": 0.00019846692357832605, "loss": 2.2208, "step": 47430 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019846660116046975, "loss": 2.2586, "step": 47435 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.0001984662787089755, "loss": 2.1896, "step": 47440 }, { "epoch": 0.11, "grad_norm": 1.640625, "learning_rate": 0.00019846595622384334, "loss": 2.096, "step": 47445 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.00019846563370507346, "loss": 2.2607, "step": 47450 }, { "epoch": 0.11, "grad_norm": 2.703125, "learning_rate": 0.00019846531115266593, "loss": 2.1824, "step": 47455 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019846498856662087, "loss": 2.3007, "step": 47460 }, { "epoch": 0.11, "grad_norm": 1.703125, "learning_rate": 0.00019846466594693838, "loss": 2.1053, "step": 47465 }, { "epoch": 0.11, "grad_norm": 1.8515625, "learning_rate": 0.00019846434329361858, "loss": 2.1508, "step": 47470 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019846402060666156, "loss": 2.2042, "step": 47475 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019846369788606744, "loss": 2.2672, "step": 47480 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019846337513183636, "loss": 2.2473, "step": 47485 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.0001984630523439684, "loss": 2.1876, "step": 47490 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019846272952246365, "loss": 2.0467, "step": 47495 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019846240666732227, "loss": 2.2009, "step": 47500 }, { "epoch": 0.11, "grad_norm": 1.75, "learning_rate": 0.00019846208377854435, "loss": 2.2203, "step": 47505 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.00019846176085612998, "loss": 2.2536, "step": 47510 }, { "epoch": 0.11, "grad_norm": 10.625, "learning_rate": 0.00019846143790007927, "loss": 2.2455, "step": 47515 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019846111491039238, "loss": 2.1991, "step": 47520 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.0001984607918870694, "loss": 2.2244, "step": 47525 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.0001984604688301104, "loss": 2.1707, "step": 47530 }, { "epoch": 0.11, "grad_norm": 2.25, "learning_rate": 0.00019846014573951553, "loss": 1.9519, "step": 47535 }, { "epoch": 0.11, "grad_norm": 1.4765625, "learning_rate": 0.00019845982261528487, "loss": 2.0116, "step": 47540 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019845949945741854, "loss": 2.0859, "step": 47545 }, { "epoch": 0.11, "grad_norm": 1.640625, "learning_rate": 0.0001984591762659167, "loss": 2.2199, "step": 47550 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.0001984588530407794, "loss": 2.1108, "step": 47555 }, { "epoch": 0.11, "grad_norm": 1.640625, "learning_rate": 0.00019845852978200677, "loss": 2.102, "step": 47560 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019845820648959893, "loss": 2.2135, "step": 47565 }, { "epoch": 0.11, "grad_norm": 1.640625, "learning_rate": 0.00019845788316355596, "loss": 2.1674, "step": 47570 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019845755980387798, "loss": 2.0996, "step": 47575 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019845723641056514, "loss": 2.1788, "step": 47580 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019845691298361754, "loss": 2.1184, "step": 47585 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019845658952303525, "loss": 2.0283, "step": 47590 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001984562660288184, "loss": 2.2341, "step": 47595 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.0001984559425009671, "loss": 2.1605, "step": 47600 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019845561893948148, "loss": 2.1756, "step": 47605 }, { "epoch": 0.11, "grad_norm": 1.765625, "learning_rate": 0.00019845529534436162, "loss": 2.2122, "step": 47610 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.00019845497171560764, "loss": 2.0609, "step": 47615 }, { "epoch": 0.11, "grad_norm": 2.015625, "learning_rate": 0.00019845464805321968, "loss": 2.2057, "step": 47620 }, { "epoch": 0.11, "grad_norm": 1.6796875, "learning_rate": 0.00019845432435719782, "loss": 2.1225, "step": 47625 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019845400062754216, "loss": 2.3706, "step": 47630 }, { "epoch": 0.11, "grad_norm": 1.703125, "learning_rate": 0.00019845367686425287, "loss": 2.0604, "step": 47635 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019845335306732998, "loss": 2.1485, "step": 47640 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.0001984530292367736, "loss": 2.0328, "step": 47645 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019845270537258397, "loss": 2.1715, "step": 47650 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019845238147476107, "loss": 2.0392, "step": 47655 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019845205754330506, "loss": 1.9997, "step": 47660 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019845173357821604, "loss": 2.3227, "step": 47665 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.0001984514095794941, "loss": 2.3342, "step": 47670 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019845108554713935, "loss": 2.183, "step": 47675 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019845076148115198, "loss": 2.043, "step": 47680 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019845043738153203, "loss": 2.205, "step": 47685 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019845011324827962, "loss": 2.2269, "step": 47690 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.00019844978908139485, "loss": 2.2624, "step": 47695 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019844946488087786, "loss": 2.2724, "step": 47700 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019844914064672876, "loss": 2.2306, "step": 47705 }, { "epoch": 0.11, "grad_norm": 2.5625, "learning_rate": 0.00019844881637894762, "loss": 2.2086, "step": 47710 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019844849207753458, "loss": 2.3264, "step": 47715 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.0001984481677424898, "loss": 2.3765, "step": 47720 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.0001984478433738133, "loss": 2.1955, "step": 47725 }, { "epoch": 0.11, "grad_norm": 2.28125, "learning_rate": 0.00019844751897150523, "loss": 2.1874, "step": 47730 }, { "epoch": 0.11, "grad_norm": 2.328125, "learning_rate": 0.0001984471945355657, "loss": 2.3264, "step": 47735 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019844687006599485, "loss": 2.2239, "step": 47740 }, { "epoch": 0.11, "grad_norm": 2.640625, "learning_rate": 0.00019844654556279274, "loss": 2.034, "step": 47745 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019844622102595952, "loss": 2.5233, "step": 47750 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.00019844589645549526, "loss": 2.2447, "step": 47755 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019844557185140014, "loss": 2.1986, "step": 47760 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.0001984452472136742, "loss": 2.0703, "step": 47765 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.00019844492254231754, "loss": 2.1396, "step": 47770 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019844459783733038, "loss": 2.3227, "step": 47775 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019844427309871268, "loss": 2.2085, "step": 47780 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019844394832646472, "loss": 2.2812, "step": 47785 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019844362352058648, "loss": 2.2999, "step": 47790 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.0001984432986810781, "loss": 2.1957, "step": 47795 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019844297380793973, "loss": 2.1867, "step": 47800 }, { "epoch": 0.11, "grad_norm": 1.7109375, "learning_rate": 0.00019844264890117144, "loss": 2.3094, "step": 47805 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.00019844232396077334, "loss": 2.1559, "step": 47810 }, { "epoch": 0.11, "grad_norm": 2.65625, "learning_rate": 0.0001984419989867456, "loss": 2.1254, "step": 47815 }, { "epoch": 0.11, "grad_norm": 2.375, "learning_rate": 0.00019844167397908824, "loss": 2.1275, "step": 47820 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019844134893780144, "loss": 2.1829, "step": 47825 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.00019844102386288527, "loss": 2.2654, "step": 47830 }, { "epoch": 0.11, "grad_norm": 1.6640625, "learning_rate": 0.0001984406987543399, "loss": 2.0371, "step": 47835 }, { "epoch": 0.11, "grad_norm": 2.203125, "learning_rate": 0.00019844037361216538, "loss": 2.0042, "step": 47840 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019844004843636186, "loss": 2.3364, "step": 47845 }, { "epoch": 0.11, "grad_norm": 2.28125, "learning_rate": 0.0001984397232269294, "loss": 2.1946, "step": 47850 }, { "epoch": 0.11, "grad_norm": 2.328125, "learning_rate": 0.00019843939798386818, "loss": 2.3031, "step": 47855 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019843907270717827, "loss": 2.1198, "step": 47860 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019843874739685976, "loss": 2.0928, "step": 47865 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001984384220529128, "loss": 2.2367, "step": 47870 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.0001984380966753375, "loss": 2.2774, "step": 47875 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019843777126413398, "loss": 1.9763, "step": 47880 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.0001984374458193023, "loss": 2.182, "step": 47885 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.0001984371203408426, "loss": 2.2615, "step": 47890 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.000198436794828755, "loss": 2.0256, "step": 47895 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019843646928303962, "loss": 2.1615, "step": 47900 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019843614370369656, "loss": 2.1208, "step": 47905 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019843581809072592, "loss": 2.2975, "step": 47910 }, { "epoch": 0.11, "grad_norm": 1.71875, "learning_rate": 0.00019843549244412783, "loss": 2.0723, "step": 47915 }, { "epoch": 0.11, "grad_norm": 2.390625, "learning_rate": 0.00019843516676390236, "loss": 2.2701, "step": 47920 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.0001984348410500497, "loss": 2.2302, "step": 47925 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019843451530256987, "loss": 2.2933, "step": 47930 }, { "epoch": 0.11, "grad_norm": 1.65625, "learning_rate": 0.00019843418952146302, "loss": 2.1358, "step": 47935 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.0001984338637067293, "loss": 2.2285, "step": 47940 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019843353785836877, "loss": 2.2373, "step": 47945 }, { "epoch": 0.11, "grad_norm": 1.71875, "learning_rate": 0.00019843321197638157, "loss": 2.1735, "step": 47950 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019843288606076777, "loss": 2.2752, "step": 47955 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019843256011152754, "loss": 2.0101, "step": 47960 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019843223412866094, "loss": 2.2807, "step": 47965 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019843190811216812, "loss": 2.2325, "step": 47970 }, { "epoch": 0.11, "grad_norm": 1.703125, "learning_rate": 0.00019843158206204916, "loss": 1.9771, "step": 47975 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.0001984312559783042, "loss": 2.2903, "step": 47980 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019843092986093335, "loss": 2.1426, "step": 47985 }, { "epoch": 0.11, "grad_norm": 1.6484375, "learning_rate": 0.00019843060370993668, "loss": 2.1927, "step": 47990 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019843027752531434, "loss": 1.9135, "step": 47995 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019842995130706643, "loss": 2.2513, "step": 48000 }, { "epoch": 0.11, "grad_norm": 1.671875, "learning_rate": 0.0001984296250551931, "loss": 2.3097, "step": 48005 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.0001984292987696944, "loss": 2.2052, "step": 48010 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019842897245057043, "loss": 2.0484, "step": 48015 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019842864609782138, "loss": 2.1627, "step": 48020 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.00019842831971144732, "loss": 2.1654, "step": 48025 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019842799329144835, "loss": 2.202, "step": 48030 }, { "epoch": 0.11, "grad_norm": 2.3125, "learning_rate": 0.00019842766683782456, "loss": 2.2816, "step": 48035 }, { "epoch": 0.11, "grad_norm": 1.7578125, "learning_rate": 0.00019842734035057613, "loss": 2.121, "step": 48040 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019842701382970313, "loss": 2.233, "step": 48045 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.0001984266872752057, "loss": 2.083, "step": 48050 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019842636068708388, "loss": 2.0589, "step": 48055 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019842603406533787, "loss": 2.1772, "step": 48060 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019842570740996772, "loss": 2.1632, "step": 48065 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019842538072097359, "loss": 2.0848, "step": 48070 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019842505399835555, "loss": 2.2048, "step": 48075 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019842472724211372, "loss": 2.2081, "step": 48080 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.0001984244004522482, "loss": 2.2549, "step": 48085 }, { "epoch": 0.11, "grad_norm": 1.5703125, "learning_rate": 0.00019842407362875917, "loss": 2.0244, "step": 48090 }, { "epoch": 0.11, "grad_norm": 1.734375, "learning_rate": 0.00019842374677164664, "loss": 2.0775, "step": 48095 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001984234198809108, "loss": 2.0393, "step": 48100 }, { "epoch": 0.11, "grad_norm": 1.96875, "learning_rate": 0.00019842309295655172, "loss": 2.148, "step": 48105 }, { "epoch": 0.11, "grad_norm": 1.6015625, "learning_rate": 0.00019842276599856958, "loss": 2.1701, "step": 48110 }, { "epoch": 0.11, "grad_norm": 1.71875, "learning_rate": 0.00019842243900696435, "loss": 2.2906, "step": 48115 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.0001984221119817363, "loss": 2.0958, "step": 48120 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019842178492288545, "loss": 2.3557, "step": 48125 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.0001984214578304119, "loss": 2.3505, "step": 48130 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019842113070431584, "loss": 2.2414, "step": 48135 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019842080354459733, "loss": 2.1696, "step": 48140 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.0001984204763512565, "loss": 2.0894, "step": 48145 }, { "epoch": 0.11, "grad_norm": 1.6171875, "learning_rate": 0.0001984201491242934, "loss": 1.9638, "step": 48150 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019841982186370825, "loss": 2.2513, "step": 48155 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019841949456950107, "loss": 2.0856, "step": 48160 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.000198419167241672, "loss": 2.1147, "step": 48165 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019841883988022118, "loss": 2.2398, "step": 48170 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019841851248514868, "loss": 2.1963, "step": 48175 }, { "epoch": 0.11, "grad_norm": 1.7890625, "learning_rate": 0.00019841818505645465, "loss": 2.2295, "step": 48180 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019841785759413914, "loss": 2.1051, "step": 48185 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019841753009820236, "loss": 2.2517, "step": 48190 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019841720256864433, "loss": 2.2082, "step": 48195 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019841687500546523, "loss": 2.0406, "step": 48200 }, { "epoch": 0.11, "grad_norm": 2.53125, "learning_rate": 0.00019841654740866514, "loss": 2.2506, "step": 48205 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019841621977824414, "loss": 2.0619, "step": 48210 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.0001984158921142024, "loss": 2.2723, "step": 48215 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019841556441654, "loss": 2.0347, "step": 48220 }, { "epoch": 0.11, "grad_norm": 2.296875, "learning_rate": 0.00019841523668525705, "loss": 2.1302, "step": 48225 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019841490892035367, "loss": 2.1869, "step": 48230 }, { "epoch": 0.11, "grad_norm": 1.90625, "learning_rate": 0.00019841458112183, "loss": 2.3641, "step": 48235 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019841425328968607, "loss": 2.1689, "step": 48240 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.0001984139254239221, "loss": 2.2521, "step": 48245 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.0001984135975245381, "loss": 2.2007, "step": 48250 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019841326959153428, "loss": 2.1148, "step": 48255 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.0001984129416249107, "loss": 2.1895, "step": 48260 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019841261362466743, "loss": 2.3487, "step": 48265 }, { "epoch": 0.11, "grad_norm": 2.390625, "learning_rate": 0.00019841228559080464, "loss": 2.0319, "step": 48270 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.00019841195752332244, "loss": 2.1814, "step": 48275 }, { "epoch": 0.11, "grad_norm": 1.5, "learning_rate": 0.00019841162942222095, "loss": 2.1869, "step": 48280 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019841130128750022, "loss": 2.3657, "step": 48285 }, { "epoch": 0.11, "grad_norm": 2.6875, "learning_rate": 0.00019841097311916044, "loss": 2.3231, "step": 48290 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019841064491720167, "loss": 2.1087, "step": 48295 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019841031668162406, "loss": 2.1647, "step": 48300 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019840998841242767, "loss": 2.044, "step": 48305 }, { "epoch": 0.11, "grad_norm": 1.4765625, "learning_rate": 0.00019840966010961267, "loss": 1.933, "step": 48310 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019840933177317914, "loss": 2.1586, "step": 48315 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019840900340312718, "loss": 2.2011, "step": 48320 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019840867499945692, "loss": 2.3817, "step": 48325 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001984083465621685, "loss": 2.1554, "step": 48330 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019840801809126198, "loss": 2.2067, "step": 48335 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.0001984076895867375, "loss": 2.1371, "step": 48340 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019840736104859515, "loss": 2.2595, "step": 48345 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.0001984070324768351, "loss": 2.3594, "step": 48350 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.0001984067038714574, "loss": 2.2064, "step": 48355 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001984063752324622, "loss": 2.2336, "step": 48360 }, { "epoch": 0.11, "grad_norm": 2.328125, "learning_rate": 0.00019840604655984957, "loss": 2.1843, "step": 48365 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019840571785361967, "loss": 2.1612, "step": 48370 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.0001984053891137726, "loss": 2.176, "step": 48375 }, { "epoch": 0.11, "grad_norm": 2.390625, "learning_rate": 0.00019840506034030843, "loss": 2.1547, "step": 48380 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019840473153322737, "loss": 2.2091, "step": 48385 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.0001984044026925294, "loss": 2.2494, "step": 48390 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019840407381821473, "loss": 2.238, "step": 48395 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019840374491028343, "loss": 2.364, "step": 48400 }, { "epoch": 0.11, "grad_norm": 1.546875, "learning_rate": 0.00019840341596873566, "loss": 2.2157, "step": 48405 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.0001984030869935715, "loss": 2.1098, "step": 48410 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.000198402757984791, "loss": 2.1233, "step": 48415 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019840242894239438, "loss": 2.3057, "step": 48420 }, { "epoch": 0.11, "grad_norm": 1.8046875, "learning_rate": 0.00019840209986638171, "loss": 2.1645, "step": 48425 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.0001984017707567531, "loss": 2.2883, "step": 48430 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019840144161350863, "loss": 2.1397, "step": 48435 }, { "epoch": 0.11, "grad_norm": 1.828125, "learning_rate": 0.00019840111243664844, "loss": 2.0237, "step": 48440 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.0001984007832261727, "loss": 2.223, "step": 48445 }, { "epoch": 0.11, "grad_norm": 1.6875, "learning_rate": 0.00019840045398208142, "loss": 2.3937, "step": 48450 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019840012470437478, "loss": 2.1553, "step": 48455 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019839979539305286, "loss": 2.2166, "step": 48460 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.0001983994660481158, "loss": 2.3276, "step": 48465 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.0001983991366695637, "loss": 2.0947, "step": 48470 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.00019839880725739666, "loss": 2.2925, "step": 48475 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.0001983984778116148, "loss": 2.2737, "step": 48480 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019839814833221825, "loss": 2.0914, "step": 48485 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019839781881920708, "loss": 2.3785, "step": 48490 }, { "epoch": 0.11, "grad_norm": 2.09375, "learning_rate": 0.00019839748927258144, "loss": 2.1565, "step": 48495 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.00019839715969234144, "loss": 1.9911, "step": 48500 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019839683007848716, "loss": 2.0983, "step": 48505 }, { "epoch": 0.11, "grad_norm": 1.8828125, "learning_rate": 0.00019839650043101877, "loss": 2.2703, "step": 48510 }, { "epoch": 0.11, "grad_norm": 2.234375, "learning_rate": 0.00019839617074993634, "loss": 2.1863, "step": 48515 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.00019839584103524001, "loss": 2.021, "step": 48520 }, { "epoch": 0.11, "grad_norm": 2.171875, "learning_rate": 0.00019839551128692987, "loss": 2.1937, "step": 48525 }, { "epoch": 0.11, "grad_norm": 1.9921875, "learning_rate": 0.00019839518150500604, "loss": 2.3827, "step": 48530 }, { "epoch": 0.11, "grad_norm": 2.828125, "learning_rate": 0.00019839485168946864, "loss": 2.2258, "step": 48535 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.00019839452184031774, "loss": 2.1839, "step": 48540 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019839419195755352, "loss": 2.2173, "step": 48545 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.00019839386204117604, "loss": 2.1795, "step": 48550 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.00019839353209118543, "loss": 2.2418, "step": 48555 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019839320210758183, "loss": 2.242, "step": 48560 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001983928720903653, "loss": 2.1377, "step": 48565 }, { "epoch": 0.11, "grad_norm": 1.7734375, "learning_rate": 0.00019839254203953598, "loss": 2.1574, "step": 48570 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.000198392211955094, "loss": 2.16, "step": 48575 }, { "epoch": 0.11, "grad_norm": 2.109375, "learning_rate": 0.00019839188183703944, "loss": 2.2676, "step": 48580 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.00019839155168537245, "loss": 2.1804, "step": 48585 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.0001983912215000931, "loss": 2.1533, "step": 48590 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.00019839089128120154, "loss": 2.2798, "step": 48595 }, { "epoch": 0.11, "grad_norm": 1.921875, "learning_rate": 0.0001983905610286979, "loss": 2.1332, "step": 48600 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.0001983902307425822, "loss": 2.1548, "step": 48605 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019838990042285462, "loss": 2.1888, "step": 48610 }, { "epoch": 0.11, "grad_norm": 2.28125, "learning_rate": 0.0001983895700695153, "loss": 2.2324, "step": 48615 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019838923968256428, "loss": 2.2791, "step": 48620 }, { "epoch": 0.11, "grad_norm": 1.9375, "learning_rate": 0.00019838890926200174, "loss": 2.0242, "step": 48625 }, { "epoch": 0.11, "grad_norm": 1.8515625, "learning_rate": 0.00019838857880782776, "loss": 1.958, "step": 48630 }, { "epoch": 0.11, "grad_norm": 2.265625, "learning_rate": 0.00019838824832004242, "loss": 2.249, "step": 48635 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.0001983879177986459, "loss": 2.1359, "step": 48640 }, { "epoch": 0.11, "grad_norm": 2.484375, "learning_rate": 0.0001983875872436383, "loss": 2.1844, "step": 48645 }, { "epoch": 0.11, "grad_norm": 2.484375, "learning_rate": 0.0001983872566550197, "loss": 2.1645, "step": 48650 }, { "epoch": 0.11, "grad_norm": 1.9140625, "learning_rate": 0.00019838692603279024, "loss": 2.0811, "step": 48655 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019838659537695002, "loss": 2.2027, "step": 48660 }, { "epoch": 0.11, "grad_norm": 1.953125, "learning_rate": 0.00019838626468749913, "loss": 2.1379, "step": 48665 }, { "epoch": 0.11, "grad_norm": 1.6953125, "learning_rate": 0.00019838593396443772, "loss": 2.2218, "step": 48670 }, { "epoch": 0.11, "grad_norm": 2.140625, "learning_rate": 0.00019838560320776591, "loss": 2.2794, "step": 48675 }, { "epoch": 0.11, "grad_norm": 1.78125, "learning_rate": 0.00019838527241748376, "loss": 2.3609, "step": 48680 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019838494159359144, "loss": 2.0797, "step": 48685 }, { "epoch": 0.11, "grad_norm": 1.8125, "learning_rate": 0.00019838461073608902, "loss": 2.1631, "step": 48690 }, { "epoch": 0.11, "grad_norm": 2.0625, "learning_rate": 0.00019838427984497666, "loss": 2.2257, "step": 48695 }, { "epoch": 0.11, "grad_norm": 1.5546875, "learning_rate": 0.0001983839489202544, "loss": 2.1715, "step": 48700 }, { "epoch": 0.11, "grad_norm": 1.9609375, "learning_rate": 0.00019838361796192247, "loss": 2.2552, "step": 48705 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019838328696998087, "loss": 2.1394, "step": 48710 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019838295594442975, "loss": 2.1086, "step": 48715 }, { "epoch": 0.11, "grad_norm": 1.7109375, "learning_rate": 0.00019838262488526925, "loss": 2.1845, "step": 48720 }, { "epoch": 0.11, "grad_norm": 1.9453125, "learning_rate": 0.00019838229379249943, "loss": 2.2443, "step": 48725 }, { "epoch": 0.11, "grad_norm": 2.1875, "learning_rate": 0.00019838196266612043, "loss": 2.3439, "step": 48730 }, { "epoch": 0.11, "grad_norm": 2.046875, "learning_rate": 0.00019838163150613243, "loss": 2.094, "step": 48735 }, { "epoch": 0.11, "grad_norm": 2.5, "learning_rate": 0.00019838130031253544, "loss": 2.1568, "step": 48740 }, { "epoch": 0.11, "grad_norm": 2.484375, "learning_rate": 0.0001983809690853296, "loss": 2.1697, "step": 48745 }, { "epoch": 0.11, "grad_norm": 2.546875, "learning_rate": 0.00019838063782451503, "loss": 2.2976, "step": 48750 }, { "epoch": 0.11, "grad_norm": 2.21875, "learning_rate": 0.00019838030653009187, "loss": 1.9638, "step": 48755 }, { "epoch": 0.11, "grad_norm": 1.7265625, "learning_rate": 0.0001983799752020602, "loss": 2.0816, "step": 48760 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.00019837964384042018, "loss": 2.1557, "step": 48765 }, { "epoch": 0.11, "grad_norm": 1.8671875, "learning_rate": 0.00019837931244517186, "loss": 2.0476, "step": 48770 }, { "epoch": 0.11, "grad_norm": 2.125, "learning_rate": 0.0001983789810163154, "loss": 2.1698, "step": 48775 }, { "epoch": 0.11, "grad_norm": 2.6875, "learning_rate": 0.00019837864955385087, "loss": 2.0938, "step": 48780 }, { "epoch": 0.11, "grad_norm": 1.890625, "learning_rate": 0.00019837831805777841, "loss": 2.26, "step": 48785 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.0001983779865280982, "loss": 2.337, "step": 48790 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001983776549648102, "loss": 2.0255, "step": 48795 }, { "epoch": 0.11, "grad_norm": 1.984375, "learning_rate": 0.00019837732336791465, "loss": 1.9646, "step": 48800 }, { "epoch": 0.11, "grad_norm": 1.9296875, "learning_rate": 0.0001983769917374116, "loss": 2.0568, "step": 48805 }, { "epoch": 0.11, "grad_norm": 1.875, "learning_rate": 0.0001983766600733012, "loss": 2.2946, "step": 48810 }, { "epoch": 0.11, "grad_norm": 1.8203125, "learning_rate": 0.00019837632837558356, "loss": 2.1425, "step": 48815 }, { "epoch": 0.11, "grad_norm": 2.078125, "learning_rate": 0.00019837599664425878, "loss": 2.3044, "step": 48820 }, { "epoch": 0.11, "grad_norm": 1.8359375, "learning_rate": 0.00019837566487932695, "loss": 2.1675, "step": 48825 }, { "epoch": 0.11, "grad_norm": 1.59375, "learning_rate": 0.00019837533308078822, "loss": 2.2528, "step": 48830 }, { "epoch": 0.11, "grad_norm": 1.84375, "learning_rate": 0.0001983750012486427, "loss": 2.1521, "step": 48835 }, { "epoch": 0.11, "grad_norm": 2.03125, "learning_rate": 0.0001983746693828905, "loss": 2.1948, "step": 48840 }, { "epoch": 0.11, "grad_norm": 1.8984375, "learning_rate": 0.0001983743374835317, "loss": 2.1628, "step": 48845 }, { "epoch": 0.11, "grad_norm": 2.0, "learning_rate": 0.00019837400555056646, "loss": 2.1308, "step": 48850 }, { "epoch": 0.11, "grad_norm": 2.15625, "learning_rate": 0.0001983736735839949, "loss": 2.1676, "step": 48855 }, { "epoch": 0.11, "grad_norm": 1.796875, "learning_rate": 0.0001983733415838171, "loss": 2.0564, "step": 48860 }, { "epoch": 0.11, "grad_norm": 1.9765625, "learning_rate": 0.00019837300955003316, "loss": 2.245, "step": 48865 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019837267748264325, "loss": 2.1446, "step": 48870 }, { "epoch": 0.12, "grad_norm": 1.65625, "learning_rate": 0.0001983723453816474, "loss": 2.0995, "step": 48875 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019837201324704578, "loss": 2.147, "step": 48880 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.0001983716810788385, "loss": 2.1168, "step": 48885 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019837134887702571, "loss": 2.1899, "step": 48890 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019837101664160747, "loss": 2.0456, "step": 48895 }, { "epoch": 0.12, "grad_norm": 2.4375, "learning_rate": 0.00019837068437258387, "loss": 2.1798, "step": 48900 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.0001983703520699551, "loss": 2.1239, "step": 48905 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019837001973372124, "loss": 2.1671, "step": 48910 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.00019836968736388236, "loss": 2.2518, "step": 48915 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019836935496043862, "loss": 2.3225, "step": 48920 }, { "epoch": 0.12, "grad_norm": 1.8125, "learning_rate": 0.0001983690225233901, "loss": 2.0406, "step": 48925 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.000198368690052737, "loss": 2.0155, "step": 48930 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019836835754847933, "loss": 2.1406, "step": 48935 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019836802501061723, "loss": 2.3225, "step": 48940 }, { "epoch": 0.12, "grad_norm": 2.546875, "learning_rate": 0.00019836769243915088, "loss": 2.2663, "step": 48945 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019836735983408033, "loss": 2.0399, "step": 48950 }, { "epoch": 0.12, "grad_norm": 2.890625, "learning_rate": 0.00019836702719540567, "loss": 2.1196, "step": 48955 }, { "epoch": 0.12, "grad_norm": 2.53125, "learning_rate": 0.00019836669452312706, "loss": 2.4393, "step": 48960 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019836636181724463, "loss": 2.0935, "step": 48965 }, { "epoch": 0.12, "grad_norm": 1.671875, "learning_rate": 0.00019836602907775846, "loss": 2.1128, "step": 48970 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019836569630466864, "loss": 2.2523, "step": 48975 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019836536349797533, "loss": 2.2092, "step": 48980 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.00019836503065767867, "loss": 2.1458, "step": 48985 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.00019836469778377867, "loss": 2.1482, "step": 48990 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019836436487627554, "loss": 2.2506, "step": 48995 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019836403193516935, "loss": 1.8746, "step": 49000 }, { "epoch": 0.12, "grad_norm": 2.625, "learning_rate": 0.00019836369896046021, "loss": 2.2525, "step": 49005 }, { "epoch": 0.12, "grad_norm": 2.296875, "learning_rate": 0.00019836336595214826, "loss": 2.0286, "step": 49010 }, { "epoch": 0.12, "grad_norm": 1.703125, "learning_rate": 0.0001983630329102336, "loss": 2.0808, "step": 49015 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019836269983471632, "loss": 2.2508, "step": 49020 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.0001983623667255966, "loss": 2.0739, "step": 49025 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.00019836203358287451, "loss": 2.334, "step": 49030 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019836170040655012, "loss": 2.057, "step": 49035 }, { "epoch": 0.12, "grad_norm": 2.609375, "learning_rate": 0.00019836136719662362, "loss": 2.2179, "step": 49040 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019836103395309512, "loss": 2.1552, "step": 49045 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019836070067596467, "loss": 2.304, "step": 49050 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.00019836036736523244, "loss": 2.3363, "step": 49055 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019836003402089848, "loss": 2.0706, "step": 49060 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019835970064296298, "loss": 2.176, "step": 49065 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019835936723142602, "loss": 2.1713, "step": 49070 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019835903378628774, "loss": 2.2452, "step": 49075 }, { "epoch": 0.12, "grad_norm": 1.671875, "learning_rate": 0.00019835870030754819, "loss": 2.0586, "step": 49080 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.00019835836679520753, "loss": 2.0854, "step": 49085 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.00019835803324926593, "loss": 2.2173, "step": 49090 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019835769966972336, "loss": 2.0839, "step": 49095 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019835736605658007, "loss": 2.3435, "step": 49100 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.0001983570324098361, "loss": 2.1846, "step": 49105 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.0001983566987294916, "loss": 2.2167, "step": 49110 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.0001983563650155466, "loss": 2.1933, "step": 49115 }, { "epoch": 0.12, "grad_norm": 1.796875, "learning_rate": 0.00019835603126800135, "loss": 2.1345, "step": 49120 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.0001983556974868559, "loss": 2.1257, "step": 49125 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019835536367211032, "loss": 2.2104, "step": 49130 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.0001983550298237648, "loss": 2.1927, "step": 49135 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.0001983546959418194, "loss": 2.2222, "step": 49140 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019835436202627425, "loss": 2.2117, "step": 49145 }, { "epoch": 0.12, "grad_norm": 2.4375, "learning_rate": 0.00019835402807712947, "loss": 2.0787, "step": 49150 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019835369409438517, "loss": 2.4168, "step": 49155 }, { "epoch": 0.12, "grad_norm": 2.46875, "learning_rate": 0.00019835336007804146, "loss": 2.0771, "step": 49160 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019835302602809848, "loss": 2.2106, "step": 49165 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.0001983526919445563, "loss": 2.279, "step": 49170 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019835235782741502, "loss": 2.3044, "step": 49175 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019835202367667485, "loss": 2.1049, "step": 49180 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019835168949233585, "loss": 2.027, "step": 49185 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.0001983513552743981, "loss": 1.9705, "step": 49190 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019835102102286177, "loss": 2.2429, "step": 49195 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.0001983506867377269, "loss": 2.1726, "step": 49200 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.0001983503524189937, "loss": 2.1268, "step": 49205 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.0001983500180666622, "loss": 2.1591, "step": 49210 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.00019834968368073256, "loss": 2.1505, "step": 49215 }, { "epoch": 0.12, "grad_norm": 2.3125, "learning_rate": 0.0001983493492612049, "loss": 2.2416, "step": 49220 }, { "epoch": 0.12, "grad_norm": 2.390625, "learning_rate": 0.00019834901480807933, "loss": 2.169, "step": 49225 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019834868032135588, "loss": 2.1717, "step": 49230 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.0001983483458010348, "loss": 2.3728, "step": 49235 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019834801124711616, "loss": 2.1464, "step": 49240 }, { "epoch": 0.12, "grad_norm": 1.796875, "learning_rate": 0.00019834767665960002, "loss": 2.1182, "step": 49245 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019834734203848654, "loss": 2.2534, "step": 49250 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019834700738377583, "loss": 2.1149, "step": 49255 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019834667269546795, "loss": 2.1944, "step": 49260 }, { "epoch": 0.12, "grad_norm": 2.296875, "learning_rate": 0.00019834633797356313, "loss": 2.2816, "step": 49265 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019834600321806138, "loss": 2.2569, "step": 49270 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019834566842896284, "loss": 2.0744, "step": 49275 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.0001983453336062677, "loss": 2.187, "step": 49280 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019834499874997596, "loss": 2.2095, "step": 49285 }, { "epoch": 0.12, "grad_norm": 1.5, "learning_rate": 0.0001983446638600878, "loss": 1.9687, "step": 49290 }, { "epoch": 0.12, "grad_norm": 1.6328125, "learning_rate": 0.00019834432893660328, "loss": 2.1057, "step": 49295 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.0001983439939795226, "loss": 2.2163, "step": 49300 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019834365898884583, "loss": 2.2467, "step": 49305 }, { "epoch": 0.12, "grad_norm": 1.671875, "learning_rate": 0.00019834332396457304, "loss": 2.1389, "step": 49310 }, { "epoch": 0.12, "grad_norm": 1.6484375, "learning_rate": 0.00019834298890670441, "loss": 2.2455, "step": 49315 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019834265381524005, "loss": 2.1128, "step": 49320 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019834231869018005, "loss": 2.1007, "step": 49325 }, { "epoch": 0.12, "grad_norm": 1.6328125, "learning_rate": 0.00019834198353152452, "loss": 2.1344, "step": 49330 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.0001983416483392736, "loss": 2.2522, "step": 49335 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019834131311342735, "loss": 2.1126, "step": 49340 }, { "epoch": 0.12, "grad_norm": 1.671875, "learning_rate": 0.00019834097785398596, "loss": 2.2837, "step": 49345 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.0001983406425609495, "loss": 2.1965, "step": 49350 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.0001983403072343181, "loss": 2.2122, "step": 49355 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019833997187409182, "loss": 2.319, "step": 49360 }, { "epoch": 0.12, "grad_norm": 1.6953125, "learning_rate": 0.00019833963648027087, "loss": 2.1672, "step": 49365 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019833930105285532, "loss": 2.2557, "step": 49370 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.00019833896559184525, "loss": 2.2002, "step": 49375 }, { "epoch": 0.12, "grad_norm": 2.390625, "learning_rate": 0.00019833863009724082, "loss": 2.1282, "step": 49380 }, { "epoch": 0.12, "grad_norm": 1.640625, "learning_rate": 0.00019833829456904214, "loss": 2.1381, "step": 49385 }, { "epoch": 0.12, "grad_norm": 1.59375, "learning_rate": 0.00019833795900724933, "loss": 2.1685, "step": 49390 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019833762341186245, "loss": 2.3151, "step": 49395 }, { "epoch": 0.12, "grad_norm": 1.5234375, "learning_rate": 0.0001983372877828817, "loss": 2.2229, "step": 49400 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019833695212030712, "loss": 2.206, "step": 49405 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019833661642413883, "loss": 2.264, "step": 49410 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019833628069437702, "loss": 2.2873, "step": 49415 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019833594493102173, "loss": 2.0145, "step": 49420 }, { "epoch": 0.12, "grad_norm": 2.296875, "learning_rate": 0.00019833560913407307, "loss": 2.2293, "step": 49425 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.0001983352733035312, "loss": 2.213, "step": 49430 }, { "epoch": 0.12, "grad_norm": 1.546875, "learning_rate": 0.00019833493743939625, "loss": 2.0751, "step": 49435 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019833460154166827, "loss": 2.253, "step": 49440 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019833426561034743, "loss": 2.1807, "step": 49445 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.0001983339296454338, "loss": 2.2331, "step": 49450 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019833359364692751, "loss": 2.2359, "step": 49455 }, { "epoch": 0.12, "grad_norm": 1.7421875, "learning_rate": 0.0001983332576148287, "loss": 1.9846, "step": 49460 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019833292154913744, "loss": 2.238, "step": 49465 }, { "epoch": 0.12, "grad_norm": 1.6875, "learning_rate": 0.0001983325854498539, "loss": 2.263, "step": 49470 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019833224931697815, "loss": 2.2308, "step": 49475 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019833191315051033, "loss": 2.128, "step": 49480 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019833157695045055, "loss": 1.835, "step": 49485 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.0001983312407167989, "loss": 2.1374, "step": 49490 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019833090444955554, "loss": 2.1395, "step": 49495 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019833056814872054, "loss": 2.0702, "step": 49500 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019833023181429403, "loss": 2.2202, "step": 49505 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019832989544627615, "loss": 2.3488, "step": 49510 }, { "epoch": 0.12, "grad_norm": 2.3125, "learning_rate": 0.000198329559044667, "loss": 2.2614, "step": 49515 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019832922260946666, "loss": 2.1155, "step": 49520 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.00019832888614067527, "loss": 2.1764, "step": 49525 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019832854963829297, "loss": 2.1912, "step": 49530 }, { "epoch": 0.12, "grad_norm": 1.6015625, "learning_rate": 0.00019832821310231986, "loss": 2.2209, "step": 49535 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019832787653275603, "loss": 2.1157, "step": 49540 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001983275399296016, "loss": 2.1429, "step": 49545 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019832720329285674, "loss": 2.2372, "step": 49550 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019832686662252155, "loss": 2.1186, "step": 49555 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019832652991859604, "loss": 2.1627, "step": 49560 }, { "epoch": 0.12, "grad_norm": 1.6875, "learning_rate": 0.00019832619318108042, "loss": 2.0639, "step": 49565 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.00019832585640997482, "loss": 2.1469, "step": 49570 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019832551960527933, "loss": 2.2182, "step": 49575 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019832518276699404, "loss": 2.3104, "step": 49580 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.0001983248458951191, "loss": 2.3226, "step": 49585 }, { "epoch": 0.12, "grad_norm": 1.65625, "learning_rate": 0.0001983245089896546, "loss": 2.2532, "step": 49590 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.00019832417205060065, "loss": 2.2295, "step": 49595 }, { "epoch": 0.12, "grad_norm": 2.375, "learning_rate": 0.0001983238350779574, "loss": 2.1894, "step": 49600 }, { "epoch": 0.12, "grad_norm": 1.5390625, "learning_rate": 0.00019832349807172493, "loss": 2.0879, "step": 49605 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019832316103190337, "loss": 2.1099, "step": 49610 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019832282395849284, "loss": 2.3221, "step": 49615 }, { "epoch": 0.12, "grad_norm": 1.8125, "learning_rate": 0.00019832248685149346, "loss": 2.2435, "step": 49620 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.0001983221497109053, "loss": 2.1174, "step": 49625 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019832181253672855, "loss": 2.1686, "step": 49630 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019832147532896328, "loss": 2.0139, "step": 49635 }, { "epoch": 0.12, "grad_norm": 2.390625, "learning_rate": 0.0001983211380876096, "loss": 2.2326, "step": 49640 }, { "epoch": 0.12, "grad_norm": 2.46875, "learning_rate": 0.00019832080081266763, "loss": 2.1961, "step": 49645 }, { "epoch": 0.12, "grad_norm": 2.953125, "learning_rate": 0.00019832046350413748, "loss": 2.0069, "step": 49650 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.00019832012616201932, "loss": 2.2543, "step": 49655 }, { "epoch": 0.12, "grad_norm": 2.453125, "learning_rate": 0.0001983197887863132, "loss": 2.145, "step": 49660 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019831945137701923, "loss": 2.1966, "step": 49665 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.0001983191139341376, "loss": 2.1606, "step": 49670 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.00019831877645766835, "loss": 2.1282, "step": 49675 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019831843894761164, "loss": 2.3569, "step": 49680 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019831810140396753, "loss": 2.1639, "step": 49685 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.0001983177638267362, "loss": 2.2236, "step": 49690 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019831742621591775, "loss": 2.1453, "step": 49695 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.0001983170885715123, "loss": 2.1273, "step": 49700 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019831675089351992, "loss": 2.2828, "step": 49705 }, { "epoch": 0.12, "grad_norm": 2.921875, "learning_rate": 0.00019831641318194073, "loss": 2.2431, "step": 49710 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.0001983160754367749, "loss": 2.1933, "step": 49715 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019831573765802252, "loss": 2.1219, "step": 49720 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.0001983153998456837, "loss": 2.1959, "step": 49725 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019831506199975856, "loss": 2.2371, "step": 49730 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.0001983147241202472, "loss": 2.0157, "step": 49735 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019831438620714976, "loss": 2.1117, "step": 49740 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.00019831404826046631, "loss": 2.122, "step": 49745 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019831371028019703, "loss": 2.2983, "step": 49750 }, { "epoch": 0.12, "grad_norm": 1.8125, "learning_rate": 0.000198313372266342, "loss": 2.3948, "step": 49755 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019831303421890132, "loss": 2.1482, "step": 49760 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.00019831269613787512, "loss": 2.3498, "step": 49765 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019831235802326354, "loss": 2.2992, "step": 49770 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019831201987506667, "loss": 2.1803, "step": 49775 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019831168169328464, "loss": 2.0626, "step": 49780 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019831134347791756, "loss": 2.1319, "step": 49785 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019831100522896554, "loss": 2.1349, "step": 49790 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019831066694642868, "loss": 2.2658, "step": 49795 }, { "epoch": 0.12, "grad_norm": 2.703125, "learning_rate": 0.00019831032863030712, "loss": 2.1034, "step": 49800 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.00019830999028060098, "loss": 2.0629, "step": 49805 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019830965189731033, "loss": 1.9209, "step": 49810 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019830931348043538, "loss": 2.0805, "step": 49815 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019830897502997615, "loss": 2.2517, "step": 49820 }, { "epoch": 0.12, "grad_norm": 1.5859375, "learning_rate": 0.00019830863654593278, "loss": 2.0239, "step": 49825 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.0001983082980283054, "loss": 2.198, "step": 49830 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019830795947709415, "loss": 2.1351, "step": 49835 }, { "epoch": 0.12, "grad_norm": 2.265625, "learning_rate": 0.0001983076208922991, "loss": 2.0943, "step": 49840 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019830728227392038, "loss": 2.2797, "step": 49845 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.0001983069436219581, "loss": 2.4199, "step": 49850 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019830660493641239, "loss": 2.3117, "step": 49855 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.0001983062662172834, "loss": 2.2237, "step": 49860 }, { "epoch": 0.12, "grad_norm": 2.40625, "learning_rate": 0.00019830592746457115, "loss": 2.1441, "step": 49865 }, { "epoch": 0.12, "grad_norm": 2.25, "learning_rate": 0.00019830558867827588, "loss": 2.1826, "step": 49870 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019830524985839758, "loss": 2.1489, "step": 49875 }, { "epoch": 0.12, "grad_norm": 1.703125, "learning_rate": 0.00019830491100493642, "loss": 1.9819, "step": 49880 }, { "epoch": 0.12, "grad_norm": 2.375, "learning_rate": 0.00019830457211789256, "loss": 2.0671, "step": 49885 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019830423319726603, "loss": 2.1897, "step": 49890 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019830389424305703, "loss": 2.2293, "step": 49895 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019830355525526564, "loss": 2.0018, "step": 49900 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019830321623389195, "loss": 2.1903, "step": 49905 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.0001983028771789361, "loss": 2.0257, "step": 49910 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.0001983025380903982, "loss": 2.2274, "step": 49915 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019830219896827838, "loss": 2.1383, "step": 49920 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.00019830185981257675, "loss": 2.3482, "step": 49925 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.0001983015206232934, "loss": 2.2063, "step": 49930 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019830118140042852, "loss": 2.0027, "step": 49935 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019830084214398212, "loss": 2.2437, "step": 49940 }, { "epoch": 0.12, "grad_norm": 1.671875, "learning_rate": 0.00019830050285395438, "loss": 2.1493, "step": 49945 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019830016353034543, "loss": 2.2266, "step": 49950 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019829982417315535, "loss": 2.2473, "step": 49955 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019829948478238425, "loss": 2.2356, "step": 49960 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019829914535803226, "loss": 2.1593, "step": 49965 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.00019829880590009952, "loss": 2.2088, "step": 49970 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019829846640858611, "loss": 2.1871, "step": 49975 }, { "epoch": 0.12, "grad_norm": 1.5703125, "learning_rate": 0.00019829812688349218, "loss": 1.9858, "step": 49980 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019829778732481784, "loss": 2.2545, "step": 49985 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019829744773256316, "loss": 2.2506, "step": 49990 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019829710810672827, "loss": 2.3073, "step": 49995 }, { "epoch": 0.12, "grad_norm": 2.59375, "learning_rate": 0.00019829676844731336, "loss": 1.9586, "step": 50000 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019829642875431847, "loss": 2.2607, "step": 50005 }, { "epoch": 0.12, "grad_norm": 1.6953125, "learning_rate": 0.00019829608902774371, "loss": 2.0223, "step": 50010 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019829574926758927, "loss": 2.1121, "step": 50015 }, { "epoch": 0.12, "grad_norm": 1.6640625, "learning_rate": 0.0001982954094738552, "loss": 2.2561, "step": 50020 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019829506964654162, "loss": 2.2002, "step": 50025 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.0001982947297856487, "loss": 2.0619, "step": 50030 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019829438989117648, "loss": 1.9597, "step": 50035 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019829404996312512, "loss": 2.2042, "step": 50040 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019829371000149474, "loss": 2.2238, "step": 50045 }, { "epoch": 0.12, "grad_norm": 1.640625, "learning_rate": 0.00019829337000628542, "loss": 2.2035, "step": 50050 }, { "epoch": 0.12, "grad_norm": 2.40625, "learning_rate": 0.00019829302997749734, "loss": 2.1289, "step": 50055 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019829268991513054, "loss": 2.0004, "step": 50060 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.0001982923498191852, "loss": 2.1608, "step": 50065 }, { "epoch": 0.12, "grad_norm": 1.65625, "learning_rate": 0.0001982920096896614, "loss": 2.1161, "step": 50070 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.0001982916695265593, "loss": 2.2872, "step": 50075 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019829132932987895, "loss": 2.1952, "step": 50080 }, { "epoch": 0.12, "grad_norm": 1.5234375, "learning_rate": 0.0001982909890996205, "loss": 2.254, "step": 50085 }, { "epoch": 0.12, "grad_norm": 1.640625, "learning_rate": 0.00019829064883578406, "loss": 2.1061, "step": 50090 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.0001982903085383698, "loss": 2.2911, "step": 50095 }, { "epoch": 0.12, "grad_norm": 2.953125, "learning_rate": 0.00019828996820737773, "loss": 2.2285, "step": 50100 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.00019828962784280806, "loss": 2.2639, "step": 50105 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019828928744466083, "loss": 2.1771, "step": 50110 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019828894701293622, "loss": 2.1554, "step": 50115 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019828860654763433, "loss": 2.4254, "step": 50120 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.0001982882660487553, "loss": 2.0003, "step": 50125 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019828792551629913, "loss": 2.0706, "step": 50130 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.0001982875849502661, "loss": 2.3752, "step": 50135 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019828724435065623, "loss": 2.0869, "step": 50140 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.00019828690371746964, "loss": 2.18, "step": 50145 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019828656305070645, "loss": 2.2161, "step": 50150 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.0001982862223503668, "loss": 2.2389, "step": 50155 }, { "epoch": 0.12, "grad_norm": 2.3125, "learning_rate": 0.0001982858816164508, "loss": 2.1401, "step": 50160 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019828554084895856, "loss": 2.0726, "step": 50165 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019828520004789023, "loss": 2.2041, "step": 50170 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019828485921324584, "loss": 1.9968, "step": 50175 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019828451834502558, "loss": 2.1671, "step": 50180 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019828417744322953, "loss": 2.1505, "step": 50185 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019828383650785785, "loss": 2.1225, "step": 50190 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001982834955389106, "loss": 2.0937, "step": 50195 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.00019828315453638794, "loss": 2.1916, "step": 50200 }, { "epoch": 0.12, "grad_norm": 1.6953125, "learning_rate": 0.00019828281350028998, "loss": 2.171, "step": 50205 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019828247243061683, "loss": 2.3202, "step": 50210 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019828213132736859, "loss": 2.2015, "step": 50215 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.0001982817901905454, "loss": 2.0879, "step": 50220 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019828144902014737, "loss": 2.2307, "step": 50225 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019828110781617463, "loss": 2.2182, "step": 50230 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019828076657862724, "loss": 2.1846, "step": 50235 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019828042530750537, "loss": 2.1014, "step": 50240 }, { "epoch": 0.12, "grad_norm": 2.296875, "learning_rate": 0.00019828008400280916, "loss": 2.2254, "step": 50245 }, { "epoch": 0.12, "grad_norm": 1.6015625, "learning_rate": 0.00019827974266453863, "loss": 2.1436, "step": 50250 }, { "epoch": 0.12, "grad_norm": 1.7421875, "learning_rate": 0.00019827940129269402, "loss": 2.1956, "step": 50255 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019827905988727536, "loss": 1.9997, "step": 50260 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019827871844828277, "loss": 2.1832, "step": 50265 }, { "epoch": 0.12, "grad_norm": 1.6484375, "learning_rate": 0.00019827837697571643, "loss": 2.2623, "step": 50270 }, { "epoch": 0.12, "grad_norm": 1.609375, "learning_rate": 0.00019827803546957636, "loss": 2.1411, "step": 50275 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019827769392986278, "loss": 2.1438, "step": 50280 }, { "epoch": 0.12, "grad_norm": 1.5625, "learning_rate": 0.00019827735235657573, "loss": 2.1437, "step": 50285 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019827701074971537, "loss": 2.2442, "step": 50290 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.00019827666910928179, "loss": 2.2553, "step": 50295 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.0001982763274352751, "loss": 2.266, "step": 50300 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019827598572769547, "loss": 2.1565, "step": 50305 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019827564398654297, "loss": 2.2298, "step": 50310 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.00019827530221181773, "loss": 2.0363, "step": 50315 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019827496040351986, "loss": 2.116, "step": 50320 }, { "epoch": 0.12, "grad_norm": 1.609375, "learning_rate": 0.00019827461856164946, "loss": 2.3114, "step": 50325 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.00019827427668620668, "loss": 2.1113, "step": 50330 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019827393477719164, "loss": 2.1172, "step": 50335 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019827359283460444, "loss": 2.0656, "step": 50340 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019827325085844519, "loss": 2.078, "step": 50345 }, { "epoch": 0.12, "grad_norm": 3.6875, "learning_rate": 0.000198272908848714, "loss": 2.1747, "step": 50350 }, { "epoch": 0.12, "grad_norm": 1.5078125, "learning_rate": 0.00019827256680541103, "loss": 2.1129, "step": 50355 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019827222472853636, "loss": 2.3923, "step": 50360 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019827188261809008, "loss": 2.2867, "step": 50365 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.0001982715404740724, "loss": 2.0713, "step": 50370 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019827119829648335, "loss": 2.0359, "step": 50375 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019827085608532308, "loss": 2.1417, "step": 50380 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019827051384059172, "loss": 2.0469, "step": 50385 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019827017156228934, "loss": 2.2373, "step": 50390 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.0001982698292504161, "loss": 2.1418, "step": 50395 }, { "epoch": 0.12, "grad_norm": 2.375, "learning_rate": 0.00019826948690497212, "loss": 2.1639, "step": 50400 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.00019826914452595747, "loss": 2.0228, "step": 50405 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019826880211337234, "loss": 2.1394, "step": 50410 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.00019826845966721676, "loss": 2.0207, "step": 50415 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019826811718749092, "loss": 2.0382, "step": 50420 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.0001982677746741949, "loss": 2.2104, "step": 50425 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019826743212732884, "loss": 2.1082, "step": 50430 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019826708954689281, "loss": 2.1211, "step": 50435 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.000198266746932887, "loss": 2.1977, "step": 50440 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019826640428531145, "loss": 2.0871, "step": 50445 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019826606160416632, "loss": 2.1199, "step": 50450 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.00019826571888945175, "loss": 2.1008, "step": 50455 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019826537614116781, "loss": 2.1586, "step": 50460 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019826503335931465, "loss": 2.0917, "step": 50465 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019826469054389233, "loss": 2.1597, "step": 50470 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019826434769490105, "loss": 2.1927, "step": 50475 }, { "epoch": 0.12, "grad_norm": 1.578125, "learning_rate": 0.0001982640048123409, "loss": 2.0835, "step": 50480 }, { "epoch": 0.12, "grad_norm": 1.515625, "learning_rate": 0.00019826366189621192, "loss": 2.2407, "step": 50485 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019826331894651436, "loss": 2.1912, "step": 50490 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001982629759632482, "loss": 2.1075, "step": 50495 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019826263294641367, "loss": 2.1315, "step": 50500 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019826228989601086, "loss": 2.2642, "step": 50505 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019826194681203983, "loss": 2.1566, "step": 50510 }, { "epoch": 0.12, "grad_norm": 2.421875, "learning_rate": 0.00019826160369450078, "loss": 2.2568, "step": 50515 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019826126054339372, "loss": 2.286, "step": 50520 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019826091735871887, "loss": 2.1368, "step": 50525 }, { "epoch": 0.12, "grad_norm": 2.546875, "learning_rate": 0.0001982605741404763, "loss": 2.1002, "step": 50530 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.00019826023088866617, "loss": 2.1119, "step": 50535 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.00019825988760328853, "loss": 2.0501, "step": 50540 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019825954428434353, "loss": 2.2513, "step": 50545 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019825920093183131, "loss": 2.2274, "step": 50550 }, { "epoch": 0.12, "grad_norm": 2.453125, "learning_rate": 0.00019825885754575192, "loss": 2.225, "step": 50555 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.00019825851412610558, "loss": 2.2502, "step": 50560 }, { "epoch": 0.12, "grad_norm": 1.578125, "learning_rate": 0.0001982581706728923, "loss": 2.2106, "step": 50565 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019825782718611227, "loss": 2.276, "step": 50570 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019825748366576558, "loss": 2.2038, "step": 50575 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019825714011185235, "loss": 2.0411, "step": 50580 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019825679652437273, "loss": 2.3148, "step": 50585 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019825645290332675, "loss": 2.3542, "step": 50590 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.0001982561092487146, "loss": 2.127, "step": 50595 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.00019825576556053642, "loss": 2.1033, "step": 50600 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019825542183879224, "loss": 2.0003, "step": 50605 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019825507808348224, "loss": 2.1969, "step": 50610 }, { "epoch": 0.12, "grad_norm": 2.46875, "learning_rate": 0.00019825473429460654, "loss": 2.0771, "step": 50615 }, { "epoch": 0.12, "grad_norm": 2.484375, "learning_rate": 0.00019825439047216527, "loss": 2.2189, "step": 50620 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019825404661615845, "loss": 2.1963, "step": 50625 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001982537027265863, "loss": 2.3156, "step": 50630 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019825335880344888, "loss": 2.1001, "step": 50635 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.00019825301484674638, "loss": 2.2112, "step": 50640 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019825267085647882, "loss": 2.364, "step": 50645 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.0001982523268326464, "loss": 2.2753, "step": 50650 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019825198277524918, "loss": 2.196, "step": 50655 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.0001982516386842873, "loss": 2.215, "step": 50660 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.0001982512945597609, "loss": 2.0795, "step": 50665 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019825095040167006, "loss": 2.2795, "step": 50670 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.0001982506062100149, "loss": 2.0741, "step": 50675 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019825026198479556, "loss": 2.2765, "step": 50680 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019824991772601216, "loss": 1.9349, "step": 50685 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.00019824957343366482, "loss": 1.9896, "step": 50690 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.0001982492291077536, "loss": 2.1756, "step": 50695 }, { "epoch": 0.12, "grad_norm": 1.5078125, "learning_rate": 0.0001982488847482787, "loss": 2.1482, "step": 50700 }, { "epoch": 0.12, "grad_norm": 1.8125, "learning_rate": 0.0001982485403552402, "loss": 2.0363, "step": 50705 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019824819592863819, "loss": 2.2443, "step": 50710 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019824785146847282, "loss": 2.2329, "step": 50715 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019824750697474422, "loss": 2.258, "step": 50720 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019824716244745246, "loss": 2.0774, "step": 50725 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.0001982468178865977, "loss": 2.2227, "step": 50730 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019824647329218006, "loss": 2.1143, "step": 50735 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019824612866419964, "loss": 2.1495, "step": 50740 }, { "epoch": 0.12, "grad_norm": 1.6171875, "learning_rate": 0.00019824578400265657, "loss": 2.261, "step": 50745 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019824543930755092, "loss": 2.1384, "step": 50750 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019824509457888286, "loss": 2.1436, "step": 50755 }, { "epoch": 0.12, "grad_norm": 1.7421875, "learning_rate": 0.00019824474981665254, "loss": 2.2182, "step": 50760 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.00019824440502085996, "loss": 2.1167, "step": 50765 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019824406019150535, "loss": 2.2965, "step": 50770 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.0001982437153285888, "loss": 2.068, "step": 50775 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.0001982433704321104, "loss": 2.1866, "step": 50780 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019824302550207028, "loss": 2.0833, "step": 50785 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019824268053846855, "loss": 2.3617, "step": 50790 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019824233554130536, "loss": 2.2442, "step": 50795 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.0001982419905105808, "loss": 2.2064, "step": 50800 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019824164544629498, "loss": 2.195, "step": 50805 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019824130034844806, "loss": 2.1424, "step": 50810 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.00019824095521704012, "loss": 2.1608, "step": 50815 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019824061005207125, "loss": 2.2242, "step": 50820 }, { "epoch": 0.12, "grad_norm": 1.703125, "learning_rate": 0.00019824026485354166, "loss": 2.0247, "step": 50825 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019823991962145137, "loss": 2.2534, "step": 50830 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.0001982395743558006, "loss": 2.1844, "step": 50835 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019823922905658933, "loss": 2.1159, "step": 50840 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019823888372381783, "loss": 2.0883, "step": 50845 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019823853835748608, "loss": 2.3321, "step": 50850 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.00019823819295759431, "loss": 2.0434, "step": 50855 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.0001982378475241426, "loss": 2.1224, "step": 50860 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019823750205713104, "loss": 2.073, "step": 50865 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019823715655655977, "loss": 2.0932, "step": 50870 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.0001982368110224289, "loss": 2.1939, "step": 50875 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019823646545473852, "loss": 2.1808, "step": 50880 }, { "epoch": 0.12, "grad_norm": 2.40625, "learning_rate": 0.00019823611985348883, "loss": 2.0652, "step": 50885 }, { "epoch": 0.12, "grad_norm": 2.34375, "learning_rate": 0.00019823577421867992, "loss": 2.1642, "step": 50890 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019823542855031186, "loss": 2.1747, "step": 50895 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019823508284838476, "loss": 2.1761, "step": 50900 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.0001982347371128988, "loss": 2.1888, "step": 50905 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019823439134385412, "loss": 2.2255, "step": 50910 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019823404554125072, "loss": 2.2023, "step": 50915 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019823369970508882, "loss": 2.1319, "step": 50920 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.0001982333538353685, "loss": 2.1078, "step": 50925 }, { "epoch": 0.12, "grad_norm": 2.390625, "learning_rate": 0.0001982330079320899, "loss": 2.2852, "step": 50930 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.0001982326619952531, "loss": 2.3561, "step": 50935 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019823231602485826, "loss": 2.316, "step": 50940 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019823197002090547, "loss": 2.1733, "step": 50945 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019823162398339487, "loss": 2.1891, "step": 50950 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019823127791232655, "loss": 2.0204, "step": 50955 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019823093180770064, "loss": 2.2849, "step": 50960 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019823058566951728, "loss": 2.3011, "step": 50965 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019823023949777657, "loss": 2.3, "step": 50970 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.0001982298932924786, "loss": 2.2555, "step": 50975 }, { "epoch": 0.12, "grad_norm": 1.640625, "learning_rate": 0.00019822954705362355, "loss": 2.0901, "step": 50980 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.0001982292007812115, "loss": 2.1363, "step": 50985 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.00019822885447524254, "loss": 2.2406, "step": 50990 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019822850813571688, "loss": 2.1597, "step": 50995 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019822816176263454, "loss": 2.4306, "step": 51000 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019822781535599568, "loss": 2.3126, "step": 51005 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019822746891580043, "loss": 2.1944, "step": 51010 }, { "epoch": 0.12, "grad_norm": 1.796875, "learning_rate": 0.0001982271224420489, "loss": 2.0555, "step": 51015 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.0001982267759347412, "loss": 2.1903, "step": 51020 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.00019822642939387744, "loss": 2.1401, "step": 51025 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019822608281945775, "loss": 2.171, "step": 51030 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019822573621148225, "loss": 2.032, "step": 51035 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019822538956995107, "loss": 2.1822, "step": 51040 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.0001982250428948643, "loss": 2.1247, "step": 51045 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019822469618622208, "loss": 2.2306, "step": 51050 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019822434944402452, "loss": 2.1053, "step": 51055 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019822400266827174, "loss": 1.999, "step": 51060 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.00019822365585896385, "loss": 2.0132, "step": 51065 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.000198223309016101, "loss": 2.0505, "step": 51070 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.0001982229621396833, "loss": 2.1583, "step": 51075 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.0001982226152297108, "loss": 2.0684, "step": 51080 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.0001982222682861837, "loss": 2.2165, "step": 51085 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.00019822192130910211, "loss": 2.0038, "step": 51090 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019822157429846612, "loss": 2.351, "step": 51095 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.0001982212272542759, "loss": 2.2701, "step": 51100 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019822088017653144, "loss": 2.3089, "step": 51105 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.000198220533065233, "loss": 2.3233, "step": 51110 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019822018592038064, "loss": 2.1944, "step": 51115 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.00019821983874197447, "loss": 2.2805, "step": 51120 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019821949153001463, "loss": 1.9283, "step": 51125 }, { "epoch": 0.12, "grad_norm": 2.296875, "learning_rate": 0.00019821914428450122, "loss": 2.1878, "step": 51130 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019821879700543437, "loss": 2.2816, "step": 51135 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019821844969281421, "loss": 2.1956, "step": 51140 }, { "epoch": 0.12, "grad_norm": 2.265625, "learning_rate": 0.00019821810234664088, "loss": 2.195, "step": 51145 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.0001982177549669144, "loss": 2.1869, "step": 51150 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019821740755363498, "loss": 2.4865, "step": 51155 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.0001982170601068027, "loss": 2.0645, "step": 51160 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.0001982167126264177, "loss": 2.1265, "step": 51165 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019821636511248013, "loss": 2.4285, "step": 51170 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.00019821601756499, "loss": 2.0437, "step": 51175 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019821566998394754, "loss": 2.1902, "step": 51180 }, { "epoch": 0.12, "grad_norm": 1.5078125, "learning_rate": 0.0001982153223693528, "loss": 2.0341, "step": 51185 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019821497472120595, "loss": 1.9892, "step": 51190 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.0001982146270395071, "loss": 2.2514, "step": 51195 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.0001982142793242563, "loss": 2.2594, "step": 51200 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019821393157545375, "loss": 2.1597, "step": 51205 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019821358379309954, "loss": 2.2444, "step": 51210 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019821323597719377, "loss": 2.1598, "step": 51215 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019821288812773663, "loss": 2.2327, "step": 51220 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019821254024472812, "loss": 2.2223, "step": 51225 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019821219232816847, "loss": 2.1279, "step": 51230 }, { "epoch": 0.12, "grad_norm": 1.6875, "learning_rate": 0.0001982118443780577, "loss": 2.2497, "step": 51235 }, { "epoch": 0.12, "grad_norm": 1.640625, "learning_rate": 0.00019821149639439605, "loss": 2.1645, "step": 51240 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019821114837718354, "loss": 1.9467, "step": 51245 }, { "epoch": 0.12, "grad_norm": 1.8125, "learning_rate": 0.00019821080032642033, "loss": 2.1791, "step": 51250 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019821045224210652, "loss": 2.207, "step": 51255 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019821010412424223, "loss": 2.2572, "step": 51260 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.0001982097559728276, "loss": 2.175, "step": 51265 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.00019820940778786272, "loss": 2.0593, "step": 51270 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019820905956934777, "loss": 2.0647, "step": 51275 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019820871131728278, "loss": 2.2161, "step": 51280 }, { "epoch": 0.12, "grad_norm": 1.6875, "learning_rate": 0.00019820836303166792, "loss": 2.2501, "step": 51285 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.0001982080147125033, "loss": 2.2694, "step": 51290 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019820766635978907, "loss": 2.2397, "step": 51295 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019820731797352527, "loss": 2.3351, "step": 51300 }, { "epoch": 0.12, "grad_norm": 1.6171875, "learning_rate": 0.00019820696955371212, "loss": 2.1832, "step": 51305 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.00019820662110034968, "loss": 2.1206, "step": 51310 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019820627261343804, "loss": 2.151, "step": 51315 }, { "epoch": 0.12, "grad_norm": 1.65625, "learning_rate": 0.0001982059240929774, "loss": 2.0975, "step": 51320 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.0001982055755389678, "loss": 1.9395, "step": 51325 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019820522695140942, "loss": 2.2611, "step": 51330 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.00019820487833030232, "loss": 2.299, "step": 51335 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.0001982045296756467, "loss": 2.2484, "step": 51340 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.0001982041809874426, "loss": 2.1266, "step": 51345 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.0001982038322656902, "loss": 2.1413, "step": 51350 }, { "epoch": 0.12, "grad_norm": 1.6484375, "learning_rate": 0.00019820348351038956, "loss": 2.0245, "step": 51355 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019820313472154082, "loss": 2.2146, "step": 51360 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.00019820278589914413, "loss": 2.3508, "step": 51365 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.0001982024370431996, "loss": 2.3963, "step": 51370 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.0001982020881537073, "loss": 2.3302, "step": 51375 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.0001982017392306674, "loss": 2.1791, "step": 51380 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019820139027408002, "loss": 2.183, "step": 51385 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019820104128394527, "loss": 2.0963, "step": 51390 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019820069226026324, "loss": 2.2753, "step": 51395 }, { "epoch": 0.12, "grad_norm": 2.3125, "learning_rate": 0.00019820034320303407, "loss": 1.8223, "step": 51400 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.0001981999941122579, "loss": 2.1401, "step": 51405 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.0001981996449879348, "loss": 2.4199, "step": 51410 }, { "epoch": 0.12, "grad_norm": 1.6171875, "learning_rate": 0.00019819929583006496, "loss": 1.9542, "step": 51415 }, { "epoch": 0.12, "grad_norm": 1.53125, "learning_rate": 0.00019819894663864843, "loss": 2.3847, "step": 51420 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019819859741368538, "loss": 2.1645, "step": 51425 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.0001981982481551759, "loss": 2.2907, "step": 51430 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.0001981978988631201, "loss": 2.3281, "step": 51435 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019819754953751814, "loss": 2.303, "step": 51440 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.0001981972001783701, "loss": 2.055, "step": 51445 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019819685078567613, "loss": 2.2108, "step": 51450 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019819650135943635, "loss": 2.2497, "step": 51455 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019819615189965083, "loss": 2.2666, "step": 51460 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019819580240631974, "loss": 2.1154, "step": 51465 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.0001981954528794432, "loss": 2.1958, "step": 51470 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.0001981951033190213, "loss": 2.2826, "step": 51475 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019819475372505414, "loss": 2.1709, "step": 51480 }, { "epoch": 0.12, "grad_norm": 2.515625, "learning_rate": 0.00019819440409754192, "loss": 2.2111, "step": 51485 }, { "epoch": 0.12, "grad_norm": 1.8125, "learning_rate": 0.00019819405443648468, "loss": 2.1559, "step": 51490 }, { "epoch": 0.12, "grad_norm": 2.453125, "learning_rate": 0.00019819370474188257, "loss": 2.3031, "step": 51495 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.00019819335501373574, "loss": 2.1383, "step": 51500 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.00019819300525204423, "loss": 2.1794, "step": 51505 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019819265545680822, "loss": 2.2661, "step": 51510 }, { "epoch": 0.12, "grad_norm": 1.4453125, "learning_rate": 0.00019819230562802785, "loss": 2.0922, "step": 51515 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019819195576570317, "loss": 2.2133, "step": 51520 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019819160586983435, "loss": 2.2967, "step": 51525 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019819125594042154, "loss": 2.3298, "step": 51530 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019819090597746476, "loss": 2.0628, "step": 51535 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.0001981905559809642, "loss": 2.2117, "step": 51540 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019819020595092, "loss": 2.1244, "step": 51545 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.0001981898558873322, "loss": 2.2454, "step": 51550 }, { "epoch": 0.12, "grad_norm": 2.453125, "learning_rate": 0.000198189505790201, "loss": 2.3493, "step": 51555 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019818915565952646, "loss": 2.1331, "step": 51560 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.0001981888054953087, "loss": 2.2255, "step": 51565 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019818845529754792, "loss": 2.229, "step": 51570 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019818810506624414, "loss": 2.1616, "step": 51575 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019818775480139755, "loss": 2.1836, "step": 51580 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019818740450300824, "loss": 2.1487, "step": 51585 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.0001981870541710763, "loss": 2.1461, "step": 51590 }, { "epoch": 0.12, "grad_norm": 1.625, "learning_rate": 0.00019818670380560192, "loss": 1.9743, "step": 51595 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019818635340658515, "loss": 2.2348, "step": 51600 }, { "epoch": 0.12, "grad_norm": 2.578125, "learning_rate": 0.00019818600297402615, "loss": 2.2577, "step": 51605 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019818565250792505, "loss": 2.2221, "step": 51610 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.00019818530200828194, "loss": 2.2688, "step": 51615 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019818495147509694, "loss": 2.1873, "step": 51620 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.0001981846009083702, "loss": 2.1054, "step": 51625 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019818425030810179, "loss": 2.0773, "step": 51630 }, { "epoch": 0.12, "grad_norm": 3.625, "learning_rate": 0.00019818389967429188, "loss": 2.0198, "step": 51635 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019818354900694057, "loss": 1.9696, "step": 51640 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019818319830604796, "loss": 2.0209, "step": 51645 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019818284757161422, "loss": 2.3306, "step": 51650 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019818249680363944, "loss": 2.1457, "step": 51655 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019818214600212367, "loss": 2.2282, "step": 51660 }, { "epoch": 0.12, "grad_norm": 2.265625, "learning_rate": 0.00019818179516706717, "loss": 2.2745, "step": 51665 }, { "epoch": 0.12, "grad_norm": 1.6796875, "learning_rate": 0.00019818144429846997, "loss": 2.1786, "step": 51670 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.0001981810933963322, "loss": 2.1615, "step": 51675 }, { "epoch": 0.12, "grad_norm": 2.25, "learning_rate": 0.000198180742460654, "loss": 2.1315, "step": 51680 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019818039149143546, "loss": 2.1159, "step": 51685 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019818004048867673, "loss": 2.1785, "step": 51690 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019817968945237793, "loss": 2.3179, "step": 51695 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019817933838253913, "loss": 2.1268, "step": 51700 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019817898727916052, "loss": 2.2627, "step": 51705 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019817863614224218, "loss": 2.2858, "step": 51710 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019817828497178423, "loss": 2.2393, "step": 51715 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.0001981779337677868, "loss": 2.3481, "step": 51720 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019817758253025, "loss": 2.1726, "step": 51725 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.00019817723125917398, "loss": 1.9843, "step": 51730 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.0001981768799545588, "loss": 2.2626, "step": 51735 }, { "epoch": 0.12, "grad_norm": 2.34375, "learning_rate": 0.00019817652861640466, "loss": 2.2091, "step": 51740 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019817617724471163, "loss": 2.1309, "step": 51745 }, { "epoch": 0.12, "grad_norm": 1.6171875, "learning_rate": 0.00019817582583947981, "loss": 2.1184, "step": 51750 }, { "epoch": 0.12, "grad_norm": 1.375, "learning_rate": 0.0001981754744007094, "loss": 1.9464, "step": 51755 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.0001981751229284004, "loss": 2.1041, "step": 51760 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019817477142255302, "loss": 2.1943, "step": 51765 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001981744198831674, "loss": 2.0002, "step": 51770 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.00019817406831024355, "loss": 2.2096, "step": 51775 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.0001981737167037817, "loss": 2.1268, "step": 51780 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.0001981733650637819, "loss": 2.2738, "step": 51785 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019817301339024432, "loss": 2.2824, "step": 51790 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019817266168316907, "loss": 2.2598, "step": 51795 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019817230994255625, "loss": 2.1454, "step": 51800 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019817195816840597, "loss": 2.2015, "step": 51805 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.0001981716063607184, "loss": 2.2945, "step": 51810 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019817125451949359, "loss": 2.1633, "step": 51815 }, { "epoch": 0.12, "grad_norm": 1.7421875, "learning_rate": 0.00019817090264473173, "loss": 2.076, "step": 51820 }, { "epoch": 0.12, "grad_norm": 1.8671875, "learning_rate": 0.0001981705507364329, "loss": 2.2236, "step": 51825 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019817019879459722, "loss": 2.0436, "step": 51830 }, { "epoch": 0.12, "grad_norm": 2.765625, "learning_rate": 0.00019816984681922484, "loss": 2.2492, "step": 51835 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019816949481031584, "loss": 2.1564, "step": 51840 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019816914276787038, "loss": 2.1649, "step": 51845 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019816879069188854, "loss": 2.1542, "step": 51850 }, { "epoch": 0.12, "grad_norm": 1.6796875, "learning_rate": 0.00019816843858237046, "loss": 2.175, "step": 51855 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.0001981680864393163, "loss": 2.223, "step": 51860 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019816773426272608, "loss": 2.2674, "step": 51865 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001981673820526, "loss": 2.182, "step": 51870 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.0001981670298089382, "loss": 2.2498, "step": 51875 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019816667753174073, "loss": 2.265, "step": 51880 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.00019816632522100776, "loss": 2.312, "step": 51885 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019816597287673936, "loss": 2.1718, "step": 51890 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.0001981656204989357, "loss": 2.2205, "step": 51895 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.0001981652680875969, "loss": 2.1569, "step": 51900 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.00019816491564272304, "loss": 2.0219, "step": 51905 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.00019816456316431428, "loss": 2.1572, "step": 51910 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019816421065237073, "loss": 2.1658, "step": 51915 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.0001981638581068925, "loss": 2.1036, "step": 51920 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.0001981635055278797, "loss": 2.2283, "step": 51925 }, { "epoch": 0.12, "grad_norm": 2.3125, "learning_rate": 0.0001981631529153325, "loss": 2.1307, "step": 51930 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019816280026925095, "loss": 2.178, "step": 51935 }, { "epoch": 0.12, "grad_norm": 1.7421875, "learning_rate": 0.00019816244758963521, "loss": 2.1128, "step": 51940 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.00019816209487648543, "loss": 2.2062, "step": 51945 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019816174212980168, "loss": 2.2854, "step": 51950 }, { "epoch": 0.12, "grad_norm": 1.8984375, "learning_rate": 0.00019816138934958407, "loss": 2.2157, "step": 51955 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.0001981610365358328, "loss": 2.1624, "step": 51960 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019816068368854793, "loss": 2.2613, "step": 51965 }, { "epoch": 0.12, "grad_norm": 1.6796875, "learning_rate": 0.00019816033080772956, "loss": 2.2203, "step": 51970 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019815997789337784, "loss": 2.3243, "step": 51975 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019815962494549295, "loss": 2.0163, "step": 51980 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019815927196407487, "loss": 2.1123, "step": 51985 }, { "epoch": 0.12, "grad_norm": 2.53125, "learning_rate": 0.0001981589189491239, "loss": 2.2695, "step": 51990 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019815856590063997, "loss": 2.3087, "step": 51995 }, { "epoch": 0.12, "grad_norm": 2.90625, "learning_rate": 0.00019815821281862333, "loss": 2.0806, "step": 52000 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019815785970307408, "loss": 2.3707, "step": 52005 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.0001981575065539923, "loss": 2.1053, "step": 52010 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019815715337137815, "loss": 2.2221, "step": 52015 }, { "epoch": 0.12, "grad_norm": 3.796875, "learning_rate": 0.00019815680015523174, "loss": 2.2235, "step": 52020 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019815644690555315, "loss": 2.1368, "step": 52025 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019815609362234258, "loss": 2.4068, "step": 52030 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.0001981557403056001, "loss": 2.0695, "step": 52035 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019815538695532582, "loss": 1.93, "step": 52040 }, { "epoch": 0.12, "grad_norm": 2.296875, "learning_rate": 0.0001981550335715199, "loss": 2.3215, "step": 52045 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019815468015418243, "loss": 2.2403, "step": 52050 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019815432670331355, "loss": 2.2157, "step": 52055 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019815397321891335, "loss": 2.2698, "step": 52060 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019815361970098197, "loss": 2.3302, "step": 52065 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019815326614951956, "loss": 2.2559, "step": 52070 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019815291256452625, "loss": 2.3075, "step": 52075 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.00019815255894600205, "loss": 2.0509, "step": 52080 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.0001981522052939472, "loss": 2.2232, "step": 52085 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019815185160836177, "loss": 2.3162, "step": 52090 }, { "epoch": 0.12, "grad_norm": 1.6796875, "learning_rate": 0.00019815149788924587, "loss": 2.2212, "step": 52095 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019815114413659966, "loss": 2.3919, "step": 52100 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.0001981507903504232, "loss": 2.0888, "step": 52105 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.0001981504365307167, "loss": 2.2379, "step": 52110 }, { "epoch": 0.12, "grad_norm": 1.5234375, "learning_rate": 0.0001981500826774802, "loss": 2.095, "step": 52115 }, { "epoch": 0.12, "grad_norm": 1.6875, "learning_rate": 0.00019814972879071387, "loss": 2.2603, "step": 52120 }, { "epoch": 0.12, "grad_norm": 2.265625, "learning_rate": 0.0001981493748704178, "loss": 2.2241, "step": 52125 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019814902091659214, "loss": 2.1212, "step": 52130 }, { "epoch": 0.12, "grad_norm": 2.28125, "learning_rate": 0.00019814866692923698, "loss": 2.3263, "step": 52135 }, { "epoch": 0.12, "grad_norm": 1.984375, "learning_rate": 0.00019814831290835243, "loss": 2.1805, "step": 52140 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019814795885393868, "loss": 2.2361, "step": 52145 }, { "epoch": 0.12, "grad_norm": 1.6484375, "learning_rate": 0.0001981476047659958, "loss": 2.1432, "step": 52150 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.00019814725064452388, "loss": 2.2153, "step": 52155 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019814689648952314, "loss": 2.2246, "step": 52160 }, { "epoch": 0.12, "grad_norm": 1.59375, "learning_rate": 0.00019814654230099362, "loss": 2.1986, "step": 52165 }, { "epoch": 0.12, "grad_norm": 1.828125, "learning_rate": 0.00019814618807893544, "loss": 2.1563, "step": 52170 }, { "epoch": 0.12, "grad_norm": 2.4375, "learning_rate": 0.00019814583382334874, "loss": 2.1291, "step": 52175 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019814547953423363, "loss": 2.1888, "step": 52180 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019814512521159028, "loss": 2.1837, "step": 52185 }, { "epoch": 0.12, "grad_norm": 1.7578125, "learning_rate": 0.00019814477085541875, "loss": 2.1577, "step": 52190 }, { "epoch": 0.12, "grad_norm": 2.25, "learning_rate": 0.00019814441646571923, "loss": 2.1252, "step": 52195 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019814406204249176, "loss": 2.204, "step": 52200 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.0001981437075857365, "loss": 2.136, "step": 52205 }, { "epoch": 0.12, "grad_norm": 1.6953125, "learning_rate": 0.00019814335309545358, "loss": 2.1734, "step": 52210 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.0001981429985716431, "loss": 1.9939, "step": 52215 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.0001981426440143052, "loss": 2.0039, "step": 52220 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019814228942344, "loss": 2.24, "step": 52225 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.0001981419347990476, "loss": 2.1876, "step": 52230 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.00019814158014112813, "loss": 2.2941, "step": 52235 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019814122544968172, "loss": 2.1175, "step": 52240 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.0001981408707247085, "loss": 2.0809, "step": 52245 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019814051596620858, "loss": 2.2952, "step": 52250 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019814016117418207, "loss": 2.2428, "step": 52255 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.0001981398063486291, "loss": 2.1485, "step": 52260 }, { "epoch": 0.12, "grad_norm": 1.9609375, "learning_rate": 0.00019813945148954983, "loss": 2.247, "step": 52265 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.0001981390965969443, "loss": 2.3015, "step": 52270 }, { "epoch": 0.12, "grad_norm": 1.875, "learning_rate": 0.00019813874167081265, "loss": 2.1115, "step": 52275 }, { "epoch": 0.12, "grad_norm": 1.7265625, "learning_rate": 0.0001981383867111551, "loss": 2.1735, "step": 52280 }, { "epoch": 0.12, "grad_norm": 1.4609375, "learning_rate": 0.00019813803171797164, "loss": 2.1014, "step": 52285 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019813767669126246, "loss": 2.1587, "step": 52290 }, { "epoch": 0.12, "grad_norm": 2.328125, "learning_rate": 0.0001981373216310277, "loss": 2.2977, "step": 52295 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019813696653726742, "loss": 2.097, "step": 52300 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019813661140998177, "loss": 2.1413, "step": 52305 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019813625624917088, "loss": 2.2324, "step": 52310 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019813590105483488, "loss": 2.0521, "step": 52315 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019813554582697384, "loss": 2.1928, "step": 52320 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019813519056558797, "loss": 2.1727, "step": 52325 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.0001981348352706773, "loss": 2.1312, "step": 52330 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.000198134479942242, "loss": 2.2024, "step": 52335 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019813412458028216, "loss": 2.1903, "step": 52340 }, { "epoch": 0.12, "grad_norm": 1.9140625, "learning_rate": 0.000198133769184798, "loss": 2.386, "step": 52345 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.0001981334137557895, "loss": 2.2575, "step": 52350 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019813305829325683, "loss": 2.2425, "step": 52355 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019813270279720016, "loss": 2.1358, "step": 52360 }, { "epoch": 0.12, "grad_norm": 2.265625, "learning_rate": 0.0001981323472676196, "loss": 2.1632, "step": 52365 }, { "epoch": 0.12, "grad_norm": 1.6484375, "learning_rate": 0.00019813199170451518, "loss": 2.0311, "step": 52370 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019813163610788717, "loss": 2.164, "step": 52375 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019813128047773556, "loss": 2.0728, "step": 52380 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019813092481406054, "loss": 2.0755, "step": 52385 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019813056911686223, "loss": 2.0488, "step": 52390 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019813021338614074, "loss": 2.1026, "step": 52395 }, { "epoch": 0.12, "grad_norm": 2.25, "learning_rate": 0.00019812985762189617, "loss": 2.3471, "step": 52400 }, { "epoch": 0.12, "grad_norm": 2.1875, "learning_rate": 0.00019812950182412865, "loss": 2.1804, "step": 52405 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019812914599283836, "loss": 2.1865, "step": 52410 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019812879012802534, "loss": 2.279, "step": 52415 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019812843422968977, "loss": 2.0177, "step": 52420 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.0001981280782978317, "loss": 2.1492, "step": 52425 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019812772233245135, "loss": 2.1426, "step": 52430 }, { "epoch": 0.12, "grad_norm": 1.6640625, "learning_rate": 0.00019812736633354876, "loss": 2.1726, "step": 52435 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019812701030112408, "loss": 2.1889, "step": 52440 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019812665423517745, "loss": 2.2239, "step": 52445 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019812629813570898, "loss": 2.4079, "step": 52450 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019812594200271878, "loss": 2.2375, "step": 52455 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019812558583620694, "loss": 2.3452, "step": 52460 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.0001981252296361737, "loss": 2.1846, "step": 52465 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019812487340261904, "loss": 2.2281, "step": 52470 }, { "epoch": 0.12, "grad_norm": 1.8203125, "learning_rate": 0.00019812451713554317, "loss": 2.0776, "step": 52475 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019812416083494615, "loss": 2.1226, "step": 52480 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.00019812380450082818, "loss": 2.1451, "step": 52485 }, { "epoch": 0.12, "grad_norm": 1.6796875, "learning_rate": 0.00019812344813318933, "loss": 2.2905, "step": 52490 }, { "epoch": 0.12, "grad_norm": 1.84375, "learning_rate": 0.00019812309173202971, "loss": 2.2272, "step": 52495 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.0001981227352973495, "loss": 2.0928, "step": 52500 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019812237882914873, "loss": 2.0603, "step": 52505 }, { "epoch": 0.12, "grad_norm": 1.671875, "learning_rate": 0.0001981220223274276, "loss": 2.1552, "step": 52510 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019812166579218622, "loss": 2.1584, "step": 52515 }, { "epoch": 0.12, "grad_norm": 2.15625, "learning_rate": 0.0001981213092234247, "loss": 2.0172, "step": 52520 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019812095262114316, "loss": 2.1955, "step": 52525 }, { "epoch": 0.12, "grad_norm": 1.6171875, "learning_rate": 0.0001981205959853417, "loss": 2.0995, "step": 52530 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019812023931602049, "loss": 2.1816, "step": 52535 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.0001981198826131796, "loss": 2.3674, "step": 52540 }, { "epoch": 0.12, "grad_norm": 1.7734375, "learning_rate": 0.00019811952587681921, "loss": 2.1117, "step": 52545 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.0001981191691069394, "loss": 2.3018, "step": 52550 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.0001981188123035403, "loss": 2.0537, "step": 52555 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019811845546662205, "loss": 2.2884, "step": 52560 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.0001981180985961847, "loss": 2.1531, "step": 52565 }, { "epoch": 0.12, "grad_norm": 1.640625, "learning_rate": 0.0001981177416922285, "loss": 2.3227, "step": 52570 }, { "epoch": 0.12, "grad_norm": 1.78125, "learning_rate": 0.00019811738475475346, "loss": 2.2143, "step": 52575 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019811702778375976, "loss": 2.1775, "step": 52580 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.0001981166707792475, "loss": 2.2664, "step": 52585 }, { "epoch": 0.12, "grad_norm": 1.6640625, "learning_rate": 0.00019811631374121676, "loss": 2.1318, "step": 52590 }, { "epoch": 0.12, "grad_norm": 1.90625, "learning_rate": 0.00019811595666966776, "loss": 2.2889, "step": 52595 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019811559956460056, "loss": 1.9884, "step": 52600 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019811524242601528, "loss": 2.1597, "step": 52605 }, { "epoch": 0.12, "grad_norm": 2.46875, "learning_rate": 0.00019811488525391208, "loss": 2.1319, "step": 52610 }, { "epoch": 0.12, "grad_norm": 2.765625, "learning_rate": 0.00019811452804829104, "loss": 2.1347, "step": 52615 }, { "epoch": 0.12, "grad_norm": 2.671875, "learning_rate": 0.0001981141708091523, "loss": 2.1073, "step": 52620 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019811381353649596, "loss": 2.1172, "step": 52625 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.0001981134562303222, "loss": 2.0279, "step": 52630 }, { "epoch": 0.12, "grad_norm": 2.0, "learning_rate": 0.00019811309889063107, "loss": 2.1318, "step": 52635 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019811274151742273, "loss": 2.3371, "step": 52640 }, { "epoch": 0.12, "grad_norm": 1.96875, "learning_rate": 0.0001981123841106973, "loss": 2.1123, "step": 52645 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.0001981120266704549, "loss": 2.1928, "step": 52650 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019811166919669567, "loss": 2.0521, "step": 52655 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.00019811131168941972, "loss": 2.4033, "step": 52660 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019811095414862715, "loss": 2.2163, "step": 52665 }, { "epoch": 0.12, "grad_norm": 2.375, "learning_rate": 0.0001981105965743181, "loss": 2.0602, "step": 52670 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019811023896649266, "loss": 2.132, "step": 52675 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019810988132515104, "loss": 2.1346, "step": 52680 }, { "epoch": 0.12, "grad_norm": 1.515625, "learning_rate": 0.00019810952365029326, "loss": 1.9749, "step": 52685 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019810916594191952, "loss": 2.1448, "step": 52690 }, { "epoch": 0.12, "grad_norm": 2.34375, "learning_rate": 0.00019810880820002988, "loss": 2.2228, "step": 52695 }, { "epoch": 0.12, "grad_norm": 2.453125, "learning_rate": 0.0001981084504246245, "loss": 2.2112, "step": 52700 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.0001981080926157035, "loss": 2.3127, "step": 52705 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019810773477326702, "loss": 2.3628, "step": 52710 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019810737689731513, "loss": 2.2382, "step": 52715 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019810701898784796, "loss": 2.1559, "step": 52720 }, { "epoch": 0.12, "grad_norm": 2.375, "learning_rate": 0.0001981066610448657, "loss": 2.0502, "step": 52725 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.0001981063030683684, "loss": 1.9993, "step": 52730 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.0001981059450583562, "loss": 2.1132, "step": 52735 }, { "epoch": 0.12, "grad_norm": 2.046875, "learning_rate": 0.00019810558701482926, "loss": 2.1539, "step": 52740 }, { "epoch": 0.12, "grad_norm": 1.75, "learning_rate": 0.00019810522893778763, "loss": 2.2091, "step": 52745 }, { "epoch": 0.12, "grad_norm": 2.34375, "learning_rate": 0.0001981048708272315, "loss": 2.203, "step": 52750 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019810451268316098, "loss": 2.2296, "step": 52755 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019810415450557617, "loss": 2.1851, "step": 52760 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.0001981037962944772, "loss": 2.3297, "step": 52765 }, { "epoch": 0.12, "grad_norm": 1.6640625, "learning_rate": 0.0001981034380498642, "loss": 1.9721, "step": 52770 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019810307977173727, "loss": 2.2071, "step": 52775 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.00019810272146009655, "loss": 2.2159, "step": 52780 }, { "epoch": 0.12, "grad_norm": 2.125, "learning_rate": 0.00019810236311494216, "loss": 2.1908, "step": 52785 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019810200473627422, "loss": 2.1644, "step": 52790 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019810164632409286, "loss": 1.9907, "step": 52795 }, { "epoch": 0.12, "grad_norm": 1.5859375, "learning_rate": 0.00019810128787839824, "loss": 2.1086, "step": 52800 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.00019810092939919038, "loss": 2.1621, "step": 52805 }, { "epoch": 0.12, "grad_norm": 2.25, "learning_rate": 0.0001981005708864695, "loss": 2.1928, "step": 52810 }, { "epoch": 0.12, "grad_norm": 2.03125, "learning_rate": 0.00019810021234023566, "loss": 2.3158, "step": 52815 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019809985376048904, "loss": 2.2056, "step": 52820 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.0001980994951472297, "loss": 2.0862, "step": 52825 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.0001980991365004578, "loss": 2.1865, "step": 52830 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.00019809877782017346, "loss": 2.2073, "step": 52835 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019809841910637684, "loss": 2.1642, "step": 52840 }, { "epoch": 0.12, "grad_norm": 2.34375, "learning_rate": 0.00019809806035906794, "loss": 2.1773, "step": 52845 }, { "epoch": 0.12, "grad_norm": 2.203125, "learning_rate": 0.00019809770157824702, "loss": 2.1407, "step": 52850 }, { "epoch": 0.12, "grad_norm": 1.6875, "learning_rate": 0.00019809734276391415, "loss": 1.9763, "step": 52855 }, { "epoch": 0.12, "grad_norm": 2.171875, "learning_rate": 0.00019809698391606942, "loss": 2.2975, "step": 52860 }, { "epoch": 0.12, "grad_norm": 2.21875, "learning_rate": 0.000198096625034713, "loss": 2.2257, "step": 52865 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019809626611984497, "loss": 2.1222, "step": 52870 }, { "epoch": 0.12, "grad_norm": 1.8359375, "learning_rate": 0.00019809590717146552, "loss": 2.1699, "step": 52875 }, { "epoch": 0.12, "grad_norm": 1.8828125, "learning_rate": 0.00019809554818957467, "loss": 2.1437, "step": 52880 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019809518917417266, "loss": 2.0609, "step": 52885 }, { "epoch": 0.12, "grad_norm": 2.25, "learning_rate": 0.00019809483012525953, "loss": 2.2609, "step": 52890 }, { "epoch": 0.12, "grad_norm": 2.140625, "learning_rate": 0.0001980944710428354, "loss": 2.196, "step": 52895 }, { "epoch": 0.12, "grad_norm": 2.234375, "learning_rate": 0.00019809411192690045, "loss": 2.1259, "step": 52900 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019809375277745475, "loss": 2.1124, "step": 52905 }, { "epoch": 0.12, "grad_norm": 1.734375, "learning_rate": 0.00019809339359449848, "loss": 2.252, "step": 52910 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019809303437803171, "loss": 2.1759, "step": 52915 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019809267512805456, "loss": 2.2507, "step": 52920 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.0001980923158445672, "loss": 2.3749, "step": 52925 }, { "epoch": 0.12, "grad_norm": 1.9375, "learning_rate": 0.0001980919565275697, "loss": 2.279, "step": 52930 }, { "epoch": 0.12, "grad_norm": 1.7421875, "learning_rate": 0.00019809159717706226, "loss": 2.3043, "step": 52935 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.0001980912377930449, "loss": 2.1856, "step": 52940 }, { "epoch": 0.12, "grad_norm": 1.6484375, "learning_rate": 0.0001980908783755178, "loss": 2.1785, "step": 52945 }, { "epoch": 0.12, "grad_norm": 1.953125, "learning_rate": 0.0001980905189244811, "loss": 2.2634, "step": 52950 }, { "epoch": 0.12, "grad_norm": 1.9296875, "learning_rate": 0.0001980901594399349, "loss": 2.0836, "step": 52955 }, { "epoch": 0.12, "grad_norm": 1.5625, "learning_rate": 0.0001980897999218793, "loss": 2.0337, "step": 52960 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019808944037031447, "loss": 2.5245, "step": 52965 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019808908078524048, "loss": 2.1162, "step": 52970 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.0001980887211666575, "loss": 2.3574, "step": 52975 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019808836151456562, "loss": 2.2821, "step": 52980 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.000198088001828965, "loss": 2.3261, "step": 52985 }, { "epoch": 0.12, "grad_norm": 1.7890625, "learning_rate": 0.00019808764210985574, "loss": 2.0333, "step": 52990 }, { "epoch": 0.12, "grad_norm": 1.890625, "learning_rate": 0.00019808728235723794, "loss": 2.2346, "step": 52995 }, { "epoch": 0.12, "grad_norm": 1.765625, "learning_rate": 0.00019808692257111178, "loss": 2.2244, "step": 53000 }, { "epoch": 0.12, "grad_norm": 1.859375, "learning_rate": 0.0001980865627514773, "loss": 2.0907, "step": 53005 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.0001980862028983347, "loss": 2.0409, "step": 53010 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019808584301168406, "loss": 2.2304, "step": 53015 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019808548309152552, "loss": 2.1544, "step": 53020 }, { "epoch": 0.12, "grad_norm": 1.9921875, "learning_rate": 0.00019808512313785923, "loss": 2.0714, "step": 53025 }, { "epoch": 0.12, "grad_norm": 2.34375, "learning_rate": 0.00019808476315068526, "loss": 2.0995, "step": 53030 }, { "epoch": 0.12, "grad_norm": 2.734375, "learning_rate": 0.00019808440313000373, "loss": 2.2829, "step": 53035 }, { "epoch": 0.12, "grad_norm": 1.8046875, "learning_rate": 0.00019808404307581482, "loss": 2.2354, "step": 53040 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.0001980836829881186, "loss": 2.1543, "step": 53045 }, { "epoch": 0.12, "grad_norm": 2.265625, "learning_rate": 0.00019808332286691527, "loss": 2.2617, "step": 53050 }, { "epoch": 0.12, "grad_norm": 2.359375, "learning_rate": 0.00019808296271220485, "loss": 2.2037, "step": 53055 }, { "epoch": 0.12, "grad_norm": 1.7109375, "learning_rate": 0.00019808260252398752, "loss": 2.3616, "step": 53060 }, { "epoch": 0.12, "grad_norm": 2.015625, "learning_rate": 0.00019808224230226338, "loss": 2.1164, "step": 53065 }, { "epoch": 0.12, "grad_norm": 2.09375, "learning_rate": 0.00019808188204703257, "loss": 2.1707, "step": 53070 }, { "epoch": 0.12, "grad_norm": 1.9765625, "learning_rate": 0.00019808152175829525, "loss": 2.2046, "step": 53075 }, { "epoch": 0.12, "grad_norm": 2.078125, "learning_rate": 0.00019808116143605147, "loss": 2.1712, "step": 53080 }, { "epoch": 0.12, "grad_norm": 2.109375, "learning_rate": 0.00019808080108030138, "loss": 2.2929, "step": 53085 }, { "epoch": 0.12, "grad_norm": 1.71875, "learning_rate": 0.00019808044069104513, "loss": 2.201, "step": 53090 }, { "epoch": 0.12, "grad_norm": 1.921875, "learning_rate": 0.00019808008026828283, "loss": 2.1221, "step": 53095 }, { "epoch": 0.12, "grad_norm": 1.8515625, "learning_rate": 0.00019807971981201456, "loss": 2.1209, "step": 53100 }, { "epoch": 0.12, "grad_norm": 2.0625, "learning_rate": 0.00019807935932224049, "loss": 2.166, "step": 53105 }, { "epoch": 0.12, "grad_norm": 2.421875, "learning_rate": 0.00019807899879896074, "loss": 2.3592, "step": 53110 }, { "epoch": 0.12, "grad_norm": 1.9453125, "learning_rate": 0.00019807863824217544, "loss": 2.0758, "step": 53115 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019807827765188466, "loss": 2.2847, "step": 53120 }, { "epoch": 0.13, "grad_norm": 1.625, "learning_rate": 0.0001980779170280886, "loss": 2.3244, "step": 53125 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019807755637078734, "loss": 2.1513, "step": 53130 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.000198077195679981, "loss": 2.2182, "step": 53135 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.0001980768349556697, "loss": 2.1621, "step": 53140 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019807647419785358, "loss": 2.2027, "step": 53145 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019807611340653275, "loss": 2.045, "step": 53150 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019807575258170735, "loss": 2.1343, "step": 53155 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019807539172337753, "loss": 2.1615, "step": 53160 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019807503083154332, "loss": 2.3389, "step": 53165 }, { "epoch": 0.13, "grad_norm": 1.5703125, "learning_rate": 0.00019807466990620496, "loss": 2.1652, "step": 53170 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019807430894736245, "loss": 2.1412, "step": 53175 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.00019807394795501602, "loss": 2.0052, "step": 53180 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019807358692916575, "loss": 2.2584, "step": 53185 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019807322586981173, "loss": 2.2306, "step": 53190 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019807286477695416, "loss": 2.1595, "step": 53195 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019807250365059308, "loss": 2.1271, "step": 53200 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019807214249072867, "loss": 2.0848, "step": 53205 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019807178129736106, "loss": 2.3565, "step": 53210 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019807142007049032, "loss": 2.2101, "step": 53215 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019807105881011663, "loss": 2.1778, "step": 53220 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019807069751624009, "loss": 2.1376, "step": 53225 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.0001980703361888608, "loss": 2.1986, "step": 53230 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019806997482797888, "loss": 2.3617, "step": 53235 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019806961343359452, "loss": 2.325, "step": 53240 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.0001980692520057078, "loss": 1.9893, "step": 53245 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019806889054431882, "loss": 2.1354, "step": 53250 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019806852904942775, "loss": 2.2623, "step": 53255 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.0001980681675210347, "loss": 2.2527, "step": 53260 }, { "epoch": 0.13, "grad_norm": 1.546875, "learning_rate": 0.00019806780595913977, "loss": 2.087, "step": 53265 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019806744436374308, "loss": 2.3164, "step": 53270 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.0001980670827348448, "loss": 2.1463, "step": 53275 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019806672107244502, "loss": 2.1481, "step": 53280 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019806635937654385, "loss": 2.3505, "step": 53285 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019806599764714148, "loss": 2.2324, "step": 53290 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019806563588423793, "loss": 2.3336, "step": 53295 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001980652740878334, "loss": 2.0269, "step": 53300 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019806491225792802, "loss": 2.0019, "step": 53305 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019806455039452186, "loss": 2.0861, "step": 53310 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.0001980641884976151, "loss": 2.2623, "step": 53315 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001980638265672078, "loss": 2.0974, "step": 53320 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019806346460330015, "loss": 2.2603, "step": 53325 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.0001980631026058922, "loss": 2.1188, "step": 53330 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019806274057498414, "loss": 2.1111, "step": 53335 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.0001980623785105761, "loss": 1.9206, "step": 53340 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019806201641266812, "loss": 2.2852, "step": 53345 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.0001980616542812604, "loss": 2.0667, "step": 53350 }, { "epoch": 0.13, "grad_norm": 1.6953125, "learning_rate": 0.00019806129211635305, "loss": 2.2065, "step": 53355 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019806092991794618, "loss": 2.1654, "step": 53360 }, { "epoch": 0.13, "grad_norm": 2.5625, "learning_rate": 0.0001980605676860399, "loss": 2.1263, "step": 53365 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019806020542063437, "loss": 2.1848, "step": 53370 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019805984312172968, "loss": 2.2323, "step": 53375 }, { "epoch": 0.13, "grad_norm": 2.625, "learning_rate": 0.000198059480789326, "loss": 2.2127, "step": 53380 }, { "epoch": 0.13, "grad_norm": 1.6171875, "learning_rate": 0.0001980591184234234, "loss": 2.2491, "step": 53385 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.000198058756024022, "loss": 2.1142, "step": 53390 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.000198058393591122, "loss": 2.0814, "step": 53395 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019805803112472347, "loss": 2.1131, "step": 53400 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001980576686248265, "loss": 2.2115, "step": 53405 }, { "epoch": 0.13, "grad_norm": 1.6015625, "learning_rate": 0.00019805730609143127, "loss": 2.1764, "step": 53410 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.0001980569435245379, "loss": 2.0827, "step": 53415 }, { "epoch": 0.13, "grad_norm": 2.4375, "learning_rate": 0.0001980565809241465, "loss": 2.0855, "step": 53420 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019805621829025718, "loss": 2.2132, "step": 53425 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019805585562287007, "loss": 2.1789, "step": 53430 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.0001980554929219853, "loss": 2.2679, "step": 53435 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019805513018760303, "loss": 1.9787, "step": 53440 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.0001980547674197233, "loss": 2.0465, "step": 53445 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.0001980544046183463, "loss": 2.0012, "step": 53450 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019805404178347216, "loss": 2.1061, "step": 53455 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.000198053678915101, "loss": 2.0293, "step": 53460 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019805331601323287, "loss": 2.1063, "step": 53465 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019805295307786795, "loss": 2.1571, "step": 53470 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.0001980525901090064, "loss": 2.2478, "step": 53475 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.0001980522271066483, "loss": 2.2374, "step": 53480 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019805186407079376, "loss": 2.39, "step": 53485 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019805150100144295, "loss": 2.2207, "step": 53490 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019805113789859595, "loss": 2.1842, "step": 53495 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.0001980507747622529, "loss": 2.1672, "step": 53500 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019805041159241395, "loss": 2.2244, "step": 53505 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019805004838907916, "loss": 2.0031, "step": 53510 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019804968515224874, "loss": 1.946, "step": 53515 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019804932188192275, "loss": 2.0904, "step": 53520 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019804895857810135, "loss": 2.1685, "step": 53525 }, { "epoch": 0.13, "grad_norm": 2.34375, "learning_rate": 0.00019804859524078463, "loss": 2.1427, "step": 53530 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019804823186997275, "loss": 2.28, "step": 53535 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019804786846566578, "loss": 2.1089, "step": 53540 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.0001980475050278639, "loss": 2.2555, "step": 53545 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019804714155656722, "loss": 2.2484, "step": 53550 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019804677805177585, "loss": 2.2932, "step": 53555 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019804641451348992, "loss": 2.13, "step": 53560 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019804605094170957, "loss": 2.33, "step": 53565 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019804568733643488, "loss": 2.0433, "step": 53570 }, { "epoch": 0.13, "grad_norm": 1.6328125, "learning_rate": 0.00019804532369766605, "loss": 2.0047, "step": 53575 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019804496002540312, "loss": 2.0391, "step": 53580 }, { "epoch": 0.13, "grad_norm": 15.75, "learning_rate": 0.00019804459631964628, "loss": 2.2346, "step": 53585 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019804423258039562, "loss": 2.2635, "step": 53590 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019804386880765127, "loss": 2.2811, "step": 53595 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019804350500141334, "loss": 2.182, "step": 53600 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019804314116168202, "loss": 2.2738, "step": 53605 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019804277728845734, "loss": 2.356, "step": 53610 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019804241338173948, "loss": 2.1408, "step": 53615 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019804204944152856, "loss": 2.2472, "step": 53620 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019804168546782468, "loss": 2.1865, "step": 53625 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019804132146062798, "loss": 2.2399, "step": 53630 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019804095741993863, "loss": 2.2382, "step": 53635 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019804059334575666, "loss": 2.2685, "step": 53640 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.0001980402292380823, "loss": 2.019, "step": 53645 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019803986509691558, "loss": 2.1364, "step": 53650 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019803950092225664, "loss": 2.113, "step": 53655 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019803913671410567, "loss": 2.0557, "step": 53660 }, { "epoch": 0.13, "grad_norm": 2.515625, "learning_rate": 0.00019803877247246275, "loss": 2.2068, "step": 53665 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.000198038408197328, "loss": 2.1135, "step": 53670 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019803804388870156, "loss": 2.1744, "step": 53675 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019803767954658353, "loss": 2.2117, "step": 53680 }, { "epoch": 0.13, "grad_norm": 1.6015625, "learning_rate": 0.00019803731517097406, "loss": 2.17, "step": 53685 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019803695076187324, "loss": 2.1711, "step": 53690 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.00019803658631928125, "loss": 2.3027, "step": 53695 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019803622184319818, "loss": 2.2768, "step": 53700 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019803585733362415, "loss": 2.2225, "step": 53705 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.0001980354927905593, "loss": 2.1942, "step": 53710 }, { "epoch": 0.13, "grad_norm": 2.484375, "learning_rate": 0.00019803512821400373, "loss": 1.8579, "step": 53715 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019803476360395763, "loss": 2.0725, "step": 53720 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019803439896042105, "loss": 2.2114, "step": 53725 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019803403428339414, "loss": 1.9987, "step": 53730 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.000198033669572877, "loss": 2.2693, "step": 53735 }, { "epoch": 0.13, "grad_norm": 2.25, "learning_rate": 0.00019803330482886982, "loss": 2.1753, "step": 53740 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019803294005137266, "loss": 2.2004, "step": 53745 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019803257524038568, "loss": 2.112, "step": 53750 }, { "epoch": 0.13, "grad_norm": 1.609375, "learning_rate": 0.000198032210395909, "loss": 2.1673, "step": 53755 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.0001980318455179427, "loss": 1.9751, "step": 53760 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.000198031480606487, "loss": 2.1695, "step": 53765 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019803111566154194, "loss": 2.1849, "step": 53770 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.0001980307506831077, "loss": 2.1425, "step": 53775 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019803038567118436, "loss": 2.3364, "step": 53780 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019803002062577203, "loss": 2.1012, "step": 53785 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001980296555468709, "loss": 2.2288, "step": 53790 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019802929043448106, "loss": 1.9904, "step": 53795 }, { "epoch": 0.13, "grad_norm": 2.234375, "learning_rate": 0.00019802892528860262, "loss": 2.2501, "step": 53800 }, { "epoch": 0.13, "grad_norm": 2.96875, "learning_rate": 0.00019802856010923573, "loss": 2.0341, "step": 53805 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019802819489638052, "loss": 2.3345, "step": 53810 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019802782965003707, "loss": 2.2427, "step": 53815 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019802746437020557, "loss": 2.397, "step": 53820 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.0001980270990568861, "loss": 2.2698, "step": 53825 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.0001980267337100788, "loss": 1.9374, "step": 53830 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.0001980263683297838, "loss": 2.0713, "step": 53835 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019802600291600117, "loss": 2.2954, "step": 53840 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001980256374687311, "loss": 2.2292, "step": 53845 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.0001980252719879737, "loss": 2.2073, "step": 53850 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.0001980249064737291, "loss": 2.1345, "step": 53855 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001980245409259974, "loss": 1.8726, "step": 53860 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019802417534477872, "loss": 2.2589, "step": 53865 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019802380973007325, "loss": 2.289, "step": 53870 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019802344408188102, "loss": 2.1687, "step": 53875 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019802307840020224, "loss": 2.2429, "step": 53880 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.000198022712685037, "loss": 2.1078, "step": 53885 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019802234693638542, "loss": 2.3197, "step": 53890 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.0001980219811542476, "loss": 2.1841, "step": 53895 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019802161533862372, "loss": 2.2075, "step": 53900 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019802124948951387, "loss": 2.1233, "step": 53905 }, { "epoch": 0.13, "grad_norm": 1.6796875, "learning_rate": 0.00019802088360691816, "loss": 2.1677, "step": 53910 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.0001980205176908368, "loss": 2.1637, "step": 53915 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.0001980201517412698, "loss": 2.1549, "step": 53920 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019801978575821738, "loss": 2.1058, "step": 53925 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019801941974167958, "loss": 2.0954, "step": 53930 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019801905369165658, "loss": 2.2255, "step": 53935 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.0001980186876081485, "loss": 2.1432, "step": 53940 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019801832149115547, "loss": 2.1041, "step": 53945 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.0001980179553406776, "loss": 2.0585, "step": 53950 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.000198017589156715, "loss": 2.5417, "step": 53955 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019801722293926783, "loss": 2.2979, "step": 53960 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.0001980168566883362, "loss": 2.1867, "step": 53965 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019801649040392024, "loss": 1.9731, "step": 53970 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019801612408602007, "loss": 2.0842, "step": 53975 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019801575773463579, "loss": 2.1922, "step": 53980 }, { "epoch": 0.13, "grad_norm": 2.84375, "learning_rate": 0.00019801539134976754, "loss": 2.2646, "step": 53985 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019801502493141547, "loss": 2.1071, "step": 53990 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019801465847957972, "loss": 2.2537, "step": 53995 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019801429199426036, "loss": 2.2241, "step": 54000 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019801392547545756, "loss": 2.1993, "step": 54005 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001980135589231714, "loss": 2.3831, "step": 54010 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019801319233740203, "loss": 2.2624, "step": 54015 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019801282571814958, "loss": 2.2028, "step": 54020 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.0001980124590654142, "loss": 2.0032, "step": 54025 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019801209237919594, "loss": 2.1487, "step": 54030 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019801172565949503, "loss": 2.1737, "step": 54035 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019801135890631146, "loss": 2.1731, "step": 54040 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019801099211964548, "loss": 2.1438, "step": 54045 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019801062529949717, "loss": 2.3016, "step": 54050 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019801025844586666, "loss": 2.2177, "step": 54055 }, { "epoch": 0.13, "grad_norm": 2.4375, "learning_rate": 0.00019800989155875404, "loss": 2.2303, "step": 54060 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.00019800952463815946, "loss": 2.1549, "step": 54065 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019800915768408307, "loss": 2.3248, "step": 54070 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019800879069652497, "loss": 2.1951, "step": 54075 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.0001980084236754853, "loss": 1.9921, "step": 54080 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019800805662096416, "loss": 2.0798, "step": 54085 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.0001980076895329617, "loss": 2.051, "step": 54090 }, { "epoch": 0.13, "grad_norm": 2.421875, "learning_rate": 0.000198007322411478, "loss": 2.1581, "step": 54095 }, { "epoch": 0.13, "grad_norm": 2.484375, "learning_rate": 0.00019800695525651327, "loss": 2.0847, "step": 54100 }, { "epoch": 0.13, "grad_norm": 4.15625, "learning_rate": 0.00019800658806806758, "loss": 2.2802, "step": 54105 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019800622084614102, "loss": 2.0719, "step": 54110 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.0001980058535907338, "loss": 2.0951, "step": 54115 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.000198005486301846, "loss": 2.1547, "step": 54120 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.00019800511897947773, "loss": 2.067, "step": 54125 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019800475162362914, "loss": 2.0593, "step": 54130 }, { "epoch": 0.13, "grad_norm": 2.453125, "learning_rate": 0.00019800438423430038, "loss": 2.2271, "step": 54135 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019800401681149149, "loss": 2.1447, "step": 54140 }, { "epoch": 0.13, "grad_norm": 2.34375, "learning_rate": 0.00019800364935520267, "loss": 2.0361, "step": 54145 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019800328186543405, "loss": 2.46, "step": 54150 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.00019800291434218572, "loss": 2.223, "step": 54155 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.0001980025467854578, "loss": 2.1964, "step": 54160 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019800217919525043, "loss": 2.1035, "step": 54165 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019800181157156375, "loss": 2.0899, "step": 54170 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019800144391439787, "loss": 2.3277, "step": 54175 }, { "epoch": 0.13, "grad_norm": 1.6484375, "learning_rate": 0.00019800107622375295, "loss": 2.1094, "step": 54180 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019800070849962907, "loss": 2.3351, "step": 54185 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019800034074202633, "loss": 2.0298, "step": 54190 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019799997295094493, "loss": 2.1216, "step": 54195 }, { "epoch": 0.13, "grad_norm": 1.703125, "learning_rate": 0.00019799960512638498, "loss": 2.04, "step": 54200 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019799923726834657, "loss": 2.2421, "step": 54205 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019799886937682984, "loss": 2.0212, "step": 54210 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019799850145183493, "loss": 2.1416, "step": 54215 }, { "epoch": 0.13, "grad_norm": 2.734375, "learning_rate": 0.0001979981334933619, "loss": 2.107, "step": 54220 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.000197997765501411, "loss": 2.2621, "step": 54225 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019799739747598224, "loss": 2.3274, "step": 54230 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019799702941707584, "loss": 2.2282, "step": 54235 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019799666132469185, "loss": 2.1549, "step": 54240 }, { "epoch": 0.13, "grad_norm": 1.8359375, "learning_rate": 0.00019799629319883043, "loss": 2.1064, "step": 54245 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.0001979959250394917, "loss": 2.0513, "step": 54250 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019799555684667578, "loss": 2.207, "step": 54255 }, { "epoch": 0.13, "grad_norm": 2.34375, "learning_rate": 0.00019799518862038279, "loss": 2.2336, "step": 54260 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019799482036061286, "loss": 2.0963, "step": 54265 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019799445206736613, "loss": 2.1967, "step": 54270 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019799408374064273, "loss": 2.1303, "step": 54275 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019799371538044275, "loss": 1.9668, "step": 54280 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019799334698676637, "loss": 2.2808, "step": 54285 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019799297855961368, "loss": 2.174, "step": 54290 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.0001979926100989848, "loss": 2.1132, "step": 54295 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019799224160487987, "loss": 2.2879, "step": 54300 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019799187307729902, "loss": 2.2011, "step": 54305 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019799150451624235, "loss": 2.2982, "step": 54310 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019799113592171, "loss": 2.1329, "step": 54315 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019799076729370215, "loss": 2.1342, "step": 54320 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019799039863221883, "loss": 2.2506, "step": 54325 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019799002993726024, "loss": 2.2431, "step": 54330 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019798966120882647, "loss": 1.9453, "step": 54335 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019798929244691765, "loss": 2.0065, "step": 54340 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.0001979889236515339, "loss": 2.1419, "step": 54345 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019798855482267537, "loss": 2.1717, "step": 54350 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019798818596034217, "loss": 2.181, "step": 54355 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019798781706453444, "loss": 2.1794, "step": 54360 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.0001979874481352523, "loss": 2.26, "step": 54365 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019798707917249584, "loss": 1.9945, "step": 54370 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019798671017626522, "loss": 2.1021, "step": 54375 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019798634114656058, "loss": 2.0686, "step": 54380 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019798597208338202, "loss": 2.2883, "step": 54385 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.0001979856029867297, "loss": 2.3317, "step": 54390 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019798523385660367, "loss": 2.0675, "step": 54395 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019798486469300414, "loss": 2.0666, "step": 54400 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.0001979844954959312, "loss": 2.07, "step": 54405 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019798412626538498, "loss": 2.0525, "step": 54410 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.0001979837570013656, "loss": 2.1107, "step": 54415 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001979833877038732, "loss": 2.3992, "step": 54420 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.0001979830183729079, "loss": 2.2262, "step": 54425 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.0001979826490084698, "loss": 2.0476, "step": 54430 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019798227961055907, "loss": 2.2461, "step": 54435 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019798191017917583, "loss": 2.2216, "step": 54440 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019798154071432018, "loss": 2.1066, "step": 54445 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019798117121599224, "loss": 2.1479, "step": 54450 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019798080168419217, "loss": 2.1508, "step": 54455 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019798043211892007, "loss": 2.2605, "step": 54460 }, { "epoch": 0.13, "grad_norm": 2.375, "learning_rate": 0.0001979800625201761, "loss": 2.24, "step": 54465 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.00019797969288796034, "loss": 1.9464, "step": 54470 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019797932322227296, "loss": 2.3248, "step": 54475 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019797895352311406, "loss": 2.1615, "step": 54480 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019797858379048378, "loss": 2.0867, "step": 54485 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019797821402438222, "loss": 2.2431, "step": 54490 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019797784422480953, "loss": 2.2734, "step": 54495 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.00019797747439176584, "loss": 2.2808, "step": 54500 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.00019797710452525128, "loss": 2.3455, "step": 54505 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019797673462526594, "loss": 2.1672, "step": 54510 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019797636469180997, "loss": 1.9672, "step": 54515 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019797599472488352, "loss": 2.1324, "step": 54520 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019797562472448666, "loss": 2.0928, "step": 54525 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019797525469061956, "loss": 2.1506, "step": 54530 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019797488462328238, "loss": 2.0427, "step": 54535 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019797451452247515, "loss": 2.0946, "step": 54540 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019797414438819805, "loss": 2.193, "step": 54545 }, { "epoch": 0.13, "grad_norm": 2.9375, "learning_rate": 0.00019797377422045123, "loss": 2.2254, "step": 54550 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019797340401923479, "loss": 2.1711, "step": 54555 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019797303378454886, "loss": 2.0279, "step": 54560 }, { "epoch": 0.13, "grad_norm": 1.703125, "learning_rate": 0.00019797266351639353, "loss": 2.1075, "step": 54565 }, { "epoch": 0.13, "grad_norm": 1.59375, "learning_rate": 0.000197972293214769, "loss": 2.1755, "step": 54570 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019797192287967534, "loss": 2.2584, "step": 54575 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019797155251111273, "loss": 2.3997, "step": 54580 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.0001979711821090812, "loss": 1.9182, "step": 54585 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019797081167358097, "loss": 2.3028, "step": 54590 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019797044120461216, "loss": 2.2609, "step": 54595 }, { "epoch": 0.13, "grad_norm": 1.6953125, "learning_rate": 0.0001979700707021748, "loss": 2.009, "step": 54600 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019796970016626916, "loss": 2.2285, "step": 54605 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019796932959689528, "loss": 2.1627, "step": 54610 }, { "epoch": 0.13, "grad_norm": 2.515625, "learning_rate": 0.00019796895899405325, "loss": 2.2286, "step": 54615 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.0001979685883577433, "loss": 2.1877, "step": 54620 }, { "epoch": 0.13, "grad_norm": 1.609375, "learning_rate": 0.00019796821768796547, "loss": 2.0875, "step": 54625 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019796784698471992, "loss": 2.1827, "step": 54630 }, { "epoch": 0.13, "grad_norm": 1.7265625, "learning_rate": 0.00019796747624800683, "loss": 2.1725, "step": 54635 }, { "epoch": 0.13, "grad_norm": 1.8359375, "learning_rate": 0.00019796710547782622, "loss": 2.2065, "step": 54640 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019796673467417829, "loss": 2.1225, "step": 54645 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019796636383706312, "loss": 2.1798, "step": 54650 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.0001979659929664809, "loss": 2.2433, "step": 54655 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.0001979656220624317, "loss": 2.0807, "step": 54660 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019796525112491565, "loss": 2.1935, "step": 54665 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019796488015393294, "loss": 2.2779, "step": 54670 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001979645091494836, "loss": 2.2396, "step": 54675 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019796413811156785, "loss": 2.1667, "step": 54680 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019796376704018576, "loss": 2.317, "step": 54685 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.00019796339593533747, "loss": 2.2649, "step": 54690 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019796302479702308, "loss": 2.059, "step": 54695 }, { "epoch": 0.13, "grad_norm": 1.7265625, "learning_rate": 0.0001979626536252428, "loss": 2.1119, "step": 54700 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019796228241999666, "loss": 2.2775, "step": 54705 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019796191118128485, "loss": 2.353, "step": 54710 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.00019796153990910744, "loss": 2.309, "step": 54715 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.0001979611686034646, "loss": 2.1417, "step": 54720 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019796079726435646, "loss": 2.2061, "step": 54725 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019796042589178314, "loss": 2.2019, "step": 54730 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019796005448574478, "loss": 2.0824, "step": 54735 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019795968304624144, "loss": 2.1097, "step": 54740 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019795931157327333, "loss": 2.2129, "step": 54745 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019795894006684054, "loss": 2.0429, "step": 54750 }, { "epoch": 0.13, "grad_norm": 1.6484375, "learning_rate": 0.0001979585685269432, "loss": 2.2295, "step": 54755 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019795819695358143, "loss": 2.0945, "step": 54760 }, { "epoch": 0.13, "grad_norm": 2.234375, "learning_rate": 0.00019795782534675537, "loss": 1.9433, "step": 54765 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.0001979574537064651, "loss": 2.2882, "step": 54770 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019795708203271083, "loss": 2.0429, "step": 54775 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019795671032549263, "loss": 2.0659, "step": 54780 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.00019795633858481066, "loss": 2.1623, "step": 54785 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.000197955966810665, "loss": 1.9648, "step": 54790 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019795559500305582, "loss": 2.1265, "step": 54795 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019795522316198326, "loss": 2.2626, "step": 54800 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019795485128744738, "loss": 2.2007, "step": 54805 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.00019795447937944835, "loss": 2.1904, "step": 54810 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.0001979541074379863, "loss": 2.2844, "step": 54815 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.00019795373546306136, "loss": 2.2852, "step": 54820 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019795336345467365, "loss": 2.0318, "step": 54825 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019795299141282327, "loss": 1.9976, "step": 54830 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019795261933751039, "loss": 2.0527, "step": 54835 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019795224722873512, "loss": 2.3189, "step": 54840 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019795187508649757, "loss": 2.27, "step": 54845 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.0001979515029107979, "loss": 2.2175, "step": 54850 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.0001979511307016362, "loss": 2.235, "step": 54855 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.00019795075845901262, "loss": 2.2446, "step": 54860 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.0001979503861829273, "loss": 2.2026, "step": 54865 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019795001387338035, "loss": 2.0824, "step": 54870 }, { "epoch": 0.13, "grad_norm": 5.34375, "learning_rate": 0.0001979496415303719, "loss": 2.17, "step": 54875 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019794926915390207, "loss": 2.0324, "step": 54880 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.000197948896743971, "loss": 2.21, "step": 54885 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.0001979485243005788, "loss": 2.2104, "step": 54890 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.0001979481518237256, "loss": 2.0167, "step": 54895 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019794777931341154, "loss": 2.1796, "step": 54900 }, { "epoch": 0.13, "grad_norm": 2.640625, "learning_rate": 0.00019794740676963677, "loss": 2.1151, "step": 54905 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019794703419240137, "loss": 2.2359, "step": 54910 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019794666158170547, "loss": 2.0647, "step": 54915 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019794628893754924, "loss": 2.2259, "step": 54920 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019794591625993277, "loss": 2.1961, "step": 54925 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.0001979455435488562, "loss": 2.2389, "step": 54930 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.00019794517080431968, "loss": 2.2642, "step": 54935 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019794479802632327, "loss": 2.3808, "step": 54940 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019794442521486717, "loss": 2.2463, "step": 54945 }, { "epoch": 0.13, "grad_norm": 2.328125, "learning_rate": 0.00019794405236995145, "loss": 2.0044, "step": 54950 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019794367949157627, "loss": 2.1892, "step": 54955 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.0001979433065797418, "loss": 2.153, "step": 54960 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.0001979429336344481, "loss": 2.2596, "step": 54965 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001979425606556953, "loss": 2.3063, "step": 54970 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019794218764348354, "loss": 2.3054, "step": 54975 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019794181459781296, "loss": 2.3274, "step": 54980 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019794144151868368, "loss": 2.142, "step": 54985 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019794106840609582, "loss": 2.219, "step": 54990 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019794069526004953, "loss": 2.2109, "step": 54995 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019794032208054493, "loss": 2.1858, "step": 55000 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.0001979399488675821, "loss": 2.2434, "step": 55005 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019793957562116123, "loss": 2.268, "step": 55010 }, { "epoch": 0.13, "grad_norm": 2.71875, "learning_rate": 0.00019793920234128246, "loss": 2.2336, "step": 55015 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019793882902794584, "loss": 2.2395, "step": 55020 }, { "epoch": 0.13, "grad_norm": 1.8359375, "learning_rate": 0.00019793845568115152, "loss": 2.0995, "step": 55025 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.0001979380823008997, "loss": 2.2265, "step": 55030 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019793770888719043, "loss": 2.2125, "step": 55035 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019793733544002387, "loss": 2.1963, "step": 55040 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.0001979369619594001, "loss": 2.2553, "step": 55045 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019793658844531934, "loss": 2.3102, "step": 55050 }, { "epoch": 0.13, "grad_norm": 1.578125, "learning_rate": 0.00019793621489778165, "loss": 2.2345, "step": 55055 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.00019793584131678717, "loss": 2.1725, "step": 55060 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019793546770233603, "loss": 2.2399, "step": 55065 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019793509405442836, "loss": 2.3285, "step": 55070 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.0001979347203730643, "loss": 1.8923, "step": 55075 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019793434665824392, "loss": 2.2601, "step": 55080 }, { "epoch": 0.13, "grad_norm": 1.625, "learning_rate": 0.00019793397290996745, "loss": 2.2534, "step": 55085 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019793359912823492, "loss": 2.0313, "step": 55090 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.0001979332253130465, "loss": 2.2212, "step": 55095 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019793285146440232, "loss": 1.9939, "step": 55100 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019793247758230248, "loss": 2.1911, "step": 55105 }, { "epoch": 0.13, "grad_norm": 1.5546875, "learning_rate": 0.00019793210366674714, "loss": 2.3167, "step": 55110 }, { "epoch": 0.13, "grad_norm": 2.890625, "learning_rate": 0.00019793172971773647, "loss": 2.234, "step": 55115 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.0001979313557352705, "loss": 2.1261, "step": 55120 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.0001979309817193494, "loss": 2.1322, "step": 55125 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019793060766997328, "loss": 2.2608, "step": 55130 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019793023358714233, "loss": 2.268, "step": 55135 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001979298594708566, "loss": 2.0495, "step": 55140 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019792948532111628, "loss": 2.2592, "step": 55145 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019792911113792147, "loss": 2.0631, "step": 55150 }, { "epoch": 0.13, "grad_norm": 2.234375, "learning_rate": 0.0001979287369212723, "loss": 2.1102, "step": 55155 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.0001979283626711689, "loss": 2.1146, "step": 55160 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019792798838761137, "loss": 2.2272, "step": 55165 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.0001979276140705999, "loss": 2.1238, "step": 55170 }, { "epoch": 0.13, "grad_norm": 1.65625, "learning_rate": 0.00019792723972013456, "loss": 2.1661, "step": 55175 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019792686533621549, "loss": 1.9913, "step": 55180 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019792649091884284, "loss": 2.2965, "step": 55185 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001979261164680167, "loss": 2.262, "step": 55190 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019792574198373725, "loss": 2.2084, "step": 55195 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019792536746600462, "loss": 2.3492, "step": 55200 }, { "epoch": 0.13, "grad_norm": 1.59375, "learning_rate": 0.00019792499291481884, "loss": 2.4232, "step": 55205 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019792461833018015, "loss": 2.2219, "step": 55210 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.0001979242437120886, "loss": 2.1075, "step": 55215 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.00019792386906054442, "loss": 2.1661, "step": 55220 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.0001979234943755476, "loss": 2.1969, "step": 55225 }, { "epoch": 0.13, "grad_norm": 2.234375, "learning_rate": 0.00019792311965709838, "loss": 2.2232, "step": 55230 }, { "epoch": 0.13, "grad_norm": 2.40625, "learning_rate": 0.00019792274490519683, "loss": 2.2801, "step": 55235 }, { "epoch": 0.13, "grad_norm": 1.6796875, "learning_rate": 0.0001979223701198431, "loss": 2.298, "step": 55240 }, { "epoch": 0.13, "grad_norm": 1.7109375, "learning_rate": 0.0001979219953010373, "loss": 2.2154, "step": 55245 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.00019792162044877958, "loss": 2.1306, "step": 55250 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019792124556307006, "loss": 2.1983, "step": 55255 }, { "epoch": 0.13, "grad_norm": 1.5625, "learning_rate": 0.00019792087064390886, "loss": 2.2813, "step": 55260 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019792049569129612, "loss": 2.2116, "step": 55265 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019792012070523197, "loss": 2.1243, "step": 55270 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019791974568571653, "loss": 2.2382, "step": 55275 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019791937063274993, "loss": 2.2225, "step": 55280 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019791899554633227, "loss": 2.0194, "step": 55285 }, { "epoch": 0.13, "grad_norm": 1.734375, "learning_rate": 0.00019791862042646373, "loss": 1.8723, "step": 55290 }, { "epoch": 0.13, "grad_norm": 1.640625, "learning_rate": 0.0001979182452731444, "loss": 2.2011, "step": 55295 }, { "epoch": 0.13, "grad_norm": 1.59375, "learning_rate": 0.00019791787008637443, "loss": 2.3129, "step": 55300 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.00019791749486615395, "loss": 2.1414, "step": 55305 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019791711961248307, "loss": 1.9942, "step": 55310 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019791674432536192, "loss": 2.1113, "step": 55315 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.0001979163690047906, "loss": 2.3012, "step": 55320 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019791599365076934, "loss": 2.055, "step": 55325 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019791561826329817, "loss": 2.1068, "step": 55330 }, { "epoch": 0.13, "grad_norm": 1.6015625, "learning_rate": 0.00019791524284237726, "loss": 2.0457, "step": 55335 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.0001979148673880067, "loss": 2.1677, "step": 55340 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019791449190018667, "loss": 2.2988, "step": 55345 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.0001979141163789173, "loss": 2.126, "step": 55350 }, { "epoch": 0.13, "grad_norm": 1.8125, "learning_rate": 0.00019791374082419862, "loss": 2.0246, "step": 55355 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019791336523603085, "loss": 2.092, "step": 55360 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019791298961441414, "loss": 2.0682, "step": 55365 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019791261395934857, "loss": 2.1732, "step": 55370 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019791223827083425, "loss": 2.156, "step": 55375 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019791186254887135, "loss": 2.2449, "step": 55380 }, { "epoch": 0.13, "grad_norm": 2.421875, "learning_rate": 0.00019791148679345998, "loss": 2.09, "step": 55385 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019791111100460026, "loss": 2.1451, "step": 55390 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019791073518229233, "loss": 2.2636, "step": 55395 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.00019791035932653633, "loss": 2.2872, "step": 55400 }, { "epoch": 0.13, "grad_norm": 2.4375, "learning_rate": 0.00019790998343733235, "loss": 2.1042, "step": 55405 }, { "epoch": 0.13, "grad_norm": 2.390625, "learning_rate": 0.00019790960751468056, "loss": 2.0957, "step": 55410 }, { "epoch": 0.13, "grad_norm": 2.65625, "learning_rate": 0.00019790923155858109, "loss": 2.2382, "step": 55415 }, { "epoch": 0.13, "grad_norm": 1.6328125, "learning_rate": 0.00019790885556903403, "loss": 2.1261, "step": 55420 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019790847954603952, "loss": 2.1993, "step": 55425 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019790810348959773, "loss": 2.1641, "step": 55430 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019790772739970873, "loss": 2.1489, "step": 55435 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019790735127637267, "loss": 2.0396, "step": 55440 }, { "epoch": 0.13, "grad_norm": 2.53125, "learning_rate": 0.0001979069751195897, "loss": 2.1871, "step": 55445 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019790659892935994, "loss": 2.0851, "step": 55450 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019790622270568348, "loss": 2.365, "step": 55455 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001979058464485605, "loss": 2.2918, "step": 55460 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019790547015799114, "loss": 2.1373, "step": 55465 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.00019790509383397543, "loss": 2.0312, "step": 55470 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019790471747651358, "loss": 2.128, "step": 55475 }, { "epoch": 0.13, "grad_norm": 1.625, "learning_rate": 0.00019790434108560572, "loss": 2.1832, "step": 55480 }, { "epoch": 0.13, "grad_norm": 2.625, "learning_rate": 0.00019790396466125195, "loss": 2.1213, "step": 55485 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019790358820345242, "loss": 2.0113, "step": 55490 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019790321171220726, "loss": 2.1481, "step": 55495 }, { "epoch": 0.13, "grad_norm": 1.6953125, "learning_rate": 0.00019790283518751658, "loss": 2.138, "step": 55500 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.0001979024586293805, "loss": 2.4075, "step": 55505 }, { "epoch": 0.13, "grad_norm": 1.578125, "learning_rate": 0.00019790208203779915, "loss": 2.1227, "step": 55510 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019790170541277274, "loss": 2.0299, "step": 55515 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019790132875430126, "loss": 2.1648, "step": 55520 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019790095206238494, "loss": 2.0878, "step": 55525 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.0001979005753370239, "loss": 2.2664, "step": 55530 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.00019790019857821823, "loss": 2.0348, "step": 55535 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019789982178596807, "loss": 2.0639, "step": 55540 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019789944496027357, "loss": 2.2456, "step": 55545 }, { "epoch": 0.13, "grad_norm": 2.34375, "learning_rate": 0.00019789906810113484, "loss": 2.417, "step": 55550 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.000197898691208552, "loss": 2.3428, "step": 55555 }, { "epoch": 0.13, "grad_norm": 2.328125, "learning_rate": 0.0001978983142825252, "loss": 2.1656, "step": 55560 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019789793732305456, "loss": 2.216, "step": 55565 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.0001978975603301402, "loss": 2.0964, "step": 55570 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019789718330378225, "loss": 2.4054, "step": 55575 }, { "epoch": 0.13, "grad_norm": 2.375, "learning_rate": 0.00019789680624398087, "loss": 2.2429, "step": 55580 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019789642915073616, "loss": 2.1822, "step": 55585 }, { "epoch": 0.13, "grad_norm": 2.640625, "learning_rate": 0.00019789605202404827, "loss": 2.2358, "step": 55590 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019789567486391726, "loss": 2.0843, "step": 55595 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019789529767034336, "loss": 2.0396, "step": 55600 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019789492044332665, "loss": 2.2404, "step": 55605 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019789454318286726, "loss": 2.083, "step": 55610 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.0001978941658889653, "loss": 2.2805, "step": 55615 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.0001978937885616209, "loss": 2.3273, "step": 55620 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019789341120083424, "loss": 2.0043, "step": 55625 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.0001978930338066054, "loss": 2.3073, "step": 55630 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019789265637893454, "loss": 2.0372, "step": 55635 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019789227891782175, "loss": 2.1511, "step": 55640 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.0001978919014232672, "loss": 2.0612, "step": 55645 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.000197891523895271, "loss": 2.228, "step": 55650 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019789114633383327, "loss": 2.3042, "step": 55655 }, { "epoch": 0.13, "grad_norm": 1.734375, "learning_rate": 0.00019789076873895416, "loss": 2.1418, "step": 55660 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019789039111063378, "loss": 2.2208, "step": 55665 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019789001344887227, "loss": 2.1323, "step": 55670 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019788963575366976, "loss": 2.0872, "step": 55675 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019788925802502637, "loss": 2.2169, "step": 55680 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019788888026294225, "loss": 2.2721, "step": 55685 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019788850246741749, "loss": 2.2836, "step": 55690 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019788812463845224, "loss": 1.8916, "step": 55695 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019788774677604668, "loss": 2.0252, "step": 55700 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019788736888020082, "loss": 2.2921, "step": 55705 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.0001978869909509149, "loss": 2.2654, "step": 55710 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.000197886612988189, "loss": 2.1948, "step": 55715 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019788623499202327, "loss": 2.2004, "step": 55720 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019788585696241782, "loss": 2.2055, "step": 55725 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019788547889937276, "loss": 2.2379, "step": 55730 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019788510080288827, "loss": 1.9319, "step": 55735 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.00019788472267296444, "loss": 2.0888, "step": 55740 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019788434450960146, "loss": 2.1857, "step": 55745 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.00019788396631279938, "loss": 2.2586, "step": 55750 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019788358808255833, "loss": 2.3202, "step": 55755 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019788320981887851, "loss": 1.9522, "step": 55760 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019788283152176, "loss": 2.1371, "step": 55765 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019788245319120296, "loss": 2.1371, "step": 55770 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019788207482720747, "loss": 1.9991, "step": 55775 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.0001978816964297737, "loss": 2.0081, "step": 55780 }, { "epoch": 0.13, "grad_norm": 1.6015625, "learning_rate": 0.00019788131799890177, "loss": 2.2693, "step": 55785 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.0001978809395345918, "loss": 2.0723, "step": 55790 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001978805610368439, "loss": 2.1385, "step": 55795 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019788018250565826, "loss": 2.2353, "step": 55800 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019787980394103498, "loss": 2.2371, "step": 55805 }, { "epoch": 0.13, "grad_norm": 1.3984375, "learning_rate": 0.00019787942534297417, "loss": 2.01, "step": 55810 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019787904671147598, "loss": 2.1502, "step": 55815 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019787866804654052, "loss": 2.1175, "step": 55820 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.00019787828934816792, "loss": 2.1305, "step": 55825 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019787791061635836, "loss": 2.2952, "step": 55830 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.0001978775318511119, "loss": 2.2154, "step": 55835 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.0001978771530524287, "loss": 2.2801, "step": 55840 }, { "epoch": 0.13, "grad_norm": 2.25, "learning_rate": 0.00019787677422030892, "loss": 2.4096, "step": 55845 }, { "epoch": 0.13, "grad_norm": 2.5, "learning_rate": 0.0001978763953547526, "loss": 2.2992, "step": 55850 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019787601645576, "loss": 2.0885, "step": 55855 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019787563752333114, "loss": 2.2739, "step": 55860 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019787525855746618, "loss": 2.3111, "step": 55865 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019787487955816527, "loss": 2.2848, "step": 55870 }, { "epoch": 0.13, "grad_norm": 3.171875, "learning_rate": 0.00019787450052542854, "loss": 2.1664, "step": 55875 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.0001978741214592561, "loss": 2.0952, "step": 55880 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019787374235964806, "loss": 2.2071, "step": 55885 }, { "epoch": 0.13, "grad_norm": 2.375, "learning_rate": 0.0001978733632266046, "loss": 2.0815, "step": 55890 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.0001978729840601258, "loss": 2.3095, "step": 55895 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019787260486021184, "loss": 2.1456, "step": 55900 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.0001978722256268628, "loss": 2.0773, "step": 55905 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019787184636007885, "loss": 2.1701, "step": 55910 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019787146705986007, "loss": 2.0855, "step": 55915 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019787108772620666, "loss": 2.2742, "step": 55920 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.0001978707083591187, "loss": 2.2472, "step": 55925 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019787032895859634, "loss": 2.1004, "step": 55930 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.00019786994952463967, "loss": 2.2143, "step": 55935 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.0001978695700572489, "loss": 2.1976, "step": 55940 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019786919055642406, "loss": 2.1128, "step": 55945 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019786881102216536, "loss": 1.9854, "step": 55950 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.0001978684314544729, "loss": 2.2322, "step": 55955 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.0001978680518533468, "loss": 2.124, "step": 55960 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019786767221878718, "loss": 2.2303, "step": 55965 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019786729255079423, "loss": 2.2102, "step": 55970 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019786691284936797, "loss": 2.1616, "step": 55975 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019786653311450867, "loss": 2.168, "step": 55980 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019786615334621636, "loss": 2.125, "step": 55985 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.0001978657735444912, "loss": 2.1659, "step": 55990 }, { "epoch": 0.13, "grad_norm": 1.703125, "learning_rate": 0.00019786539370933331, "loss": 2.28, "step": 55995 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019786501384074282, "loss": 2.2648, "step": 56000 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019786463393871987, "loss": 2.1999, "step": 56005 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.00019786425400326462, "loss": 2.0564, "step": 56010 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.0001978638740343771, "loss": 2.2694, "step": 56015 }, { "epoch": 0.13, "grad_norm": 2.328125, "learning_rate": 0.00019786349403205756, "loss": 2.1858, "step": 56020 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019786311399630605, "loss": 2.2882, "step": 56025 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019786273392712273, "loss": 2.1062, "step": 56030 }, { "epoch": 0.13, "grad_norm": 1.953125, "learning_rate": 0.00019786235382450774, "loss": 2.4016, "step": 56035 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019786197368846118, "loss": 1.9655, "step": 56040 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001978615935189832, "loss": 2.1687, "step": 56045 }, { "epoch": 0.13, "grad_norm": 1.8125, "learning_rate": 0.00019786121331607392, "loss": 2.064, "step": 56050 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019786083307973349, "loss": 2.3961, "step": 56055 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.000197860452809962, "loss": 2.1077, "step": 56060 }, { "epoch": 0.13, "grad_norm": 2.390625, "learning_rate": 0.0001978600725067596, "loss": 2.2539, "step": 56065 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.00019785969217012646, "loss": 2.1594, "step": 56070 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.00019785931180006267, "loss": 2.0147, "step": 56075 }, { "epoch": 0.13, "grad_norm": 1.65625, "learning_rate": 0.00019785893139656835, "loss": 2.3286, "step": 56080 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019785855095964366, "loss": 2.1198, "step": 56085 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019785817048928868, "loss": 2.0154, "step": 56090 }, { "epoch": 0.13, "grad_norm": 2.234375, "learning_rate": 0.0001978577899855036, "loss": 2.1869, "step": 56095 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019785740944828852, "loss": 2.1343, "step": 56100 }, { "epoch": 0.13, "grad_norm": 4.0625, "learning_rate": 0.00019785702887764356, "loss": 2.0995, "step": 56105 }, { "epoch": 0.13, "grad_norm": 1.5859375, "learning_rate": 0.00019785664827356886, "loss": 2.1477, "step": 56110 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001978562676360646, "loss": 2.23, "step": 56115 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.00019785588696513083, "loss": 2.3307, "step": 56120 }, { "epoch": 0.13, "grad_norm": 1.96875, "learning_rate": 0.0001978555062607677, "loss": 2.2547, "step": 56125 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019785512552297538, "loss": 2.4608, "step": 56130 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.000197854744751754, "loss": 2.0467, "step": 56135 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.0001978543639471036, "loss": 2.1828, "step": 56140 }, { "epoch": 0.13, "grad_norm": 1.53125, "learning_rate": 0.00019785398310902442, "loss": 2.2069, "step": 56145 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019785360223751653, "loss": 2.1501, "step": 56150 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019785322133258006, "loss": 2.1382, "step": 56155 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.0001978528403942152, "loss": 2.1637, "step": 56160 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.000197852459422422, "loss": 2.2131, "step": 56165 }, { "epoch": 0.13, "grad_norm": 1.8125, "learning_rate": 0.00019785207841720062, "loss": 2.2012, "step": 56170 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.00019785169737855123, "loss": 2.0594, "step": 56175 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019785131630647388, "loss": 2.1608, "step": 56180 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019785093520096876, "loss": 2.2331, "step": 56185 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.000197850554062036, "loss": 2.1813, "step": 56190 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019785017288967573, "loss": 2.1015, "step": 56195 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.00019784979168388804, "loss": 2.0702, "step": 56200 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.0001978494104446731, "loss": 2.078, "step": 56205 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.000197849029172031, "loss": 1.9747, "step": 56210 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.00019784864786596192, "loss": 2.2464, "step": 56215 }, { "epoch": 0.13, "grad_norm": 2.5, "learning_rate": 0.00019784826652646594, "loss": 2.0362, "step": 56220 }, { "epoch": 0.13, "grad_norm": 1.609375, "learning_rate": 0.00019784788515354325, "loss": 2.2285, "step": 56225 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019784750374719396, "loss": 2.0886, "step": 56230 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019784712230741817, "loss": 2.2213, "step": 56235 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019784674083421602, "loss": 2.0201, "step": 56240 }, { "epoch": 0.13, "grad_norm": 1.75, "learning_rate": 0.00019784635932758765, "loss": 2.2216, "step": 56245 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019784597778753322, "loss": 2.1317, "step": 56250 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019784559621405278, "loss": 2.0445, "step": 56255 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019784521460714655, "loss": 2.1652, "step": 56260 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001978448329668146, "loss": 2.2793, "step": 56265 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019784445129305707, "loss": 2.3682, "step": 56270 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019784406958587414, "loss": 2.3328, "step": 56275 }, { "epoch": 0.13, "grad_norm": 1.7109375, "learning_rate": 0.00019784368784526586, "loss": 2.1802, "step": 56280 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.00019784330607123241, "loss": 2.2513, "step": 56285 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019784292426377394, "loss": 2.2317, "step": 56290 }, { "epoch": 0.13, "grad_norm": 1.5703125, "learning_rate": 0.00019784254242289053, "loss": 2.2162, "step": 56295 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019784216054858234, "loss": 2.027, "step": 56300 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019784177864084947, "loss": 2.1336, "step": 56305 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.00019784139669969212, "loss": 2.3621, "step": 56310 }, { "epoch": 0.13, "grad_norm": 2.265625, "learning_rate": 0.00019784101472511032, "loss": 2.3488, "step": 56315 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.00019784063271710429, "loss": 1.9832, "step": 56320 }, { "epoch": 0.13, "grad_norm": 1.625, "learning_rate": 0.0001978402506756741, "loss": 2.0227, "step": 56325 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019783986860081996, "loss": 2.2191, "step": 56330 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019783948649254192, "loss": 2.0651, "step": 56335 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019783910435084012, "loss": 2.341, "step": 56340 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.0001978387221757147, "loss": 2.1797, "step": 56345 }, { "epoch": 0.13, "grad_norm": 1.703125, "learning_rate": 0.00019783833996716581, "loss": 2.0951, "step": 56350 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019783795772519358, "loss": 2.044, "step": 56355 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019783757544979813, "loss": 2.2733, "step": 56360 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019783719314097958, "loss": 2.3985, "step": 56365 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019783681079873805, "loss": 2.1629, "step": 56370 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019783642842307372, "loss": 2.0144, "step": 56375 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.0001978360460139867, "loss": 2.0507, "step": 56380 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019783566357147708, "loss": 1.9493, "step": 56385 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019783528109554505, "loss": 1.8906, "step": 56390 }, { "epoch": 0.13, "grad_norm": 1.5234375, "learning_rate": 0.0001978348985861907, "loss": 2.1825, "step": 56395 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.0001978345160434142, "loss": 2.068, "step": 56400 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.0001978341334672156, "loss": 2.1882, "step": 56405 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019783375085759514, "loss": 2.1067, "step": 56410 }, { "epoch": 0.13, "grad_norm": 1.8203125, "learning_rate": 0.00019783336821455288, "loss": 2.1654, "step": 56415 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019783298553808894, "loss": 2.2188, "step": 56420 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.0001978326028282035, "loss": 2.258, "step": 56425 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019783222008489668, "loss": 2.0237, "step": 56430 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019783183730816858, "loss": 2.1789, "step": 56435 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019783145449801936, "loss": 2.0511, "step": 56440 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019783107165444915, "loss": 2.1532, "step": 56445 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.00019783068877745805, "loss": 2.4503, "step": 56450 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019783030586704623, "loss": 2.1492, "step": 56455 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.0001978299229232138, "loss": 2.1378, "step": 56460 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.0001978295399459609, "loss": 2.1353, "step": 56465 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019782915693528763, "loss": 2.327, "step": 56470 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019782877389119417, "loss": 2.174, "step": 56475 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019782839081368062, "loss": 2.1835, "step": 56480 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.0001978280077027471, "loss": 2.0849, "step": 56485 }, { "epoch": 0.13, "grad_norm": 2.4375, "learning_rate": 0.00019782762455839377, "loss": 2.3756, "step": 56490 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019782724138062076, "loss": 2.1978, "step": 56495 }, { "epoch": 0.13, "grad_norm": 1.6484375, "learning_rate": 0.00019782685816942817, "loss": 2.1324, "step": 56500 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.00019782647492481618, "loss": 2.0677, "step": 56505 }, { "epoch": 0.13, "grad_norm": 2.484375, "learning_rate": 0.00019782609164678487, "loss": 2.1305, "step": 56510 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001978257083353344, "loss": 2.0203, "step": 56515 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.00019782532499046488, "loss": 2.0064, "step": 56520 }, { "epoch": 0.13, "grad_norm": 1.6796875, "learning_rate": 0.00019782494161217648, "loss": 2.0837, "step": 56525 }, { "epoch": 0.13, "grad_norm": 1.6796875, "learning_rate": 0.00019782455820046928, "loss": 2.3425, "step": 56530 }, { "epoch": 0.13, "grad_norm": 2.34375, "learning_rate": 0.00019782417475534345, "loss": 2.2669, "step": 56535 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019782379127679912, "loss": 2.1011, "step": 56540 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019782340776483638, "loss": 2.1366, "step": 56545 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019782302421945543, "loss": 2.1784, "step": 56550 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019782264064065633, "loss": 2.3219, "step": 56555 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.00019782225702843923, "loss": 2.0564, "step": 56560 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.0001978218733828043, "loss": 2.184, "step": 56565 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.00019782148970375166, "loss": 2.0814, "step": 56570 }, { "epoch": 0.13, "grad_norm": 1.8125, "learning_rate": 0.0001978211059912814, "loss": 2.0735, "step": 56575 }, { "epoch": 0.13, "grad_norm": 2.328125, "learning_rate": 0.00019782072224539365, "loss": 2.3329, "step": 56580 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.0001978203384660886, "loss": 2.3076, "step": 56585 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019781995465336635, "loss": 1.9642, "step": 56590 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019781957080722702, "loss": 2.2368, "step": 56595 }, { "epoch": 0.13, "grad_norm": 1.9453125, "learning_rate": 0.00019781918692767073, "loss": 2.154, "step": 56600 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019781880301469767, "loss": 2.0498, "step": 56605 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019781841906830794, "loss": 2.1925, "step": 56610 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019781803508850163, "loss": 2.2903, "step": 56615 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019781765107527893, "loss": 2.1194, "step": 56620 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019781726702863993, "loss": 2.152, "step": 56625 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.0001978168829485848, "loss": 2.1324, "step": 56630 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.0001978164988351136, "loss": 2.1805, "step": 56635 }, { "epoch": 0.13, "grad_norm": 1.6640625, "learning_rate": 0.00019781611468822654, "loss": 2.1809, "step": 56640 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.0001978157305079237, "loss": 2.2249, "step": 56645 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019781534629420525, "loss": 2.0567, "step": 56650 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019781496204707133, "loss": 2.1701, "step": 56655 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.000197814577766522, "loss": 2.2005, "step": 56660 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019781419345255748, "loss": 2.2066, "step": 56665 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.00019781380910517782, "loss": 2.3509, "step": 56670 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.0001978134247243832, "loss": 2.1368, "step": 56675 }, { "epoch": 0.13, "grad_norm": 1.9140625, "learning_rate": 0.00019781304031017376, "loss": 2.2935, "step": 56680 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.0001978126558625496, "loss": 2.1894, "step": 56685 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.00019781227138151083, "loss": 2.1527, "step": 56690 }, { "epoch": 0.13, "grad_norm": 2.53125, "learning_rate": 0.00019781188686705768, "loss": 2.1214, "step": 56695 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019781150231919017, "loss": 2.1332, "step": 56700 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019781111773790847, "loss": 2.1953, "step": 56705 }, { "epoch": 0.13, "grad_norm": 1.8125, "learning_rate": 0.00019781073312321275, "loss": 2.1589, "step": 56710 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019781034847510308, "loss": 2.4012, "step": 56715 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019780996379357966, "loss": 2.1478, "step": 56720 }, { "epoch": 0.13, "grad_norm": 2.25, "learning_rate": 0.00019780957907864256, "loss": 2.2077, "step": 56725 }, { "epoch": 0.13, "grad_norm": 1.6328125, "learning_rate": 0.0001978091943302919, "loss": 2.1166, "step": 56730 }, { "epoch": 0.13, "grad_norm": 1.921875, "learning_rate": 0.0001978088095485279, "loss": 2.2685, "step": 56735 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001978084247333506, "loss": 2.1754, "step": 56740 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019780803988476018, "loss": 2.3179, "step": 56745 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019780765500275677, "loss": 2.3017, "step": 56750 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019780727008734047, "loss": 2.2682, "step": 56755 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019780688513851145, "loss": 2.1898, "step": 56760 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001978065001562698, "loss": 2.2586, "step": 56765 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.0001978061151406157, "loss": 2.1539, "step": 56770 }, { "epoch": 0.13, "grad_norm": 1.9296875, "learning_rate": 0.0001978057300915493, "loss": 2.2521, "step": 56775 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001978053450090706, "loss": 1.9922, "step": 56780 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019780495989317985, "loss": 2.1817, "step": 56785 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019780457474387716, "loss": 2.3543, "step": 56790 }, { "epoch": 0.13, "grad_norm": 2.171875, "learning_rate": 0.00019780418956116268, "loss": 2.1421, "step": 56795 }, { "epoch": 0.13, "grad_norm": 1.6953125, "learning_rate": 0.0001978038043450365, "loss": 2.3724, "step": 56800 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019780341909549876, "loss": 2.1779, "step": 56805 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.00019780303381254958, "loss": 2.336, "step": 56810 }, { "epoch": 0.13, "grad_norm": 2.59375, "learning_rate": 0.00019780264849618914, "loss": 2.3556, "step": 56815 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.0001978022631464175, "loss": 2.2757, "step": 56820 }, { "epoch": 0.13, "grad_norm": 2.078125, "learning_rate": 0.0001978018777632349, "loss": 2.2833, "step": 56825 }, { "epoch": 0.13, "grad_norm": 1.6484375, "learning_rate": 0.00019780149234664135, "loss": 2.4228, "step": 56830 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019780110689663705, "loss": 2.1121, "step": 56835 }, { "epoch": 0.13, "grad_norm": 2.484375, "learning_rate": 0.0001978007214132221, "loss": 2.0858, "step": 56840 }, { "epoch": 0.13, "grad_norm": 1.7109375, "learning_rate": 0.00019780033589639667, "loss": 2.2856, "step": 56845 }, { "epoch": 0.13, "grad_norm": 1.5234375, "learning_rate": 0.0001977999503461609, "loss": 2.3012, "step": 56850 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019779956476251488, "loss": 2.0978, "step": 56855 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019779917914545872, "loss": 2.1565, "step": 56860 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019779879349499262, "loss": 2.1266, "step": 56865 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019779840781111664, "loss": 2.2286, "step": 56870 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.00019779802209383098, "loss": 2.225, "step": 56875 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019779763634313575, "loss": 2.1781, "step": 56880 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019779725055903103, "loss": 2.282, "step": 56885 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.00019779686474151706, "loss": 1.9844, "step": 56890 }, { "epoch": 0.13, "grad_norm": 1.984375, "learning_rate": 0.00019779647889059385, "loss": 2.1305, "step": 56895 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.00019779609300626163, "loss": 2.2018, "step": 56900 }, { "epoch": 0.13, "grad_norm": 2.328125, "learning_rate": 0.00019779570708852048, "loss": 2.2383, "step": 56905 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.0001977953211373705, "loss": 2.1873, "step": 56910 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019779493515281193, "loss": 2.1229, "step": 56915 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.0001977945491348448, "loss": 2.2008, "step": 56920 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.0001977941630834693, "loss": 2.027, "step": 56925 }, { "epoch": 0.13, "grad_norm": 2.296875, "learning_rate": 0.00019779377699868553, "loss": 2.1337, "step": 56930 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019779339088049362, "loss": 2.2488, "step": 56935 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019779300472889372, "loss": 2.0986, "step": 56940 }, { "epoch": 0.13, "grad_norm": 1.8828125, "learning_rate": 0.00019779261854388594, "loss": 2.1245, "step": 56945 }, { "epoch": 0.13, "grad_norm": 2.21875, "learning_rate": 0.00019779223232547049, "loss": 1.9495, "step": 56950 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019779184607364737, "loss": 2.2012, "step": 56955 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.00019779145978841682, "loss": 2.338, "step": 56960 }, { "epoch": 0.13, "grad_norm": 2.09375, "learning_rate": 0.00019779107346977893, "loss": 2.2271, "step": 56965 }, { "epoch": 0.13, "grad_norm": 2.234375, "learning_rate": 0.00019779068711773382, "loss": 2.1703, "step": 56970 }, { "epoch": 0.13, "grad_norm": 1.59375, "learning_rate": 0.00019779030073228168, "loss": 2.0178, "step": 56975 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.00019778991431342253, "loss": 2.2781, "step": 56980 }, { "epoch": 0.13, "grad_norm": 2.0625, "learning_rate": 0.0001977895278611566, "loss": 2.2897, "step": 56985 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.00019778914137548405, "loss": 2.3299, "step": 56990 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019778875485640488, "loss": 2.2193, "step": 56995 }, { "epoch": 0.13, "grad_norm": 2.5625, "learning_rate": 0.0001977883683039193, "loss": 2.2381, "step": 57000 }, { "epoch": 0.13, "grad_norm": 2.1875, "learning_rate": 0.00019778798171802748, "loss": 2.0611, "step": 57005 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.0001977875950987295, "loss": 2.311, "step": 57010 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.0001977872084460255, "loss": 2.3327, "step": 57015 }, { "epoch": 0.13, "grad_norm": 1.6015625, "learning_rate": 0.0001977868217599156, "loss": 2.226, "step": 57020 }, { "epoch": 0.13, "grad_norm": 1.9609375, "learning_rate": 0.0001977864350404, "loss": 2.1198, "step": 57025 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019778604828747873, "loss": 2.2408, "step": 57030 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.000197785661501152, "loss": 2.0847, "step": 57035 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001977852746814199, "loss": 2.2264, "step": 57040 }, { "epoch": 0.13, "grad_norm": 2.25, "learning_rate": 0.0001977848878282826, "loss": 2.2465, "step": 57045 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019778450094174016, "loss": 2.3702, "step": 57050 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.0001977841140217928, "loss": 2.2275, "step": 57055 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.00019778372706844062, "loss": 2.2475, "step": 57060 }, { "epoch": 0.13, "grad_norm": 1.859375, "learning_rate": 0.00019778334008168372, "loss": 2.075, "step": 57065 }, { "epoch": 0.13, "grad_norm": 2.359375, "learning_rate": 0.00019778295306152225, "loss": 2.2422, "step": 57070 }, { "epoch": 0.13, "grad_norm": 1.7734375, "learning_rate": 0.00019778256600795638, "loss": 2.1607, "step": 57075 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.0001977821789209862, "loss": 2.203, "step": 57080 }, { "epoch": 0.13, "grad_norm": 2.59375, "learning_rate": 0.00019778179180061182, "loss": 2.0528, "step": 57085 }, { "epoch": 0.13, "grad_norm": 1.7890625, "learning_rate": 0.00019778140464683347, "loss": 2.0752, "step": 57090 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.0001977810174596512, "loss": 2.3312, "step": 57095 }, { "epoch": 0.13, "grad_norm": 1.84375, "learning_rate": 0.0001977806302390651, "loss": 2.3613, "step": 57100 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.0001977802429850754, "loss": 2.2283, "step": 57105 }, { "epoch": 0.13, "grad_norm": 2.140625, "learning_rate": 0.00019777985569768226, "loss": 2.2522, "step": 57110 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019777946837688568, "loss": 2.3468, "step": 57115 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019777908102268588, "loss": 2.1898, "step": 57120 }, { "epoch": 0.13, "grad_norm": 2.25, "learning_rate": 0.00019777869363508298, "loss": 2.2877, "step": 57125 }, { "epoch": 0.13, "grad_norm": 1.9765625, "learning_rate": 0.0001977783062140771, "loss": 2.0646, "step": 57130 }, { "epoch": 0.13, "grad_norm": 1.7421875, "learning_rate": 0.00019777791875966836, "loss": 2.3187, "step": 57135 }, { "epoch": 0.13, "grad_norm": 2.03125, "learning_rate": 0.0001977775312718569, "loss": 2.3909, "step": 57140 }, { "epoch": 0.13, "grad_norm": 1.671875, "learning_rate": 0.0001977771437506429, "loss": 2.1018, "step": 57145 }, { "epoch": 0.13, "grad_norm": 1.6484375, "learning_rate": 0.00019777675619602644, "loss": 2.0323, "step": 57150 }, { "epoch": 0.13, "grad_norm": 2.0, "learning_rate": 0.00019777636860800765, "loss": 2.1307, "step": 57155 }, { "epoch": 0.13, "grad_norm": 1.765625, "learning_rate": 0.00019777598098658672, "loss": 2.1232, "step": 57160 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.0001977755933317637, "loss": 2.039, "step": 57165 }, { "epoch": 0.13, "grad_norm": 1.8359375, "learning_rate": 0.0001977752056435388, "loss": 2.0678, "step": 57170 }, { "epoch": 0.13, "grad_norm": 2.125, "learning_rate": 0.00019777481792191206, "loss": 2.085, "step": 57175 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.0001977744301668837, "loss": 2.2526, "step": 57180 }, { "epoch": 0.13, "grad_norm": 1.796875, "learning_rate": 0.00019777404237845386, "loss": 2.2465, "step": 57185 }, { "epoch": 0.13, "grad_norm": 1.6171875, "learning_rate": 0.00019777365455662257, "loss": 2.2047, "step": 57190 }, { "epoch": 0.13, "grad_norm": 1.546875, "learning_rate": 0.00019777326670139008, "loss": 2.229, "step": 57195 }, { "epoch": 0.13, "grad_norm": 1.8671875, "learning_rate": 0.00019777287881275643, "loss": 2.1561, "step": 57200 }, { "epoch": 0.13, "grad_norm": 1.90625, "learning_rate": 0.00019777249089072182, "loss": 2.2095, "step": 57205 }, { "epoch": 0.13, "grad_norm": 2.3125, "learning_rate": 0.00019777210293528633, "loss": 2.2004, "step": 57210 }, { "epoch": 0.13, "grad_norm": 1.78125, "learning_rate": 0.00019777171494645012, "loss": 2.1669, "step": 57215 }, { "epoch": 0.13, "grad_norm": 1.875, "learning_rate": 0.00019777132692421333, "loss": 2.2636, "step": 57220 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.0001977709388685761, "loss": 2.1496, "step": 57225 }, { "epoch": 0.13, "grad_norm": 2.203125, "learning_rate": 0.0001977705507795385, "loss": 2.2205, "step": 57230 }, { "epoch": 0.13, "grad_norm": 1.6953125, "learning_rate": 0.00019777016265710073, "loss": 1.9921, "step": 57235 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.0001977697745012629, "loss": 2.131, "step": 57240 }, { "epoch": 0.13, "grad_norm": 1.8125, "learning_rate": 0.00019776938631202513, "loss": 2.1502, "step": 57245 }, { "epoch": 0.13, "grad_norm": 1.890625, "learning_rate": 0.00019776899808938758, "loss": 2.3206, "step": 57250 }, { "epoch": 0.13, "grad_norm": 2.046875, "learning_rate": 0.00019776860983335034, "loss": 2.1764, "step": 57255 }, { "epoch": 0.13, "grad_norm": 1.8046875, "learning_rate": 0.0001977682215439136, "loss": 2.0874, "step": 57260 }, { "epoch": 0.13, "grad_norm": 1.8359375, "learning_rate": 0.00019776783322107745, "loss": 2.2186, "step": 57265 }, { "epoch": 0.13, "grad_norm": 1.9921875, "learning_rate": 0.00019776744486484205, "loss": 2.1333, "step": 57270 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.0001977670564752075, "loss": 2.0704, "step": 57275 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.00019776666805217395, "loss": 2.231, "step": 57280 }, { "epoch": 0.13, "grad_norm": 2.015625, "learning_rate": 0.00019776627959574157, "loss": 2.3678, "step": 57285 }, { "epoch": 0.13, "grad_norm": 1.8984375, "learning_rate": 0.00019776589110591042, "loss": 2.192, "step": 57290 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019776550258268066, "loss": 2.091, "step": 57295 }, { "epoch": 0.13, "grad_norm": 1.71875, "learning_rate": 0.00019776511402605244, "loss": 2.1474, "step": 57300 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019776472543602588, "loss": 2.0642, "step": 57305 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019776433681260112, "loss": 1.9248, "step": 57310 }, { "epoch": 0.13, "grad_norm": 1.8515625, "learning_rate": 0.0001977639481557783, "loss": 1.9309, "step": 57315 }, { "epoch": 0.13, "grad_norm": 1.9375, "learning_rate": 0.00019776355946555754, "loss": 2.223, "step": 57320 }, { "epoch": 0.13, "grad_norm": 2.15625, "learning_rate": 0.000197763170741939, "loss": 2.1084, "step": 57325 }, { "epoch": 0.13, "grad_norm": 1.828125, "learning_rate": 0.00019776278198492273, "loss": 2.2157, "step": 57330 }, { "epoch": 0.13, "grad_norm": 1.734375, "learning_rate": 0.00019776239319450894, "loss": 2.1957, "step": 57335 }, { "epoch": 0.13, "grad_norm": 1.7578125, "learning_rate": 0.00019776200437069777, "loss": 2.157, "step": 57340 }, { "epoch": 0.13, "grad_norm": 2.28125, "learning_rate": 0.00019776161551348932, "loss": 2.3102, "step": 57345 }, { "epoch": 0.13, "grad_norm": 1.8359375, "learning_rate": 0.00019776122662288373, "loss": 2.3411, "step": 57350 }, { "epoch": 0.13, "grad_norm": 1.6484375, "learning_rate": 0.0001977608376988811, "loss": 2.1914, "step": 57355 }, { "epoch": 0.13, "grad_norm": 2.109375, "learning_rate": 0.00019776044874148164, "loss": 2.0924, "step": 57360 }, { "epoch": 0.13, "grad_norm": 1.6875, "learning_rate": 0.00019776005975068541, "loss": 2.2642, "step": 57365 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019775967072649256, "loss": 2.2262, "step": 57370 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019775928166890325, "loss": 2.1973, "step": 57375 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.0001977588925779176, "loss": 2.2076, "step": 57380 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019775850345353572, "loss": 2.2563, "step": 57385 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019775811429575778, "loss": 2.2146, "step": 57390 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.0001977577251045839, "loss": 2.1244, "step": 57395 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019775733588001422, "loss": 2.2568, "step": 57400 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.0001977569466220488, "loss": 2.1448, "step": 57405 }, { "epoch": 0.14, "grad_norm": 1.703125, "learning_rate": 0.0001977565573306879, "loss": 2.0046, "step": 57410 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019775616800593156, "loss": 2.1857, "step": 57415 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019775577864777994, "loss": 2.13, "step": 57420 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.00019775538925623318, "loss": 2.3647, "step": 57425 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.0001977549998312914, "loss": 2.1027, "step": 57430 }, { "epoch": 0.14, "grad_norm": 2.640625, "learning_rate": 0.00019775461037295475, "loss": 2.1206, "step": 57435 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.00019775422088122332, "loss": 2.0762, "step": 57440 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.0001977538313560973, "loss": 2.244, "step": 57445 }, { "epoch": 0.14, "grad_norm": 2.4375, "learning_rate": 0.00019775344179757682, "loss": 2.0516, "step": 57450 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019775305220566197, "loss": 2.1402, "step": 57455 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019775266258035287, "loss": 2.072, "step": 57460 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.00019775227292164976, "loss": 2.1382, "step": 57465 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019775188322955262, "loss": 2.0206, "step": 57470 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019775149350406172, "loss": 2.1535, "step": 57475 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019775110374517713, "loss": 2.0687, "step": 57480 }, { "epoch": 0.14, "grad_norm": 2.796875, "learning_rate": 0.00019775071395289898, "loss": 2.2244, "step": 57485 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019775032412722743, "loss": 2.158, "step": 57490 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019774993426816258, "loss": 2.216, "step": 57495 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019774954437570456, "loss": 2.3048, "step": 57500 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019774915444985353, "loss": 2.1781, "step": 57505 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019774876449060966, "loss": 2.4018, "step": 57510 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019774837449797297, "loss": 2.2064, "step": 57515 }, { "epoch": 0.14, "grad_norm": 1.5546875, "learning_rate": 0.00019774798447194373, "loss": 2.0848, "step": 57520 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.000197747594412522, "loss": 2.3489, "step": 57525 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019774720431970787, "loss": 2.1268, "step": 57530 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019774681419350157, "loss": 2.072, "step": 57535 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.00019774642403390315, "loss": 2.2195, "step": 57540 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.0001977460338409128, "loss": 2.2498, "step": 57545 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.00019774564361453063, "loss": 2.0868, "step": 57550 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019774525335475675, "loss": 2.2956, "step": 57555 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019774486306159137, "loss": 2.114, "step": 57560 }, { "epoch": 0.14, "grad_norm": 1.71875, "learning_rate": 0.00019774447273503452, "loss": 2.0681, "step": 57565 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001977440823750864, "loss": 2.1035, "step": 57570 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019774369198174717, "loss": 2.1918, "step": 57575 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019774330155501689, "loss": 2.1227, "step": 57580 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.0001977429110948957, "loss": 2.1728, "step": 57585 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.0001977425206013838, "loss": 2.1843, "step": 57590 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019774213007448123, "loss": 2.1714, "step": 57595 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019774173951418825, "loss": 2.2043, "step": 57600 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019774134892050485, "loss": 2.1858, "step": 57605 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019774095829343128, "loss": 2.1851, "step": 57610 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.0001977405676329676, "loss": 2.1846, "step": 57615 }, { "epoch": 0.14, "grad_norm": 2.71875, "learning_rate": 0.00019774017693911398, "loss": 2.2293, "step": 57620 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019773978621187054, "loss": 2.1191, "step": 57625 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001977393954512374, "loss": 2.3218, "step": 57630 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019773900465721474, "loss": 2.36, "step": 57635 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.00019773861382980264, "loss": 2.2732, "step": 57640 }, { "epoch": 0.14, "grad_norm": 1.7109375, "learning_rate": 0.0001977382229690013, "loss": 1.9247, "step": 57645 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019773783207481077, "loss": 2.2048, "step": 57650 }, { "epoch": 0.14, "grad_norm": 2.359375, "learning_rate": 0.0001977374411472312, "loss": 2.1658, "step": 57655 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.0001977370501862628, "loss": 2.1075, "step": 57660 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.00019773665919190563, "loss": 2.0956, "step": 57665 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019773626816415986, "loss": 2.1376, "step": 57670 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.0001977358771030256, "loss": 2.0527, "step": 57675 }, { "epoch": 0.14, "grad_norm": 2.375, "learning_rate": 0.00019773548600850296, "loss": 2.0979, "step": 57680 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019773509488059214, "loss": 2.1735, "step": 57685 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.00019773470371929323, "loss": 2.2052, "step": 57690 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.00019773431252460637, "loss": 2.0001, "step": 57695 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.0001977339212965317, "loss": 2.2898, "step": 57700 }, { "epoch": 0.14, "grad_norm": 2.703125, "learning_rate": 0.00019773353003506934, "loss": 2.0991, "step": 57705 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019773313874021944, "loss": 2.097, "step": 57710 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019773274741198213, "loss": 2.166, "step": 57715 }, { "epoch": 0.14, "grad_norm": 2.59375, "learning_rate": 0.00019773235605035754, "loss": 2.3007, "step": 57720 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.0001977319646553458, "loss": 2.2435, "step": 57725 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019773157322694706, "loss": 2.317, "step": 57730 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.00019773118176516144, "loss": 2.153, "step": 57735 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019773079026998906, "loss": 2.1871, "step": 57740 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001977303987414301, "loss": 2.2096, "step": 57745 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019773000717948465, "loss": 2.2114, "step": 57750 }, { "epoch": 0.14, "grad_norm": 2.328125, "learning_rate": 0.00019772961558415286, "loss": 2.2918, "step": 57755 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019772922395543482, "loss": 1.984, "step": 57760 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.00019772883229333073, "loss": 2.2887, "step": 57765 }, { "epoch": 0.14, "grad_norm": 1.5703125, "learning_rate": 0.00019772844059784075, "loss": 2.2167, "step": 57770 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001977280488689649, "loss": 2.0836, "step": 57775 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.0001977276571067034, "loss": 2.175, "step": 57780 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019772726531105635, "loss": 2.3358, "step": 57785 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.0001977268734820239, "loss": 2.1978, "step": 57790 }, { "epoch": 0.14, "grad_norm": 1.75, "learning_rate": 0.00019772648161960619, "loss": 2.0961, "step": 57795 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019772608972380332, "loss": 2.0167, "step": 57800 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.00019772569779461546, "loss": 1.8811, "step": 57805 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019772530583204268, "loss": 2.1204, "step": 57810 }, { "epoch": 0.14, "grad_norm": 1.7421875, "learning_rate": 0.00019772491383608523, "loss": 2.2687, "step": 57815 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019772452180674317, "loss": 2.2508, "step": 57820 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.00019772412974401662, "loss": 2.2378, "step": 57825 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019772373764790572, "loss": 2.1148, "step": 57830 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019772334551841064, "loss": 2.1153, "step": 57835 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.0001977229533555315, "loss": 2.1673, "step": 57840 }, { "epoch": 0.14, "grad_norm": 1.734375, "learning_rate": 0.0001977225611592684, "loss": 2.0828, "step": 57845 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019772216892962154, "loss": 2.1051, "step": 57850 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.000197721776666591, "loss": 2.1034, "step": 57855 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.0001977213843701769, "loss": 2.1367, "step": 57860 }, { "epoch": 0.14, "grad_norm": 1.7265625, "learning_rate": 0.00019772099204037945, "loss": 1.9633, "step": 57865 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019772059967719868, "loss": 2.1444, "step": 57870 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019772020728063482, "loss": 2.176, "step": 57875 }, { "epoch": 0.14, "grad_norm": 1.859375, "learning_rate": 0.00019771981485068796, "loss": 2.1462, "step": 57880 }, { "epoch": 0.14, "grad_norm": 1.703125, "learning_rate": 0.00019771942238735823, "loss": 2.0493, "step": 57885 }, { "epoch": 0.14, "grad_norm": 2.578125, "learning_rate": 0.00019771902989064577, "loss": 2.2617, "step": 57890 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019771863736055075, "loss": 2.3041, "step": 57895 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019771824479707323, "loss": 2.0091, "step": 57900 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.0001977178522002134, "loss": 2.2325, "step": 57905 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019771745956997138, "loss": 2.2025, "step": 57910 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.00019771706690634733, "loss": 2.0909, "step": 57915 }, { "epoch": 0.14, "grad_norm": 1.6640625, "learning_rate": 0.0001977166742093413, "loss": 2.1961, "step": 57920 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019771628147895352, "loss": 2.1638, "step": 57925 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.0001977158887151841, "loss": 2.2472, "step": 57930 }, { "epoch": 0.14, "grad_norm": 1.6953125, "learning_rate": 0.00019771549591803312, "loss": 2.2966, "step": 57935 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.0001977151030875008, "loss": 2.1781, "step": 57940 }, { "epoch": 0.14, "grad_norm": 1.734375, "learning_rate": 0.00019771471022358717, "loss": 2.0305, "step": 57945 }, { "epoch": 0.14, "grad_norm": 2.484375, "learning_rate": 0.00019771431732629248, "loss": 2.2963, "step": 57950 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.0001977139243956168, "loss": 2.2655, "step": 57955 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019771353143156027, "loss": 2.118, "step": 57960 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.000197713138434123, "loss": 2.0891, "step": 57965 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019771274540330517, "loss": 2.1059, "step": 57970 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019771235233910685, "loss": 2.1465, "step": 57975 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019771195924152827, "loss": 2.1369, "step": 57980 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019771156611056952, "loss": 2.1702, "step": 57985 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.0001977111729462307, "loss": 2.3519, "step": 57990 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019771077974851196, "loss": 2.2067, "step": 57995 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019771038651741348, "loss": 2.1067, "step": 58000 }, { "epoch": 0.14, "grad_norm": 1.7265625, "learning_rate": 0.00019770999325293532, "loss": 2.3443, "step": 58005 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.00019770959995507768, "loss": 1.9158, "step": 58010 }, { "epoch": 0.14, "grad_norm": 2.375, "learning_rate": 0.00019770920662384064, "loss": 2.1165, "step": 58015 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.00019770881325922442, "loss": 2.1298, "step": 58020 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019770841986122903, "loss": 2.0077, "step": 58025 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019770802642985472, "loss": 2.1477, "step": 58030 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019770763296510154, "loss": 2.1211, "step": 58035 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.0001977072394669697, "loss": 2.0999, "step": 58040 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019770684593545926, "loss": 2.0784, "step": 58045 }, { "epoch": 0.14, "grad_norm": 1.703125, "learning_rate": 0.0001977064523705704, "loss": 2.2114, "step": 58050 }, { "epoch": 0.14, "grad_norm": 1.609375, "learning_rate": 0.0001977060587723033, "loss": 2.2889, "step": 58055 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019770566514065797, "loss": 2.2353, "step": 58060 }, { "epoch": 0.14, "grad_norm": 1.6328125, "learning_rate": 0.00019770527147563462, "loss": 2.2322, "step": 58065 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019770487777723338, "loss": 2.1076, "step": 58070 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.0001977044840454544, "loss": 2.2766, "step": 58075 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.0001977040902802978, "loss": 2.0308, "step": 58080 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019770369648176367, "loss": 2.2811, "step": 58085 }, { "epoch": 0.14, "grad_norm": 1.7109375, "learning_rate": 0.00019770330264985223, "loss": 2.2548, "step": 58090 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.00019770290878456354, "loss": 2.171, "step": 58095 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.0001977025148858978, "loss": 2.0896, "step": 58100 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019770212095385507, "loss": 2.412, "step": 58105 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019770172698843554, "loss": 2.0882, "step": 58110 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019770133298963933, "loss": 2.1226, "step": 58115 }, { "epoch": 0.14, "grad_norm": 1.65625, "learning_rate": 0.00019770093895746658, "loss": 1.9865, "step": 58120 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.0001977005448919174, "loss": 2.259, "step": 58125 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.00019770015079299195, "loss": 2.1568, "step": 58130 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019769975666069037, "loss": 2.3269, "step": 58135 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019769936249501276, "loss": 1.9915, "step": 58140 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019769896829595926, "loss": 2.0635, "step": 58145 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019769857406353006, "loss": 2.0914, "step": 58150 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019769817979772521, "loss": 2.0879, "step": 58155 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019769778549854494, "loss": 2.3019, "step": 58160 }, { "epoch": 0.14, "grad_norm": 1.75, "learning_rate": 0.00019769739116598932, "loss": 2.1472, "step": 58165 }, { "epoch": 0.14, "grad_norm": 1.765625, "learning_rate": 0.00019769699680005848, "loss": 1.9905, "step": 58170 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019769660240075257, "loss": 2.1275, "step": 58175 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019769620796807177, "loss": 2.1938, "step": 58180 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019769581350201617, "loss": 2.3153, "step": 58185 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.0001976954190025859, "loss": 2.0262, "step": 58190 }, { "epoch": 0.14, "grad_norm": 1.859375, "learning_rate": 0.00019769502446978106, "loss": 2.0688, "step": 58195 }, { "epoch": 0.14, "grad_norm": 3.234375, "learning_rate": 0.00019769462990360185, "loss": 2.2583, "step": 58200 }, { "epoch": 0.14, "grad_norm": 1.640625, "learning_rate": 0.0001976942353040484, "loss": 2.3213, "step": 58205 }, { "epoch": 0.14, "grad_norm": 3.375, "learning_rate": 0.00019769384067112083, "loss": 2.1166, "step": 58210 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019769344600481924, "loss": 2.3544, "step": 58215 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019769305130514381, "loss": 2.1447, "step": 58220 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019769265657209467, "loss": 2.154, "step": 58225 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019769226180567197, "loss": 2.1151, "step": 58230 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019769186700587579, "loss": 2.2506, "step": 58235 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001976914721727063, "loss": 2.1706, "step": 58240 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019769107730616364, "loss": 1.9916, "step": 58245 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019769068240624792, "loss": 2.3522, "step": 58250 }, { "epoch": 0.14, "grad_norm": 1.65625, "learning_rate": 0.00019769028747295932, "loss": 2.3614, "step": 58255 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019768989250629795, "loss": 2.1984, "step": 58260 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019768949750626388, "loss": 2.0201, "step": 58265 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019768910247285737, "loss": 2.1487, "step": 58270 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019768870740607847, "loss": 2.0574, "step": 58275 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019768831230592733, "loss": 2.1852, "step": 58280 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.0001976879171724041, "loss": 2.2931, "step": 58285 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976875220055089, "loss": 2.0677, "step": 58290 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019768712680524186, "loss": 2.2244, "step": 58295 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019768673157160316, "loss": 2.0121, "step": 58300 }, { "epoch": 0.14, "grad_norm": 1.59375, "learning_rate": 0.00019768633630459285, "loss": 2.0405, "step": 58305 }, { "epoch": 0.14, "grad_norm": 1.6953125, "learning_rate": 0.00019768594100421117, "loss": 2.0526, "step": 58310 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019768554567045816, "loss": 2.1709, "step": 58315 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.000197685150303334, "loss": 2.2078, "step": 58320 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019768475490283883, "loss": 2.1504, "step": 58325 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.0001976843594689728, "loss": 2.2271, "step": 58330 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019768396400173598, "loss": 2.13, "step": 58335 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019768356850112856, "loss": 1.97, "step": 58340 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019768317296715068, "loss": 2.3702, "step": 58345 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.00019768277739980246, "loss": 2.0897, "step": 58350 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019768238179908402, "loss": 2.1462, "step": 58355 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019768198616499548, "loss": 2.4325, "step": 58360 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019768159049753702, "loss": 2.2761, "step": 58365 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019768119479670877, "loss": 2.1628, "step": 58370 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.0001976807990625108, "loss": 2.0719, "step": 58375 }, { "epoch": 0.14, "grad_norm": 1.703125, "learning_rate": 0.00019768040329494338, "loss": 1.9564, "step": 58380 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019768000749400652, "loss": 2.1318, "step": 58385 }, { "epoch": 0.14, "grad_norm": 1.65625, "learning_rate": 0.0001976796116597004, "loss": 2.1567, "step": 58390 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019767921579202515, "loss": 2.3548, "step": 58395 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019767881989098091, "loss": 2.1892, "step": 58400 }, { "epoch": 0.14, "grad_norm": 1.71875, "learning_rate": 0.00019767842395656784, "loss": 2.1772, "step": 58405 }, { "epoch": 0.14, "grad_norm": 2.328125, "learning_rate": 0.000197678027988786, "loss": 2.3416, "step": 58410 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019767763198763562, "loss": 2.1, "step": 58415 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019767723595311674, "loss": 2.0223, "step": 58420 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976768398852296, "loss": 2.1754, "step": 58425 }, { "epoch": 0.14, "grad_norm": 1.7421875, "learning_rate": 0.00019767644378397426, "loss": 2.183, "step": 58430 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019767604764935084, "loss": 2.2167, "step": 58435 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019767565148135955, "loss": 2.2344, "step": 58440 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.00019767525528000047, "loss": 2.0206, "step": 58445 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.00019767485904527378, "loss": 2.3612, "step": 58450 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019767446277717957, "loss": 2.0651, "step": 58455 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019767406647571795, "loss": 2.3374, "step": 58460 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019767367014088914, "loss": 2.1488, "step": 58465 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.00019767327377269323, "loss": 2.1301, "step": 58470 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019767287737113037, "loss": 2.1145, "step": 58475 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019767248093620064, "loss": 2.1611, "step": 58480 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019767208446790427, "loss": 2.1725, "step": 58485 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.0001976716879662413, "loss": 2.1902, "step": 58490 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019767129143121194, "loss": 2.1708, "step": 58495 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.0001976708948628163, "loss": 2.093, "step": 58500 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019767049826105448, "loss": 2.1953, "step": 58505 }, { "epoch": 0.14, "grad_norm": 2.546875, "learning_rate": 0.00019767010162592666, "loss": 2.254, "step": 58510 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019766970495743299, "loss": 2.1739, "step": 58515 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019766930825557355, "loss": 1.9783, "step": 58520 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976689115203485, "loss": 2.1323, "step": 58525 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019766851475175797, "loss": 2.1851, "step": 58530 }, { "epoch": 0.14, "grad_norm": 1.765625, "learning_rate": 0.00019766811794980215, "loss": 2.1502, "step": 58535 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001976677211144811, "loss": 2.2701, "step": 58540 }, { "epoch": 0.14, "grad_norm": 2.6875, "learning_rate": 0.00019766732424579498, "loss": 2.1177, "step": 58545 }, { "epoch": 0.14, "grad_norm": 1.5390625, "learning_rate": 0.00019766692734374394, "loss": 2.2018, "step": 58550 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019766653040832809, "loss": 2.1798, "step": 58555 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.0001976661334395476, "loss": 2.0107, "step": 58560 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019766573643740257, "loss": 2.3259, "step": 58565 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019766533940189315, "loss": 2.0513, "step": 58570 }, { "epoch": 0.14, "grad_norm": 1.5859375, "learning_rate": 0.00019766494233301951, "loss": 2.1898, "step": 58575 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019766454523078176, "loss": 2.14, "step": 58580 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019766414809517998, "loss": 2.1456, "step": 58585 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019766375092621438, "loss": 2.1555, "step": 58590 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019766335372388508, "loss": 2.2836, "step": 58595 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.00019766295648819223, "loss": 2.3846, "step": 58600 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.0001976625592191359, "loss": 2.2575, "step": 58605 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019766216191671626, "loss": 2.1793, "step": 58610 }, { "epoch": 0.14, "grad_norm": 2.640625, "learning_rate": 0.0001976617645809335, "loss": 2.2553, "step": 58615 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.0001976613672117877, "loss": 2.2066, "step": 58620 }, { "epoch": 0.14, "grad_norm": 2.59375, "learning_rate": 0.00019766096980927897, "loss": 2.1852, "step": 58625 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019766057237340752, "loss": 1.9443, "step": 58630 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019766017490417342, "loss": 2.2088, "step": 58635 }, { "epoch": 0.14, "grad_norm": 2.390625, "learning_rate": 0.00019765977740157683, "loss": 2.1932, "step": 58640 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019765937986561792, "loss": 2.1396, "step": 58645 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.0001976589822962968, "loss": 2.2024, "step": 58650 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019765858469361356, "loss": 2.1012, "step": 58655 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001976581870575684, "loss": 2.1451, "step": 58660 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019765778938816143, "loss": 2.1696, "step": 58665 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.00019765739168539276, "loss": 2.1696, "step": 58670 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.0001976569939492626, "loss": 2.105, "step": 58675 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019765659617977102, "loss": 2.256, "step": 58680 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019765619837691818, "loss": 2.2091, "step": 58685 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.00019765580054070418, "loss": 2.3195, "step": 58690 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019765540267112922, "loss": 2.2792, "step": 58695 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001976550047681934, "loss": 2.1041, "step": 58700 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019765460683189687, "loss": 2.3104, "step": 58705 }, { "epoch": 0.14, "grad_norm": 1.734375, "learning_rate": 0.00019765420886223972, "loss": 2.1175, "step": 58710 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019765381085922216, "loss": 2.1635, "step": 58715 }, { "epoch": 0.14, "grad_norm": 1.5859375, "learning_rate": 0.00019765341282284427, "loss": 2.2679, "step": 58720 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.0001976530147531062, "loss": 1.9457, "step": 58725 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.0001976526166500081, "loss": 2.0811, "step": 58730 }, { "epoch": 0.14, "grad_norm": 2.28125, "learning_rate": 0.0001976522185135501, "loss": 2.2316, "step": 58735 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.0001976518203437323, "loss": 2.2218, "step": 58740 }, { "epoch": 0.14, "grad_norm": 2.34375, "learning_rate": 0.0001976514221405549, "loss": 2.222, "step": 58745 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.000197651023904018, "loss": 2.1862, "step": 58750 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019765062563412172, "loss": 2.2131, "step": 58755 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976502273308662, "loss": 2.2073, "step": 58760 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019764982899425163, "loss": 2.1354, "step": 58765 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.0001976494306242781, "loss": 2.1835, "step": 58770 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019764903222094576, "loss": 2.1997, "step": 58775 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.0001976486337842547, "loss": 2.0239, "step": 58780 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019764823531420514, "loss": 2.3509, "step": 58785 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019764783681079716, "loss": 2.2659, "step": 58790 }, { "epoch": 0.14, "grad_norm": 1.734375, "learning_rate": 0.0001976474382740309, "loss": 2.1435, "step": 58795 }, { "epoch": 0.14, "grad_norm": 1.5625, "learning_rate": 0.0001976470397039065, "loss": 2.1355, "step": 58800 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019764664110042408, "loss": 2.2324, "step": 58805 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019764624246358382, "loss": 2.0667, "step": 58810 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019764584379338587, "loss": 2.0762, "step": 58815 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019764544508983026, "loss": 2.3549, "step": 58820 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019764504635291725, "loss": 2.2292, "step": 58825 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976446475826469, "loss": 2.3329, "step": 58830 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019764424877901938, "loss": 2.1658, "step": 58835 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019764384994203478, "loss": 2.1164, "step": 58840 }, { "epoch": 0.14, "grad_norm": 2.421875, "learning_rate": 0.00019764345107169332, "loss": 2.2398, "step": 58845 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019764305216799503, "loss": 2.1288, "step": 58850 }, { "epoch": 0.14, "grad_norm": 1.734375, "learning_rate": 0.00019764265323094014, "loss": 2.3128, "step": 58855 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019764225426052874, "loss": 2.1337, "step": 58860 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.00019764185525676097, "loss": 2.1383, "step": 58865 }, { "epoch": 0.14, "grad_norm": 1.3671875, "learning_rate": 0.00019764145621963697, "loss": 2.1117, "step": 58870 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019764105714915688, "loss": 2.1802, "step": 58875 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019764065804532086, "loss": 2.2717, "step": 58880 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019764025890812897, "loss": 2.3305, "step": 58885 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019763985973758144, "loss": 2.264, "step": 58890 }, { "epoch": 0.14, "grad_norm": 1.7265625, "learning_rate": 0.00019763946053367835, "loss": 2.0958, "step": 58895 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019763906129641984, "loss": 2.2432, "step": 58900 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019763866202580606, "loss": 2.1307, "step": 58905 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019763826272183716, "loss": 2.1006, "step": 58910 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.00019763786338451323, "loss": 2.1121, "step": 58915 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019763746401383445, "loss": 2.0394, "step": 58920 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019763706460980095, "loss": 2.2045, "step": 58925 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019763666517241287, "loss": 2.0771, "step": 58930 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019763626570167028, "loss": 2.1473, "step": 58935 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019763586619757343, "loss": 2.2556, "step": 58940 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019763546666012237, "loss": 2.2004, "step": 58945 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019763506708931724, "loss": 2.1555, "step": 58950 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.0001976346674851582, "loss": 2.3529, "step": 58955 }, { "epoch": 0.14, "grad_norm": 1.75, "learning_rate": 0.00019763426784764546, "loss": 2.2358, "step": 58960 }, { "epoch": 0.14, "grad_norm": 2.34375, "learning_rate": 0.000197633868176779, "loss": 2.1461, "step": 58965 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.0001976334684725591, "loss": 2.2276, "step": 58970 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001976330687349858, "loss": 2.3187, "step": 58975 }, { "epoch": 0.14, "grad_norm": 2.484375, "learning_rate": 0.0001976326689640593, "loss": 2.1441, "step": 58980 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976322691597797, "loss": 2.1848, "step": 58985 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019763186932214715, "loss": 2.1252, "step": 58990 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019763146945116176, "loss": 2.1071, "step": 58995 }, { "epoch": 0.14, "grad_norm": 2.453125, "learning_rate": 0.0001976310695468237, "loss": 2.0999, "step": 59000 }, { "epoch": 0.14, "grad_norm": 1.671875, "learning_rate": 0.0001976306696091331, "loss": 2.204, "step": 59005 }, { "epoch": 0.14, "grad_norm": 1.859375, "learning_rate": 0.0001976302696380901, "loss": 2.0778, "step": 59010 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001976298696336948, "loss": 2.2088, "step": 59015 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.0001976294695959474, "loss": 2.0969, "step": 59020 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019762906952484798, "loss": 2.1798, "step": 59025 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019762866942039673, "loss": 2.1725, "step": 59030 }, { "epoch": 0.14, "grad_norm": 1.5234375, "learning_rate": 0.0001976282692825937, "loss": 2.0106, "step": 59035 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019762786911143915, "loss": 2.014, "step": 59040 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.0001976274689069331, "loss": 2.3109, "step": 59045 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019762706866907577, "loss": 2.3315, "step": 59050 }, { "epoch": 0.14, "grad_norm": 2.5625, "learning_rate": 0.00019762666839786723, "loss": 2.3296, "step": 59055 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019762626809330768, "loss": 2.2064, "step": 59060 }, { "epoch": 0.14, "grad_norm": 2.359375, "learning_rate": 0.0001976258677553972, "loss": 2.1126, "step": 59065 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019762546738413597, "loss": 2.0696, "step": 59070 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.0001976250669795241, "loss": 2.2259, "step": 59075 }, { "epoch": 0.14, "grad_norm": 2.453125, "learning_rate": 0.00019762466654156175, "loss": 2.2421, "step": 59080 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.000197624266070249, "loss": 2.1342, "step": 59085 }, { "epoch": 0.14, "grad_norm": 1.6640625, "learning_rate": 0.0001976238655655861, "loss": 2.2434, "step": 59090 }, { "epoch": 0.14, "grad_norm": 1.75, "learning_rate": 0.0001976234650275731, "loss": 2.2841, "step": 59095 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019762306445621013, "loss": 2.1492, "step": 59100 }, { "epoch": 0.14, "grad_norm": 1.5859375, "learning_rate": 0.00019762266385149735, "loss": 2.1783, "step": 59105 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019762226321343488, "loss": 2.0961, "step": 59110 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019762186254202292, "loss": 2.1402, "step": 59115 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019762146183726153, "loss": 2.024, "step": 59120 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.0001976210610991509, "loss": 2.1682, "step": 59125 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019762066032769112, "loss": 2.153, "step": 59130 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019762025952288237, "loss": 2.2221, "step": 59135 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019761985868472476, "loss": 2.0816, "step": 59140 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.00019761945781321844, "loss": 2.1267, "step": 59145 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019761905690836353, "loss": 2.3028, "step": 59150 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.0001976186559701602, "loss": 2.1975, "step": 59155 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019761825499860856, "loss": 2.171, "step": 59160 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019761785399370876, "loss": 2.0592, "step": 59165 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019761745295546093, "loss": 2.2265, "step": 59170 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019761705188386518, "loss": 2.2825, "step": 59175 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.0001976166507789217, "loss": 2.3207, "step": 59180 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019761624964063058, "loss": 2.3479, "step": 59185 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.000197615848468992, "loss": 2.3105, "step": 59190 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.0001976154472640061, "loss": 1.9833, "step": 59195 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019761504602567296, "loss": 2.2097, "step": 59200 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.00019761464475399274, "loss": 2.2587, "step": 59205 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.0001976142434489656, "loss": 2.1609, "step": 59210 }, { "epoch": 0.14, "grad_norm": 2.40625, "learning_rate": 0.00019761384211059167, "loss": 2.1187, "step": 59215 }, { "epoch": 0.14, "grad_norm": 1.640625, "learning_rate": 0.0001976134407388711, "loss": 2.2967, "step": 59220 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019761303933380398, "loss": 2.0422, "step": 59225 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019761263789539046, "loss": 2.2363, "step": 59230 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.0001976122364236307, "loss": 2.4463, "step": 59235 }, { "epoch": 0.14, "grad_norm": 2.4375, "learning_rate": 0.00019761183491852488, "loss": 2.1979, "step": 59240 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019761143338007303, "loss": 2.076, "step": 59245 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019761103180827538, "loss": 2.4185, "step": 59250 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019761063020313203, "loss": 2.232, "step": 59255 }, { "epoch": 0.14, "grad_norm": 2.515625, "learning_rate": 0.0001976102285646431, "loss": 2.045, "step": 59260 }, { "epoch": 0.14, "grad_norm": 2.28125, "learning_rate": 0.00019760982689280874, "loss": 2.0831, "step": 59265 }, { "epoch": 0.14, "grad_norm": 1.7578125, "learning_rate": 0.0001976094251876291, "loss": 2.1429, "step": 59270 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019760902344910432, "loss": 2.2179, "step": 59275 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.0001976086216772345, "loss": 2.1851, "step": 59280 }, { "epoch": 0.14, "grad_norm": 1.46875, "learning_rate": 0.00019760821987201984, "loss": 2.2887, "step": 59285 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019760781803346042, "loss": 2.2619, "step": 59290 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.0001976074161615564, "loss": 2.3035, "step": 59295 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019760701425630792, "loss": 2.2137, "step": 59300 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001976066123177151, "loss": 1.9698, "step": 59305 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019760621034577814, "loss": 2.1585, "step": 59310 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019760580834049708, "loss": 2.3467, "step": 59315 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019760540630187214, "loss": 1.9525, "step": 59320 }, { "epoch": 0.14, "grad_norm": 2.328125, "learning_rate": 0.0001976050042299034, "loss": 2.2501, "step": 59325 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019760460212459103, "loss": 2.2726, "step": 59330 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019760419998593513, "loss": 2.0697, "step": 59335 }, { "epoch": 0.14, "grad_norm": 1.6640625, "learning_rate": 0.0001976037978139359, "loss": 2.0316, "step": 59340 }, { "epoch": 0.14, "grad_norm": 2.453125, "learning_rate": 0.00019760339560859344, "loss": 2.4135, "step": 59345 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019760299336990785, "loss": 2.2254, "step": 59350 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019760259109787934, "loss": 2.1862, "step": 59355 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019760218879250803, "loss": 2.2242, "step": 59360 }, { "epoch": 0.14, "grad_norm": 1.7421875, "learning_rate": 0.000197601786453794, "loss": 2.1894, "step": 59365 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019760138408173746, "loss": 2.0562, "step": 59370 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019760098167633853, "loss": 2.1179, "step": 59375 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019760057923759732, "loss": 2.2416, "step": 59380 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019760017676551396, "loss": 2.2249, "step": 59385 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019759977426008862, "loss": 2.1921, "step": 59390 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019759937172132147, "loss": 2.0998, "step": 59395 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.00019759896914921254, "loss": 2.0752, "step": 59400 }, { "epoch": 0.14, "grad_norm": 1.71875, "learning_rate": 0.00019759856654376207, "loss": 2.0767, "step": 59405 }, { "epoch": 0.14, "grad_norm": 1.6640625, "learning_rate": 0.00019759816390497014, "loss": 2.3199, "step": 59410 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019759776123283692, "loss": 2.1823, "step": 59415 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019759735852736254, "loss": 2.2231, "step": 59420 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.0001975969557885471, "loss": 2.1756, "step": 59425 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.0001975965530163908, "loss": 2.2636, "step": 59430 }, { "epoch": 0.14, "grad_norm": 2.53125, "learning_rate": 0.00019759615021089374, "loss": 2.1273, "step": 59435 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.0001975957473720561, "loss": 2.2306, "step": 59440 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019759534449987793, "loss": 2.1109, "step": 59445 }, { "epoch": 0.14, "grad_norm": 1.75, "learning_rate": 0.00019759494159435944, "loss": 2.1241, "step": 59450 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019759453865550076, "loss": 2.1856, "step": 59455 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.000197594135683302, "loss": 2.3401, "step": 59460 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001975937326777633, "loss": 2.3522, "step": 59465 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019759332963888485, "loss": 2.0737, "step": 59470 }, { "epoch": 0.14, "grad_norm": 2.375, "learning_rate": 0.00019759292656666672, "loss": 2.0555, "step": 59475 }, { "epoch": 0.14, "grad_norm": 2.34375, "learning_rate": 0.00019759252346110908, "loss": 2.0936, "step": 59480 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019759212032221206, "loss": 2.1624, "step": 59485 }, { "epoch": 0.14, "grad_norm": 2.359375, "learning_rate": 0.0001975917171499758, "loss": 2.2623, "step": 59490 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019759131394440048, "loss": 2.0532, "step": 59495 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019759091070548613, "loss": 2.2511, "step": 59500 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.000197590507433233, "loss": 2.0116, "step": 59505 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019759010412764117, "loss": 2.0149, "step": 59510 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.0001975897007887108, "loss": 2.2137, "step": 59515 }, { "epoch": 0.14, "grad_norm": 1.671875, "learning_rate": 0.000197589297416442, "loss": 2.1516, "step": 59520 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019758889401083493, "loss": 2.0649, "step": 59525 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019758849057188976, "loss": 2.2815, "step": 59530 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019758808709960656, "loss": 2.3797, "step": 59535 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.0001975876835939855, "loss": 2.1988, "step": 59540 }, { "epoch": 0.14, "grad_norm": 2.28125, "learning_rate": 0.0001975872800550267, "loss": 1.9853, "step": 59545 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.00019758687648273036, "loss": 2.1176, "step": 59550 }, { "epoch": 0.14, "grad_norm": 1.6953125, "learning_rate": 0.00019758647287709656, "loss": 2.0505, "step": 59555 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.0001975860692381254, "loss": 2.1192, "step": 59560 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019758566556581713, "loss": 2.2257, "step": 59565 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.0001975852618601718, "loss": 2.0843, "step": 59570 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019758485812118958, "loss": 2.0646, "step": 59575 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019758445434887062, "loss": 2.2133, "step": 59580 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019758405054321504, "loss": 2.1551, "step": 59585 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019758364670422292, "loss": 2.1695, "step": 59590 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019758324283189452, "loss": 2.1706, "step": 59595 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019758283892622988, "loss": 2.1078, "step": 59600 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.0001975824349872292, "loss": 2.188, "step": 59605 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019758203101489258, "loss": 2.2505, "step": 59610 }, { "epoch": 0.14, "grad_norm": 2.40625, "learning_rate": 0.00019758162700922016, "loss": 2.1139, "step": 59615 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.0001975812229702121, "loss": 2.1175, "step": 59620 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.0001975808188978685, "loss": 1.9842, "step": 59625 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019758041479218954, "loss": 2.1851, "step": 59630 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019758001065317532, "loss": 2.0764, "step": 59635 }, { "epoch": 0.14, "grad_norm": 1.546875, "learning_rate": 0.00019757960648082606, "loss": 2.0622, "step": 59640 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019757920227514179, "loss": 2.134, "step": 59645 }, { "epoch": 0.14, "grad_norm": 1.7421875, "learning_rate": 0.00019757879803612268, "loss": 2.196, "step": 59650 }, { "epoch": 0.14, "grad_norm": 2.4375, "learning_rate": 0.00019757839376376892, "loss": 2.1346, "step": 59655 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019757798945808057, "loss": 2.1503, "step": 59660 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019757758511905783, "loss": 2.2037, "step": 59665 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019757718074670083, "loss": 1.9665, "step": 59670 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.0001975767763410097, "loss": 2.1944, "step": 59675 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019757637190198456, "loss": 2.1609, "step": 59680 }, { "epoch": 0.14, "grad_norm": 1.65625, "learning_rate": 0.00019757596742962555, "loss": 2.2909, "step": 59685 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019757556292393283, "loss": 2.2609, "step": 59690 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019757515838490653, "loss": 2.2935, "step": 59695 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019757475381254678, "loss": 2.3606, "step": 59700 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.0001975743492068537, "loss": 2.011, "step": 59705 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.0001975739445678275, "loss": 2.2119, "step": 59710 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019757353989546824, "loss": 2.1125, "step": 59715 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019757313518977614, "loss": 2.1741, "step": 59720 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.0001975727304507512, "loss": 2.1618, "step": 59725 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019757232567839373, "loss": 2.2354, "step": 59730 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019757192087270373, "loss": 2.0799, "step": 59735 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019757151603368143, "loss": 2.2115, "step": 59740 }, { "epoch": 0.14, "grad_norm": 3.265625, "learning_rate": 0.0001975711111613269, "loss": 2.1968, "step": 59745 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.0001975707062556403, "loss": 2.1676, "step": 59750 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019757030131662182, "loss": 2.0428, "step": 59755 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.00019756989634427154, "loss": 2.1527, "step": 59760 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.0001975694913385896, "loss": 2.3246, "step": 59765 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.0001975690862995762, "loss": 2.1388, "step": 59770 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019756868122723135, "loss": 2.1831, "step": 59775 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019756827612155532, "loss": 2.3271, "step": 59780 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019756787098254818, "loss": 2.1887, "step": 59785 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.0001975674658102101, "loss": 2.1553, "step": 59790 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.0001975670606045412, "loss": 2.1081, "step": 59795 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019756665536554163, "loss": 2.0197, "step": 59800 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019756625009321148, "loss": 2.1679, "step": 59805 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019756584478755097, "loss": 1.9901, "step": 59810 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.0001975654394485602, "loss": 2.2383, "step": 59815 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.0001975650340762393, "loss": 2.1974, "step": 59820 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.0001975646286705884, "loss": 2.0414, "step": 59825 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.00019756422323160763, "loss": 2.1062, "step": 59830 }, { "epoch": 0.14, "grad_norm": 1.65625, "learning_rate": 0.0001975638177592972, "loss": 2.283, "step": 59835 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.0001975634122536572, "loss": 2.3233, "step": 59840 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019756300671468774, "loss": 2.1992, "step": 59845 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.000197562601142389, "loss": 2.052, "step": 59850 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019756219553676113, "loss": 2.0064, "step": 59855 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019756178989780422, "loss": 2.1189, "step": 59860 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019756138422551841, "loss": 2.2756, "step": 59865 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019756097851990388, "loss": 2.2416, "step": 59870 }, { "epoch": 0.14, "grad_norm": 2.71875, "learning_rate": 0.00019756057278096076, "loss": 2.2941, "step": 59875 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001975601670086892, "loss": 2.0726, "step": 59880 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.0001975597612030893, "loss": 2.1598, "step": 59885 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.0001975593553641612, "loss": 2.1421, "step": 59890 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019755894949190504, "loss": 2.1036, "step": 59895 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019755854358632102, "loss": 2.1308, "step": 59900 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.0001975581376474092, "loss": 2.3069, "step": 59905 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019755773167516978, "loss": 2.2715, "step": 59910 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019755732566960284, "loss": 2.3645, "step": 59915 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019755691963070854, "loss": 2.319, "step": 59920 }, { "epoch": 0.14, "grad_norm": 1.6171875, "learning_rate": 0.00019755651355848707, "loss": 2.066, "step": 59925 }, { "epoch": 0.14, "grad_norm": 1.65625, "learning_rate": 0.0001975561074529385, "loss": 2.1057, "step": 59930 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019755570131406298, "loss": 2.2572, "step": 59935 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019755529514186071, "loss": 2.1113, "step": 59940 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019755488893633173, "loss": 2.2769, "step": 59945 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019755448269747627, "loss": 2.2602, "step": 59950 }, { "epoch": 0.14, "grad_norm": 1.640625, "learning_rate": 0.0001975540764252944, "loss": 2.2877, "step": 59955 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.0001975536701197863, "loss": 2.076, "step": 59960 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001975532637809521, "loss": 2.2493, "step": 59965 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019755285740879192, "loss": 2.3533, "step": 59970 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019755245100330594, "loss": 2.2158, "step": 59975 }, { "epoch": 0.14, "grad_norm": 2.359375, "learning_rate": 0.00019755204456449425, "loss": 2.1107, "step": 59980 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019755163809235705, "loss": 2.0768, "step": 59985 }, { "epoch": 0.14, "grad_norm": 2.359375, "learning_rate": 0.0001975512315868944, "loss": 2.1828, "step": 59990 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.0001975508250481065, "loss": 2.2241, "step": 59995 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019755041847599345, "loss": 2.1499, "step": 60000 }, { "epoch": 0.14, "grad_norm": 1.6328125, "learning_rate": 0.0001975500118705554, "loss": 2.1677, "step": 60005 }, { "epoch": 0.14, "grad_norm": 1.7109375, "learning_rate": 0.00019754960523179254, "loss": 2.3546, "step": 60010 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019754919855970496, "loss": 2.4069, "step": 60015 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019754879185429278, "loss": 2.0341, "step": 60020 }, { "epoch": 0.14, "grad_norm": 2.375, "learning_rate": 0.00019754838511555617, "loss": 2.3201, "step": 60025 }, { "epoch": 0.14, "grad_norm": 1.7421875, "learning_rate": 0.00019754797834349526, "loss": 2.2806, "step": 60030 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.0001975475715381102, "loss": 2.17, "step": 60035 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.0001975471646994011, "loss": 2.0811, "step": 60040 }, { "epoch": 0.14, "grad_norm": 2.359375, "learning_rate": 0.00019754675782736816, "loss": 2.0504, "step": 60045 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019754635092201147, "loss": 2.3022, "step": 60050 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019754594398333115, "loss": 2.2761, "step": 60055 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.00019754553701132737, "loss": 2.2079, "step": 60060 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019754513000600028, "loss": 2.2615, "step": 60065 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019754472296734999, "loss": 2.1843, "step": 60070 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019754431589537666, "loss": 2.1931, "step": 60075 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.0001975439087900804, "loss": 2.3946, "step": 60080 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019754350165146142, "loss": 2.137, "step": 60085 }, { "epoch": 0.14, "grad_norm": 1.640625, "learning_rate": 0.0001975430944795198, "loss": 2.1329, "step": 60090 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.00019754268727425567, "loss": 2.4263, "step": 60095 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.0001975422800356692, "loss": 2.2582, "step": 60100 }, { "epoch": 0.14, "grad_norm": 1.7421875, "learning_rate": 0.00019754187276376054, "loss": 2.1765, "step": 60105 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019754146545852974, "loss": 2.1878, "step": 60110 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019754105811997706, "loss": 2.239, "step": 60115 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019754065074810258, "loss": 2.0627, "step": 60120 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.0001975402433429064, "loss": 2.2091, "step": 60125 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.00019753983590438878, "loss": 2.2088, "step": 60130 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019753942843254974, "loss": 2.0316, "step": 60135 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.00019753902092738948, "loss": 2.1031, "step": 60140 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019753861338890808, "loss": 2.151, "step": 60145 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019753820581710577, "loss": 2.328, "step": 60150 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.0001975377982119826, "loss": 2.2228, "step": 60155 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.0001975373905735388, "loss": 2.116, "step": 60160 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.0001975369829017744, "loss": 2.2575, "step": 60165 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019753657519668965, "loss": 2.272, "step": 60170 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.0001975361674582846, "loss": 2.1544, "step": 60175 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.0001975357596865594, "loss": 1.9426, "step": 60180 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019753535188151428, "loss": 2.0986, "step": 60185 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.0001975349440431493, "loss": 2.024, "step": 60190 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.0001975345361714646, "loss": 2.2295, "step": 60195 }, { "epoch": 0.14, "grad_norm": 1.765625, "learning_rate": 0.00019753412826646033, "loss": 2.0892, "step": 60200 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019753372032813664, "loss": 2.094, "step": 60205 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019753331235649365, "loss": 2.1302, "step": 60210 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.00019753290435153153, "loss": 2.1009, "step": 60215 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019753249631325042, "loss": 2.281, "step": 60220 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.0001975320882416504, "loss": 2.1509, "step": 60225 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019753168013673163, "loss": 2.1779, "step": 60230 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019753127199849434, "loss": 2.2783, "step": 60235 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019753086382693854, "loss": 2.2176, "step": 60240 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019753045562206448, "loss": 2.2347, "step": 60245 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.0001975300473838722, "loss": 2.1898, "step": 60250 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.0001975296391123619, "loss": 2.3098, "step": 60255 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019752923080753373, "loss": 2.1324, "step": 60260 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019752882246938777, "loss": 2.3168, "step": 60265 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019752841409792423, "loss": 2.2691, "step": 60270 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019752800569314318, "loss": 2.2154, "step": 60275 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.0001975275972550448, "loss": 2.1555, "step": 60280 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019752718878362927, "loss": 2.1006, "step": 60285 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019752678027889665, "loss": 2.1703, "step": 60290 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.0001975263717408471, "loss": 2.0968, "step": 60295 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.0001975259631694808, "loss": 2.1137, "step": 60300 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019752555456479783, "loss": 2.25, "step": 60305 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019752514592679839, "loss": 2.2787, "step": 60310 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019752473725548258, "loss": 2.0635, "step": 60315 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019752432855085054, "loss": 2.1262, "step": 60320 }, { "epoch": 0.14, "grad_norm": 1.671875, "learning_rate": 0.00019752391981290243, "loss": 1.9421, "step": 60325 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.0001975235110416384, "loss": 2.1861, "step": 60330 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019752310223705854, "loss": 2.2098, "step": 60335 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019752269339916306, "loss": 2.1617, "step": 60340 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019752228452795203, "loss": 2.0061, "step": 60345 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.0001975218756234256, "loss": 2.2481, "step": 60350 }, { "epoch": 0.14, "grad_norm": 1.625, "learning_rate": 0.00019752146668558396, "loss": 2.0997, "step": 60355 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.0001975210577144272, "loss": 2.1695, "step": 60360 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001975206487099555, "loss": 2.1724, "step": 60365 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019752023967216896, "loss": 2.1454, "step": 60370 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019751983060106775, "loss": 2.1712, "step": 60375 }, { "epoch": 0.14, "grad_norm": 2.625, "learning_rate": 0.00019751942149665196, "loss": 2.055, "step": 60380 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.0001975190123589218, "loss": 2.2659, "step": 60385 }, { "epoch": 0.14, "grad_norm": 1.71875, "learning_rate": 0.00019751860318787737, "loss": 2.3116, "step": 60390 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019751819398351883, "loss": 2.3876, "step": 60395 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.0001975177847458463, "loss": 2.1892, "step": 60400 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019751737547485992, "loss": 2.2429, "step": 60405 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.0001975169661705598, "loss": 2.0855, "step": 60410 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001975165568329462, "loss": 2.0749, "step": 60415 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019751614746201911, "loss": 2.132, "step": 60420 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019751573805777873, "loss": 2.1443, "step": 60425 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019751532862022527, "loss": 2.2721, "step": 60430 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019751491914935874, "loss": 2.2044, "step": 60435 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019751450964517936, "loss": 2.3135, "step": 60440 }, { "epoch": 0.14, "grad_norm": 1.7265625, "learning_rate": 0.00019751410010768728, "loss": 2.1548, "step": 60445 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.0001975136905368826, "loss": 2.1782, "step": 60450 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019751328093276546, "loss": 2.2154, "step": 60455 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019751287129533607, "loss": 2.179, "step": 60460 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019751246162459448, "loss": 2.2151, "step": 60465 }, { "epoch": 0.14, "grad_norm": 1.859375, "learning_rate": 0.00019751205192054084, "loss": 2.2085, "step": 60470 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019751164218317533, "loss": 2.3284, "step": 60475 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019751123241249806, "loss": 2.645, "step": 60480 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019751082260850922, "loss": 2.1576, "step": 60485 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.0001975104127712089, "loss": 2.2007, "step": 60490 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019751000290059724, "loss": 2.0483, "step": 60495 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019750959299667442, "loss": 2.3185, "step": 60500 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019750918305944053, "loss": 2.0667, "step": 60505 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019750877308889574, "loss": 2.3039, "step": 60510 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.0001975083630850402, "loss": 2.0303, "step": 60515 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019750795304787402, "loss": 2.2681, "step": 60520 }, { "epoch": 0.14, "grad_norm": 2.515625, "learning_rate": 0.00019750754297739737, "loss": 1.9259, "step": 60525 }, { "epoch": 0.14, "grad_norm": 3.515625, "learning_rate": 0.00019750713287361037, "loss": 2.3055, "step": 60530 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.00019750672273651315, "loss": 2.2561, "step": 60535 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019750631256610585, "loss": 2.0082, "step": 60540 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.0001975059023623887, "loss": 2.3702, "step": 60545 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019750549212536168, "loss": 2.1564, "step": 60550 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019750508185502506, "loss": 2.0125, "step": 60555 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019750467155137892, "loss": 2.145, "step": 60560 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019750426121442342, "loss": 2.1026, "step": 60565 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.0001975038508441587, "loss": 2.092, "step": 60570 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019750344044058488, "loss": 2.3035, "step": 60575 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019750303000370213, "loss": 2.1815, "step": 60580 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.0001975026195335106, "loss": 2.3326, "step": 60585 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.00019750220903001038, "loss": 2.0559, "step": 60590 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019750179849320162, "loss": 2.232, "step": 60595 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019750138792308452, "loss": 2.2702, "step": 60600 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019750097731965915, "loss": 2.2648, "step": 60605 }, { "epoch": 0.14, "grad_norm": 1.75, "learning_rate": 0.00019750056668292569, "loss": 2.3818, "step": 60610 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019750015601288428, "loss": 2.1708, "step": 60615 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019749974530953502, "loss": 2.1798, "step": 60620 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019749933457287807, "loss": 2.1745, "step": 60625 }, { "epoch": 0.14, "grad_norm": 2.90625, "learning_rate": 0.0001974989238029136, "loss": 2.1743, "step": 60630 }, { "epoch": 0.14, "grad_norm": 1.5859375, "learning_rate": 0.00019749851299964176, "loss": 2.1473, "step": 60635 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.0001974981021630626, "loss": 2.0955, "step": 60640 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.00019749769129317636, "loss": 2.3074, "step": 60645 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.0001974972803899831, "loss": 2.1885, "step": 60650 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019749686945348308, "loss": 2.1834, "step": 60655 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019749645848367628, "loss": 2.234, "step": 60660 }, { "epoch": 0.14, "grad_norm": 1.765625, "learning_rate": 0.00019749604748056296, "loss": 2.0567, "step": 60665 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.0001974956364441432, "loss": 2.2062, "step": 60670 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019749522537441721, "loss": 2.2422, "step": 60675 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019749481427138504, "loss": 2.2659, "step": 60680 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019749440313504687, "loss": 2.216, "step": 60685 }, { "epoch": 0.14, "grad_norm": 1.875, "learning_rate": 0.00019749399196540286, "loss": 2.1357, "step": 60690 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019749358076245313, "loss": 2.2085, "step": 60695 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.0001974931695261978, "loss": 2.1145, "step": 60700 }, { "epoch": 0.14, "grad_norm": 1.6328125, "learning_rate": 0.0001974927582566371, "loss": 2.1998, "step": 60705 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019749234695377106, "loss": 2.1372, "step": 60710 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019749193561759986, "loss": 2.1042, "step": 60715 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019749152424812367, "loss": 1.8138, "step": 60720 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019749111284534258, "loss": 2.1204, "step": 60725 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019749070140925677, "loss": 2.2133, "step": 60730 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019749028993986637, "loss": 2.1592, "step": 60735 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019748987843717152, "loss": 2.1326, "step": 60740 }, { "epoch": 0.14, "grad_norm": 3.375, "learning_rate": 0.0001974894669011724, "loss": 2.2932, "step": 60745 }, { "epoch": 0.14, "grad_norm": 1.7109375, "learning_rate": 0.00019748905533186904, "loss": 2.2429, "step": 60750 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019748864372926169, "loss": 2.2173, "step": 60755 }, { "epoch": 0.14, "grad_norm": 1.234375, "learning_rate": 0.00019748823209335043, "loss": 1.9039, "step": 60760 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.00019748782042413546, "loss": 2.2156, "step": 60765 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019748740872161684, "loss": 2.1223, "step": 60770 }, { "epoch": 0.14, "grad_norm": 1.6953125, "learning_rate": 0.00019748699698579478, "loss": 1.9877, "step": 60775 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019748658521666937, "loss": 2.1515, "step": 60780 }, { "epoch": 0.14, "grad_norm": 2.234375, "learning_rate": 0.0001974861734142408, "loss": 1.899, "step": 60785 }, { "epoch": 0.14, "grad_norm": 1.5078125, "learning_rate": 0.00019748576157850914, "loss": 1.9814, "step": 60790 }, { "epoch": 0.14, "grad_norm": 1.5, "learning_rate": 0.00019748534970947463, "loss": 2.0663, "step": 60795 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019748493780713733, "loss": 2.0409, "step": 60800 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.0001974845258714974, "loss": 2.314, "step": 60805 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019748411390255503, "loss": 2.1527, "step": 60810 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019748370190031026, "loss": 2.1682, "step": 60815 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019748328986476336, "loss": 2.1742, "step": 60820 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019748287779591434, "loss": 2.2374, "step": 60825 }, { "epoch": 0.14, "grad_norm": 1.71875, "learning_rate": 0.0001974824656937634, "loss": 2.1229, "step": 60830 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019748205355831072, "loss": 2.1231, "step": 60835 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019748164138955638, "loss": 2.0801, "step": 60840 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019748122918750055, "loss": 2.1731, "step": 60845 }, { "epoch": 0.14, "grad_norm": 2.484375, "learning_rate": 0.00019748081695214337, "loss": 2.2072, "step": 60850 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019748040468348498, "loss": 2.3707, "step": 60855 }, { "epoch": 0.14, "grad_norm": 1.890625, "learning_rate": 0.0001974799923815255, "loss": 2.2053, "step": 60860 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.00019747958004626509, "loss": 2.0481, "step": 60865 }, { "epoch": 0.14, "grad_norm": 2.34375, "learning_rate": 0.00019747916767770386, "loss": 2.107, "step": 60870 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.000197478755275842, "loss": 2.0319, "step": 60875 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019747834284067966, "loss": 2.066, "step": 60880 }, { "epoch": 0.14, "grad_norm": 1.7890625, "learning_rate": 0.00019747793037221692, "loss": 2.0475, "step": 60885 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019747751787045398, "loss": 2.2826, "step": 60890 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.0001974771053353909, "loss": 2.088, "step": 60895 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.0001974766927670279, "loss": 2.2054, "step": 60900 }, { "epoch": 0.14, "grad_norm": 2.328125, "learning_rate": 0.00019747628016536511, "loss": 2.2831, "step": 60905 }, { "epoch": 0.14, "grad_norm": 2.5625, "learning_rate": 0.00019747586753040265, "loss": 2.2513, "step": 60910 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019747545486214064, "loss": 2.2298, "step": 60915 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.00019747504216057927, "loss": 2.2414, "step": 60920 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019747462942571863, "loss": 2.2489, "step": 60925 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019747421665755895, "loss": 2.1901, "step": 60930 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019747380385610024, "loss": 1.9389, "step": 60935 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019747339102134276, "loss": 2.2485, "step": 60940 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019747297815328655, "loss": 2.2123, "step": 60945 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019747256525193185, "loss": 2.124, "step": 60950 }, { "epoch": 0.14, "grad_norm": 1.6640625, "learning_rate": 0.00019747215231727877, "loss": 2.1416, "step": 60955 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.00019747173934932737, "loss": 2.0923, "step": 60960 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.0001974713263480779, "loss": 2.4571, "step": 60965 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019747091331353045, "loss": 2.1555, "step": 60970 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019747050024568517, "loss": 2.1784, "step": 60975 }, { "epoch": 0.14, "grad_norm": 1.9921875, "learning_rate": 0.00019747008714454217, "loss": 2.3201, "step": 60980 }, { "epoch": 0.14, "grad_norm": 1.6484375, "learning_rate": 0.00019746967401010167, "loss": 1.9942, "step": 60985 }, { "epoch": 0.14, "grad_norm": 2.375, "learning_rate": 0.00019746926084236372, "loss": 2.1083, "step": 60990 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019746884764132853, "loss": 2.0822, "step": 60995 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001974684344069962, "loss": 2.0507, "step": 61000 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019746802113936688, "loss": 2.085, "step": 61005 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019746760783844074, "loss": 2.3202, "step": 61010 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019746719450421788, "loss": 2.1283, "step": 61015 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019746678113669847, "loss": 2.2735, "step": 61020 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001974663677358826, "loss": 2.2177, "step": 61025 }, { "epoch": 0.14, "grad_norm": 1.734375, "learning_rate": 0.0001974659543017705, "loss": 2.3374, "step": 61030 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019746554083436223, "loss": 2.1475, "step": 61035 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.000197465127333658, "loss": 2.0819, "step": 61040 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.00019746471379965787, "loss": 2.2919, "step": 61045 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.0001974643002323621, "loss": 1.9744, "step": 61050 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019746388663177068, "loss": 2.3112, "step": 61055 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019746347299788385, "loss": 2.2259, "step": 61060 }, { "epoch": 0.14, "grad_norm": 1.9765625, "learning_rate": 0.00019746305933070172, "loss": 2.201, "step": 61065 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019746264563022447, "loss": 2.3149, "step": 61070 }, { "epoch": 0.14, "grad_norm": 1.71875, "learning_rate": 0.0001974622318964522, "loss": 2.2871, "step": 61075 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019746181812938508, "loss": 2.1377, "step": 61080 }, { "epoch": 0.14, "grad_norm": 1.8828125, "learning_rate": 0.00019746140432902324, "loss": 2.1647, "step": 61085 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019746099049536677, "loss": 2.3319, "step": 61090 }, { "epoch": 0.14, "grad_norm": 1.8046875, "learning_rate": 0.00019746057662841588, "loss": 2.1793, "step": 61095 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.0001974601627281707, "loss": 2.1964, "step": 61100 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019745974879463134, "loss": 2.1889, "step": 61105 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019745933482779798, "loss": 2.1766, "step": 61110 }, { "epoch": 0.14, "grad_norm": 1.78125, "learning_rate": 0.00019745892082767075, "loss": 2.2492, "step": 61115 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001974585067942498, "loss": 2.0972, "step": 61120 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019745809272753522, "loss": 2.071, "step": 61125 }, { "epoch": 0.14, "grad_norm": 1.5546875, "learning_rate": 0.00019745767862752717, "loss": 2.0962, "step": 61130 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019745726449422587, "loss": 2.232, "step": 61135 }, { "epoch": 0.14, "grad_norm": 2.328125, "learning_rate": 0.00019745685032763135, "loss": 2.1591, "step": 61140 }, { "epoch": 0.14, "grad_norm": 1.8125, "learning_rate": 0.00019745643612774382, "loss": 2.0084, "step": 61145 }, { "epoch": 0.14, "grad_norm": 1.7734375, "learning_rate": 0.0001974560218945634, "loss": 2.2437, "step": 61150 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019745560762809026, "loss": 2.0864, "step": 61155 }, { "epoch": 0.14, "grad_norm": 1.8203125, "learning_rate": 0.00019745519332832449, "loss": 2.1252, "step": 61160 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019745477899526628, "loss": 2.092, "step": 61165 }, { "epoch": 0.14, "grad_norm": 1.984375, "learning_rate": 0.00019745436462891572, "loss": 2.1752, "step": 61170 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019745395022927303, "loss": 2.1607, "step": 61175 }, { "epoch": 0.14, "grad_norm": 1.6875, "learning_rate": 0.00019745353579633825, "loss": 2.3512, "step": 61180 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001974531213301116, "loss": 2.2593, "step": 61185 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001974527068305932, "loss": 2.1688, "step": 61190 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019745229229778318, "loss": 1.9087, "step": 61195 }, { "epoch": 0.14, "grad_norm": 1.5546875, "learning_rate": 0.0001974518777316817, "loss": 2.0592, "step": 61200 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019745146313228888, "loss": 1.9799, "step": 61205 }, { "epoch": 0.14, "grad_norm": 1.8671875, "learning_rate": 0.00019745104849960488, "loss": 2.0846, "step": 61210 }, { "epoch": 0.14, "grad_norm": 1.6015625, "learning_rate": 0.00019745063383362982, "loss": 2.1726, "step": 61215 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019745021913436387, "loss": 2.1332, "step": 61220 }, { "epoch": 0.14, "grad_norm": 2.3125, "learning_rate": 0.00019744980440180717, "loss": 2.0844, "step": 61225 }, { "epoch": 0.14, "grad_norm": 1.859375, "learning_rate": 0.0001974493896359598, "loss": 2.0518, "step": 61230 }, { "epoch": 0.14, "grad_norm": 1.5234375, "learning_rate": 0.000197448974836822, "loss": 2.0169, "step": 61235 }, { "epoch": 0.14, "grad_norm": 1.7109375, "learning_rate": 0.00019744856000439382, "loss": 2.2611, "step": 61240 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.0001974481451386755, "loss": 2.1775, "step": 61245 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.00019744773023966708, "loss": 2.1374, "step": 61250 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019744731530736876, "loss": 2.2877, "step": 61255 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001974469003417807, "loss": 2.3534, "step": 61260 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.00019744648534290299, "loss": 2.2107, "step": 61265 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001974460703107358, "loss": 2.3483, "step": 61270 }, { "epoch": 0.14, "grad_norm": 2.0, "learning_rate": 0.00019744565524527924, "loss": 2.3632, "step": 61275 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019744524014653347, "loss": 2.2034, "step": 61280 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019744482501449868, "loss": 2.2858, "step": 61285 }, { "epoch": 0.14, "grad_norm": 1.84375, "learning_rate": 0.00019744440984917496, "loss": 2.3116, "step": 61290 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.00019744399465056243, "loss": 2.1389, "step": 61295 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019744357941866133, "loss": 2.1435, "step": 61300 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.00019744316415347166, "loss": 2.077, "step": 61305 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.0001974427488549937, "loss": 2.2607, "step": 61310 }, { "epoch": 0.14, "grad_norm": 1.953125, "learning_rate": 0.00019744233352322747, "loss": 2.1704, "step": 61315 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.0001974419181581732, "loss": 2.2473, "step": 61320 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019744150275983102, "loss": 2.1645, "step": 61325 }, { "epoch": 0.14, "grad_norm": 2.4375, "learning_rate": 0.00019744108732820106, "loss": 2.1761, "step": 61330 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019744067186328344, "loss": 2.1257, "step": 61335 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.0001974402563650783, "loss": 2.2086, "step": 61340 }, { "epoch": 0.14, "grad_norm": 1.8515625, "learning_rate": 0.00019743984083358585, "loss": 2.216, "step": 61345 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019743942526880614, "loss": 2.233, "step": 61350 }, { "epoch": 0.14, "grad_norm": 1.9375, "learning_rate": 0.00019743900967073935, "loss": 2.0767, "step": 61355 }, { "epoch": 0.14, "grad_norm": 2.515625, "learning_rate": 0.00019743859403938564, "loss": 2.0664, "step": 61360 }, { "epoch": 0.14, "grad_norm": 2.015625, "learning_rate": 0.00019743817837474515, "loss": 2.1175, "step": 61365 }, { "epoch": 0.14, "grad_norm": 2.296875, "learning_rate": 0.000197437762676818, "loss": 2.1269, "step": 61370 }, { "epoch": 0.14, "grad_norm": 2.125, "learning_rate": 0.0001974373469456044, "loss": 2.1023, "step": 61375 }, { "epoch": 0.14, "grad_norm": 1.4765625, "learning_rate": 0.00019743693118110436, "loss": 2.0956, "step": 61380 }, { "epoch": 0.14, "grad_norm": 3.140625, "learning_rate": 0.0001974365153833181, "loss": 2.1933, "step": 61385 }, { "epoch": 0.14, "grad_norm": 1.796875, "learning_rate": 0.0001974360995522458, "loss": 2.3467, "step": 61390 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019743568368788754, "loss": 2.0658, "step": 61395 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.0001974352677902435, "loss": 2.3232, "step": 61400 }, { "epoch": 0.14, "grad_norm": 2.203125, "learning_rate": 0.0001974348518593138, "loss": 1.9589, "step": 61405 }, { "epoch": 0.14, "grad_norm": 2.140625, "learning_rate": 0.00019743443589509855, "loss": 2.2032, "step": 61410 }, { "epoch": 0.14, "grad_norm": 1.7109375, "learning_rate": 0.00019743401989759797, "loss": 2.0885, "step": 61415 }, { "epoch": 0.14, "grad_norm": 2.15625, "learning_rate": 0.00019743360386681214, "loss": 2.1618, "step": 61420 }, { "epoch": 0.14, "grad_norm": 1.8359375, "learning_rate": 0.00019743318780274125, "loss": 2.0656, "step": 61425 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.0001974327717053854, "loss": 2.1777, "step": 61430 }, { "epoch": 0.14, "grad_norm": 1.921875, "learning_rate": 0.00019743235557474474, "loss": 2.2681, "step": 61435 }, { "epoch": 0.14, "grad_norm": 2.03125, "learning_rate": 0.00019743193941081943, "loss": 2.1393, "step": 61440 }, { "epoch": 0.14, "grad_norm": 1.6171875, "learning_rate": 0.0001974315232136096, "loss": 2.1485, "step": 61445 }, { "epoch": 0.14, "grad_norm": 1.9296875, "learning_rate": 0.0001974311069831154, "loss": 2.1382, "step": 61450 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019743069071933696, "loss": 2.1993, "step": 61455 }, { "epoch": 0.14, "grad_norm": 1.6796875, "learning_rate": 0.0001974302744222744, "loss": 2.1195, "step": 61460 }, { "epoch": 0.14, "grad_norm": 2.21875, "learning_rate": 0.00019742985809192797, "loss": 2.1554, "step": 61465 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019742944172829768, "loss": 2.2946, "step": 61470 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019742902533138372, "loss": 2.1496, "step": 61475 }, { "epoch": 0.14, "grad_norm": 1.859375, "learning_rate": 0.00019742860890118626, "loss": 2.1627, "step": 61480 }, { "epoch": 0.14, "grad_norm": 3.078125, "learning_rate": 0.0001974281924377054, "loss": 2.1902, "step": 61485 }, { "epoch": 0.14, "grad_norm": 2.265625, "learning_rate": 0.0001974277759409413, "loss": 2.2409, "step": 61490 }, { "epoch": 0.14, "grad_norm": 2.078125, "learning_rate": 0.00019742735941089413, "loss": 2.2725, "step": 61495 }, { "epoch": 0.14, "grad_norm": 2.25, "learning_rate": 0.000197426942847564, "loss": 2.3046, "step": 61500 }, { "epoch": 0.14, "grad_norm": 2.53125, "learning_rate": 0.00019742652625095105, "loss": 2.0458, "step": 61505 }, { "epoch": 0.14, "grad_norm": 1.7265625, "learning_rate": 0.00019742610962105545, "loss": 2.184, "step": 61510 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.0001974256929578773, "loss": 2.2311, "step": 61515 }, { "epoch": 0.14, "grad_norm": 2.171875, "learning_rate": 0.00019742527626141678, "loss": 2.2414, "step": 61520 }, { "epoch": 0.14, "grad_norm": 1.90625, "learning_rate": 0.00019742485953167402, "loss": 2.157, "step": 61525 }, { "epoch": 0.14, "grad_norm": 1.828125, "learning_rate": 0.00019742444276864914, "loss": 2.343, "step": 61530 }, { "epoch": 0.14, "grad_norm": 2.09375, "learning_rate": 0.00019742402597234233, "loss": 2.1702, "step": 61535 }, { "epoch": 0.14, "grad_norm": 2.1875, "learning_rate": 0.00019742360914275372, "loss": 2.273, "step": 61540 }, { "epoch": 0.14, "grad_norm": 2.28125, "learning_rate": 0.0001974231922798834, "loss": 2.2675, "step": 61545 }, { "epoch": 0.14, "grad_norm": 1.9140625, "learning_rate": 0.00019742277538373155, "loss": 2.2867, "step": 61550 }, { "epoch": 0.14, "grad_norm": 2.109375, "learning_rate": 0.00019742235845429833, "loss": 2.2669, "step": 61555 }, { "epoch": 0.14, "grad_norm": 1.96875, "learning_rate": 0.00019742194149158387, "loss": 2.2959, "step": 61560 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019742152449558827, "loss": 2.1671, "step": 61565 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019742110746631176, "loss": 2.1432, "step": 61570 }, { "epoch": 0.14, "grad_norm": 1.9453125, "learning_rate": 0.00019742069040375442, "loss": 2.2024, "step": 61575 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019742027330791638, "loss": 2.1736, "step": 61580 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.00019741985617879784, "loss": 2.1497, "step": 61585 }, { "epoch": 0.14, "grad_norm": 2.046875, "learning_rate": 0.0001974194390163989, "loss": 2.2453, "step": 61590 }, { "epoch": 0.14, "grad_norm": 1.9609375, "learning_rate": 0.0001974190218207197, "loss": 2.2885, "step": 61595 }, { "epoch": 0.14, "grad_norm": 1.640625, "learning_rate": 0.0001974186045917604, "loss": 2.2627, "step": 61600 }, { "epoch": 0.14, "grad_norm": 2.0625, "learning_rate": 0.00019741818732952116, "loss": 2.2332, "step": 61605 }, { "epoch": 0.14, "grad_norm": 1.8984375, "learning_rate": 0.00019741777003400208, "loss": 2.2205, "step": 61610 }, { "epoch": 0.14, "grad_norm": 1.6953125, "learning_rate": 0.0001974173527052033, "loss": 2.2174, "step": 61615 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019741693534312504, "loss": 2.1722, "step": 61620 }, { "epoch": 0.15, "grad_norm": 1.6796875, "learning_rate": 0.00019741651794776737, "loss": 2.0875, "step": 61625 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019741610051913044, "loss": 2.1995, "step": 61630 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.0001974156830572144, "loss": 2.2125, "step": 61635 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.0001974152655620194, "loss": 1.9224, "step": 61640 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.0001974148480335456, "loss": 2.107, "step": 61645 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001974144304717931, "loss": 2.2135, "step": 61650 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019741401287676205, "loss": 2.4213, "step": 61655 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019741359524845262, "loss": 2.2869, "step": 61660 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019741317758686493, "loss": 2.0803, "step": 61665 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019741275989199918, "loss": 2.2533, "step": 61670 }, { "epoch": 0.15, "grad_norm": 2.46875, "learning_rate": 0.0001974123421638554, "loss": 2.1769, "step": 61675 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019741192440243382, "loss": 2.2073, "step": 61680 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.0001974115066077346, "loss": 2.2174, "step": 61685 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.00019741108877975778, "loss": 2.0629, "step": 61690 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019741067091850358, "loss": 2.2092, "step": 61695 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019741025302397213, "loss": 2.2127, "step": 61700 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.0001974098350961636, "loss": 2.0767, "step": 61705 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019740941713507809, "loss": 2.0178, "step": 61710 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019740899914071576, "loss": 2.215, "step": 61715 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.00019740858111307673, "loss": 2.1419, "step": 61720 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019740816305216117, "loss": 2.2851, "step": 61725 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.0001974077449579692, "loss": 2.1491, "step": 61730 }, { "epoch": 0.15, "grad_norm": 2.625, "learning_rate": 0.000197407326830501, "loss": 2.1477, "step": 61735 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.0001974069086697567, "loss": 2.2499, "step": 61740 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.0001974064904757364, "loss": 2.1354, "step": 61745 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019740607224844032, "loss": 2.0506, "step": 61750 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019740565398786852, "loss": 2.1794, "step": 61755 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.0001974052356940212, "loss": 2.1656, "step": 61760 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019740481736689848, "loss": 2.0103, "step": 61765 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001974043990065005, "loss": 2.0644, "step": 61770 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019740398061282742, "loss": 2.2044, "step": 61775 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019740356218587939, "loss": 2.0895, "step": 61780 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.0001974031437256565, "loss": 2.1796, "step": 61785 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019740272523215897, "loss": 2.1463, "step": 61790 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.0001974023067053869, "loss": 2.0373, "step": 61795 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.0001974018881453404, "loss": 2.1635, "step": 61800 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019740146955201965, "loss": 2.1819, "step": 61805 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.00019740105092542484, "loss": 2.176, "step": 61810 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019740063226555602, "loss": 2.1932, "step": 61815 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.0001974002135724134, "loss": 2.3076, "step": 61820 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019739979484599708, "loss": 2.2613, "step": 61825 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019739937608630724, "loss": 2.012, "step": 61830 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019739895729334398, "loss": 2.2199, "step": 61835 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.0001973985384671075, "loss": 1.9897, "step": 61840 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001973981196075979, "loss": 2.1113, "step": 61845 }, { "epoch": 0.15, "grad_norm": 1.734375, "learning_rate": 0.00019739770071481533, "loss": 2.215, "step": 61850 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019739728178875998, "loss": 2.1148, "step": 61855 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019739686282943192, "loss": 2.2469, "step": 61860 }, { "epoch": 0.15, "grad_norm": 1.6953125, "learning_rate": 0.00019739644383683135, "loss": 2.0554, "step": 61865 }, { "epoch": 0.15, "grad_norm": 1.6953125, "learning_rate": 0.00019739602481095835, "loss": 2.0287, "step": 61870 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.0001973956057518131, "loss": 2.302, "step": 61875 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019739518665939576, "loss": 2.3393, "step": 61880 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019739476753370645, "loss": 2.0305, "step": 61885 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019739434837474535, "loss": 2.1644, "step": 61890 }, { "epoch": 0.15, "grad_norm": 2.46875, "learning_rate": 0.00019739392918251256, "loss": 2.2768, "step": 61895 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.0001973935099570082, "loss": 2.2643, "step": 61900 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.0001973930906982325, "loss": 2.2699, "step": 61905 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019739267140618554, "loss": 2.1964, "step": 61910 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019739225208086747, "loss": 2.2114, "step": 61915 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019739183272227844, "loss": 2.2766, "step": 61920 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.0001973914133304186, "loss": 2.0871, "step": 61925 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019739099390528807, "loss": 2.1857, "step": 61930 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019739057444688704, "loss": 2.0552, "step": 61935 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.0001973901549552156, "loss": 2.2729, "step": 61940 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019738973543027393, "loss": 2.2834, "step": 61945 }, { "epoch": 0.15, "grad_norm": 2.53125, "learning_rate": 0.00019738931587206214, "loss": 2.1303, "step": 61950 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.0001973888962805804, "loss": 2.1568, "step": 61955 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.0001973884766558289, "loss": 2.2298, "step": 61960 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019738805699780768, "loss": 1.9532, "step": 61965 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019738763730651693, "loss": 2.1899, "step": 61970 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001973872175819568, "loss": 2.1369, "step": 61975 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019738679782412745, "loss": 2.3113, "step": 61980 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.000197386378033029, "loss": 2.0575, "step": 61985 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019738595820866157, "loss": 2.3116, "step": 61990 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019738553835102533, "loss": 2.122, "step": 61995 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019738511846012044, "loss": 2.3489, "step": 62000 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019738469853594704, "loss": 2.0626, "step": 62005 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019738427857850525, "loss": 2.1815, "step": 62010 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.0001973838585877952, "loss": 2.1575, "step": 62015 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.0001973834385638171, "loss": 2.1016, "step": 62020 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019738301850657104, "loss": 2.1858, "step": 62025 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019738259841605717, "loss": 2.0929, "step": 62030 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019738217829227563, "loss": 2.2391, "step": 62035 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019738175813522658, "loss": 1.9371, "step": 62040 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019738133794491015, "loss": 2.1024, "step": 62045 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.0001973809177213265, "loss": 2.1167, "step": 62050 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019738049746447577, "loss": 2.1603, "step": 62055 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019738007717435803, "loss": 2.1397, "step": 62060 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019737965685097356, "loss": 2.2354, "step": 62065 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.0001973792364943224, "loss": 2.1101, "step": 62070 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019737881610440476, "loss": 2.259, "step": 62075 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019737839568122073, "loss": 1.9472, "step": 62080 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019737797522477045, "loss": 2.2273, "step": 62085 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019737755473505413, "loss": 1.9666, "step": 62090 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019737713421207187, "loss": 2.0092, "step": 62095 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019737671365582378, "loss": 2.1723, "step": 62100 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019737629306631007, "loss": 2.2642, "step": 62105 }, { "epoch": 0.15, "grad_norm": 2.359375, "learning_rate": 0.00019737587244353084, "loss": 2.1876, "step": 62110 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019737545178748623, "loss": 2.1512, "step": 62115 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019737503109817643, "loss": 2.0959, "step": 62120 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019737461037560151, "loss": 2.1483, "step": 62125 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.0001973741896197617, "loss": 2.178, "step": 62130 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019737376883065708, "loss": 2.1989, "step": 62135 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.0001973733480082878, "loss": 2.1529, "step": 62140 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019737292715265404, "loss": 2.018, "step": 62145 }, { "epoch": 0.15, "grad_norm": 2.34375, "learning_rate": 0.00019737250626375595, "loss": 2.0511, "step": 62150 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019737208534159362, "loss": 2.0051, "step": 62155 }, { "epoch": 0.15, "grad_norm": 2.421875, "learning_rate": 0.0001973716643861672, "loss": 2.2481, "step": 62160 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019737124339747687, "loss": 2.1103, "step": 62165 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.00019737082237552276, "loss": 2.2056, "step": 62170 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019737040132030502, "loss": 2.0743, "step": 62175 }, { "epoch": 0.15, "grad_norm": 1.7265625, "learning_rate": 0.00019736998023182377, "loss": 2.2971, "step": 62180 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019736955911007917, "loss": 1.9602, "step": 62185 }, { "epoch": 0.15, "grad_norm": 2.4375, "learning_rate": 0.00019736913795507136, "loss": 2.5054, "step": 62190 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.0001973687167668005, "loss": 2.0416, "step": 62195 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.0001973682955452667, "loss": 2.4656, "step": 62200 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019736787429047015, "loss": 2.0081, "step": 62205 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019736745300241094, "loss": 2.2342, "step": 62210 }, { "epoch": 0.15, "grad_norm": 2.578125, "learning_rate": 0.00019736703168108925, "loss": 1.916, "step": 62215 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019736661032650522, "loss": 2.2329, "step": 62220 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019736618893865902, "loss": 2.2838, "step": 62225 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019736576751755073, "loss": 2.3164, "step": 62230 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019736534606318054, "loss": 2.2951, "step": 62235 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019736492457554858, "loss": 2.2343, "step": 62240 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019736450305465498, "loss": 2.1236, "step": 62245 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019736408150049993, "loss": 2.13, "step": 62250 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.00019736365991308353, "loss": 2.1966, "step": 62255 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.0001973632382924059, "loss": 2.1344, "step": 62260 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019736281663846727, "loss": 2.2938, "step": 62265 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.00019736239495126773, "loss": 2.1749, "step": 62270 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019736197323080742, "loss": 2.1703, "step": 62275 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.0001973615514770865, "loss": 2.1653, "step": 62280 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.0001973611296901051, "loss": 2.2172, "step": 62285 }, { "epoch": 0.15, "grad_norm": 2.375, "learning_rate": 0.0001973607078698634, "loss": 1.9822, "step": 62290 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019736028601636147, "loss": 2.1654, "step": 62295 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019735986412959956, "loss": 2.2234, "step": 62300 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.0001973594422095777, "loss": 2.1879, "step": 62305 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019735902025629608, "loss": 2.1838, "step": 62310 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001973585982697549, "loss": 2.0209, "step": 62315 }, { "epoch": 0.15, "grad_norm": 1.46875, "learning_rate": 0.00019735817624995426, "loss": 2.0815, "step": 62320 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019735775419689426, "loss": 2.1376, "step": 62325 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.0001973573321105751, "loss": 2.1869, "step": 62330 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019735690999099693, "loss": 2.1715, "step": 62335 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019735648783815985, "loss": 2.0584, "step": 62340 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019735606565206404, "loss": 2.1677, "step": 62345 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019735564343270962, "loss": 2.262, "step": 62350 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019735522118009678, "loss": 2.2114, "step": 62355 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001973547988942256, "loss": 2.3752, "step": 62360 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019735437657509626, "loss": 2.1585, "step": 62365 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019735395422270887, "loss": 2.1965, "step": 62370 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019735353183706367, "loss": 2.1504, "step": 62375 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.0001973531094181607, "loss": 2.0146, "step": 62380 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.00019735268696600012, "loss": 2.1447, "step": 62385 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019735226448058213, "loss": 2.2068, "step": 62390 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019735184196190684, "loss": 2.2255, "step": 62395 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.0001973514194099744, "loss": 2.2166, "step": 62400 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019735099682478493, "loss": 2.138, "step": 62405 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.0001973505742063386, "loss": 2.2545, "step": 62410 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019735015155463554, "loss": 2.2423, "step": 62415 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019734972886967592, "loss": 2.2685, "step": 62420 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019734930615145985, "loss": 2.1307, "step": 62425 }, { "epoch": 0.15, "grad_norm": 2.96875, "learning_rate": 0.00019734888339998752, "loss": 2.2178, "step": 62430 }, { "epoch": 0.15, "grad_norm": 1.625, "learning_rate": 0.000197348460615259, "loss": 2.2953, "step": 62435 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.0001973480377972745, "loss": 2.2644, "step": 62440 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019734761494603418, "loss": 2.2913, "step": 62445 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019734719206153812, "loss": 2.1268, "step": 62450 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.0001973467691437865, "loss": 2.2867, "step": 62455 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019734634619277946, "loss": 1.9796, "step": 62460 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019734592320851712, "loss": 2.2418, "step": 62465 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019734550019099968, "loss": 2.1447, "step": 62470 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.0001973450771402272, "loss": 2.1502, "step": 62475 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019734465405619992, "loss": 2.1332, "step": 62480 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019734423093891793, "loss": 2.1781, "step": 62485 }, { "epoch": 0.15, "grad_norm": 1.609375, "learning_rate": 0.0001973438077883814, "loss": 2.2344, "step": 62490 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019734338460459044, "loss": 2.1717, "step": 62495 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.0001973429613875452, "loss": 2.0556, "step": 62500 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019734253813724587, "loss": 2.1843, "step": 62505 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019734211485369256, "loss": 2.2313, "step": 62510 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019734169153688542, "loss": 2.1104, "step": 62515 }, { "epoch": 0.15, "grad_norm": 1.6640625, "learning_rate": 0.00019734126818682457, "loss": 2.1862, "step": 62520 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.0001973408448035102, "loss": 2.2297, "step": 62525 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.0001973404213869424, "loss": 2.0996, "step": 62530 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019733999793712138, "loss": 2.2071, "step": 62535 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019733957445404725, "loss": 2.1376, "step": 62540 }, { "epoch": 0.15, "grad_norm": 1.671875, "learning_rate": 0.00019733915093772016, "loss": 2.1846, "step": 62545 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019733872738814023, "loss": 2.0603, "step": 62550 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019733830380530765, "loss": 2.1726, "step": 62555 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001973378801892225, "loss": 2.2212, "step": 62560 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.000197337456539885, "loss": 2.3105, "step": 62565 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019733703285729527, "loss": 2.1262, "step": 62570 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001973366091414534, "loss": 2.0057, "step": 62575 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.0001973361853923596, "loss": 2.3898, "step": 62580 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.000197335761610014, "loss": 2.0586, "step": 62585 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019733533779441674, "loss": 2.2229, "step": 62590 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019733491394556793, "loss": 2.1888, "step": 62595 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.0001973344900634678, "loss": 2.2236, "step": 62600 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001973340661481164, "loss": 2.1017, "step": 62605 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019733364219951395, "loss": 2.1283, "step": 62610 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019733321821766055, "loss": 2.2051, "step": 62615 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.00019733279420255638, "loss": 2.275, "step": 62620 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019733237015420152, "loss": 2.2408, "step": 62625 }, { "epoch": 0.15, "grad_norm": 1.6015625, "learning_rate": 0.00019733194607259618, "loss": 2.0573, "step": 62630 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.00019733152195774048, "loss": 2.1101, "step": 62635 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019733109780963455, "loss": 2.1705, "step": 62640 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019733067362827857, "loss": 2.0401, "step": 62645 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019733024941367267, "loss": 2.2211, "step": 62650 }, { "epoch": 0.15, "grad_norm": 1.59375, "learning_rate": 0.00019732982516581696, "loss": 2.1932, "step": 62655 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019732940088471165, "loss": 2.0384, "step": 62660 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.00019732897657035684, "loss": 2.1723, "step": 62665 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.0001973285522227527, "loss": 2.1131, "step": 62670 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019732812784189934, "loss": 2.1959, "step": 62675 }, { "epoch": 0.15, "grad_norm": 1.703125, "learning_rate": 0.00019732770342779693, "loss": 2.0412, "step": 62680 }, { "epoch": 0.15, "grad_norm": 1.6953125, "learning_rate": 0.00019732727898044564, "loss": 2.123, "step": 62685 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019732685449984556, "loss": 1.9369, "step": 62690 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019732642998599687, "loss": 2.1151, "step": 62695 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019732600543889968, "loss": 2.091, "step": 62700 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019732558085855422, "loss": 2.2028, "step": 62705 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.0001973251562449605, "loss": 2.3085, "step": 62710 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.0001973247315981188, "loss": 2.2504, "step": 62715 }, { "epoch": 0.15, "grad_norm": 1.75, "learning_rate": 0.00019732430691802918, "loss": 2.1998, "step": 62720 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019732388220469185, "loss": 2.2502, "step": 62725 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019732345745810688, "loss": 2.0856, "step": 62730 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019732303267827446, "loss": 2.4241, "step": 62735 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019732260786519472, "loss": 2.2432, "step": 62740 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.0001973221830188678, "loss": 2.1723, "step": 62745 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001973217581392939, "loss": 2.2662, "step": 62750 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019732133322647308, "loss": 2.137, "step": 62755 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019732090828040555, "loss": 2.1167, "step": 62760 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019732048330109144, "loss": 2.17, "step": 62765 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019732005828853086, "loss": 2.3156, "step": 62770 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.000197319633242724, "loss": 2.1097, "step": 62775 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.000197319208163671, "loss": 2.0323, "step": 62780 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019731878305137196, "loss": 2.1719, "step": 62785 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019731835790582708, "loss": 2.1216, "step": 62790 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.0001973179327270365, "loss": 2.2335, "step": 62795 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019731750751500034, "loss": 2.2426, "step": 62800 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019731708226971876, "loss": 2.3548, "step": 62805 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019731665699119187, "loss": 2.0092, "step": 62810 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019731623167941985, "loss": 2.1763, "step": 62815 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019731580633440286, "loss": 1.9813, "step": 62820 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019731538095614103, "loss": 2.1106, "step": 62825 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.0001973149555446345, "loss": 2.2387, "step": 62830 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019731453009988337, "loss": 2.262, "step": 62835 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019731410462188787, "loss": 2.0468, "step": 62840 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.0001973136791106481, "loss": 2.1978, "step": 62845 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019731325356616423, "loss": 2.2426, "step": 62850 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019731282798843638, "loss": 2.3662, "step": 62855 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019731240237746468, "loss": 2.1831, "step": 62860 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019731197673324933, "loss": 2.1057, "step": 62865 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019731155105579042, "loss": 2.0965, "step": 62870 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019731112534508813, "loss": 2.1697, "step": 62875 }, { "epoch": 0.15, "grad_norm": 1.734375, "learning_rate": 0.00019731069960114258, "loss": 2.1421, "step": 62880 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019731027382395398, "loss": 2.2504, "step": 62885 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.00019730984801352236, "loss": 2.3464, "step": 62890 }, { "epoch": 0.15, "grad_norm": 1.6796875, "learning_rate": 0.00019730942216984797, "loss": 2.1456, "step": 62895 }, { "epoch": 0.15, "grad_norm": 1.65625, "learning_rate": 0.0001973089962929309, "loss": 2.1845, "step": 62900 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019730857038277133, "loss": 2.1384, "step": 62905 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019730814443936936, "loss": 2.1569, "step": 62910 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019730771846272519, "loss": 2.0297, "step": 62915 }, { "epoch": 0.15, "grad_norm": 2.40625, "learning_rate": 0.0001973072924528389, "loss": 2.3811, "step": 62920 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019730686640971072, "loss": 2.1703, "step": 62925 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019730644033334074, "loss": 2.2166, "step": 62930 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.0001973060142237291, "loss": 2.3068, "step": 62935 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019730558808087598, "loss": 2.2658, "step": 62940 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001973051619047815, "loss": 2.1732, "step": 62945 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.0001973047356954458, "loss": 2.0668, "step": 62950 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019730430945286904, "loss": 2.2134, "step": 62955 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019730388317705137, "loss": 2.1459, "step": 62960 }, { "epoch": 0.15, "grad_norm": 2.40625, "learning_rate": 0.00019730345686799292, "loss": 2.1676, "step": 62965 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019730303052569386, "loss": 2.2714, "step": 62970 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.0001973026041501543, "loss": 2.2149, "step": 62975 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019730217774137444, "loss": 2.0617, "step": 62980 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019730175129935437, "loss": 1.927, "step": 62985 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019730132482409426, "loss": 2.1613, "step": 62990 }, { "epoch": 0.15, "grad_norm": 2.46875, "learning_rate": 0.00019730089831559422, "loss": 2.2312, "step": 62995 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019730047177385446, "loss": 2.2141, "step": 63000 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019730004519887508, "loss": 2.1881, "step": 63005 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019729961859065625, "loss": 2.0273, "step": 63010 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019729919194919814, "loss": 2.2198, "step": 63015 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019729876527450083, "loss": 2.2905, "step": 63020 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019729833856656445, "loss": 2.0463, "step": 63025 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019729791182538926, "loss": 2.1863, "step": 63030 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019729748505097533, "loss": 2.1541, "step": 63035 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001972970582433228, "loss": 2.1824, "step": 63040 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019729663140243182, "loss": 2.2673, "step": 63045 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019729620452830254, "loss": 2.2572, "step": 63050 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019729577762093515, "loss": 2.2052, "step": 63055 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019729535068032972, "loss": 2.2555, "step": 63060 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019729492370648645, "loss": 2.2081, "step": 63065 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019729449669940547, "loss": 2.0765, "step": 63070 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019729406965908694, "loss": 2.2439, "step": 63075 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019729364258553095, "loss": 2.1789, "step": 63080 }, { "epoch": 0.15, "grad_norm": 2.53125, "learning_rate": 0.00019729321547873774, "loss": 2.2241, "step": 63085 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019729278833870736, "loss": 2.2553, "step": 63090 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019729236116544004, "loss": 2.1369, "step": 63095 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019729193395893587, "loss": 2.1215, "step": 63100 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.000197291506719195, "loss": 2.0888, "step": 63105 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019729107944621755, "loss": 1.9708, "step": 63110 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019729065214000376, "loss": 2.0019, "step": 63115 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.0001972902248005537, "loss": 2.2349, "step": 63120 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019728979742786754, "loss": 2.3672, "step": 63125 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019728937002194542, "loss": 2.4046, "step": 63130 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019728894258278748, "loss": 2.1722, "step": 63135 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.00019728851511039385, "loss": 2.3127, "step": 63140 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019728808760476475, "loss": 2.2206, "step": 63145 }, { "epoch": 0.15, "grad_norm": 1.7109375, "learning_rate": 0.00019728766006590025, "loss": 2.1976, "step": 63150 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019728723249380053, "loss": 2.2015, "step": 63155 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.0001972868048884657, "loss": 2.0787, "step": 63160 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019728637724989595, "loss": 2.1334, "step": 63165 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.0001972859495780914, "loss": 2.2918, "step": 63170 }, { "epoch": 0.15, "grad_norm": 2.46875, "learning_rate": 0.00019728552187305224, "loss": 2.2123, "step": 63175 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019728509413477854, "loss": 2.2926, "step": 63180 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.0001972846663632705, "loss": 2.1987, "step": 63185 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019728423855852825, "loss": 2.1656, "step": 63190 }, { "epoch": 0.15, "grad_norm": 2.484375, "learning_rate": 0.00019728381072055195, "loss": 2.1168, "step": 63195 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019728338284934175, "loss": 2.4191, "step": 63200 }, { "epoch": 0.15, "grad_norm": 2.734375, "learning_rate": 0.00019728295494489775, "loss": 2.1537, "step": 63205 }, { "epoch": 0.15, "grad_norm": 1.546875, "learning_rate": 0.00019728252700722013, "loss": 2.0289, "step": 63210 }, { "epoch": 0.15, "grad_norm": 1.625, "learning_rate": 0.0001972820990363091, "loss": 2.2336, "step": 63215 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019728167103216465, "loss": 2.0952, "step": 63220 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019728124299478705, "loss": 2.0332, "step": 63225 }, { "epoch": 0.15, "grad_norm": 1.6640625, "learning_rate": 0.00019728081492417643, "loss": 2.182, "step": 63230 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.0001972803868203329, "loss": 2.2139, "step": 63235 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019727995868325663, "loss": 1.9667, "step": 63240 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019727953051294777, "loss": 2.2437, "step": 63245 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.00019727910230940644, "loss": 2.2128, "step": 63250 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019727867407263284, "loss": 2.0033, "step": 63255 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019727824580262704, "loss": 2.0856, "step": 63260 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019727781749938924, "loss": 2.1129, "step": 63265 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.0001972773891629196, "loss": 2.2579, "step": 63270 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.0001972769607932182, "loss": 2.2629, "step": 63275 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019727653239028526, "loss": 2.21, "step": 63280 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.0001972761039541209, "loss": 2.1503, "step": 63285 }, { "epoch": 0.15, "grad_norm": 2.5, "learning_rate": 0.0001972756754847252, "loss": 2.2428, "step": 63290 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.0001972752469820984, "loss": 2.3954, "step": 63295 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.0001972748184462406, "loss": 2.2777, "step": 63300 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.000197274389877152, "loss": 2.1286, "step": 63305 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019727396127483266, "loss": 2.0808, "step": 63310 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019727353263928278, "loss": 2.3179, "step": 63315 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019727310397050252, "loss": 2.0206, "step": 63320 }, { "epoch": 0.15, "grad_norm": 2.40625, "learning_rate": 0.00019727267526849197, "loss": 2.1174, "step": 63325 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019727224653325132, "loss": 2.1477, "step": 63330 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019727181776478073, "loss": 2.225, "step": 63335 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001972713889630803, "loss": 2.1476, "step": 63340 }, { "epoch": 0.15, "grad_norm": 2.546875, "learning_rate": 0.0001972709601281502, "loss": 2.103, "step": 63345 }, { "epoch": 0.15, "grad_norm": 1.6171875, "learning_rate": 0.00019727053125999058, "loss": 2.2277, "step": 63350 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019727010235860156, "loss": 2.2103, "step": 63355 }, { "epoch": 0.15, "grad_norm": 1.6171875, "learning_rate": 0.00019726967342398335, "loss": 2.2686, "step": 63360 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019726924445613605, "loss": 2.0908, "step": 63365 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.0001972688154550598, "loss": 2.3664, "step": 63370 }, { "epoch": 0.15, "grad_norm": 2.5625, "learning_rate": 0.00019726838642075472, "loss": 2.297, "step": 63375 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019726795735322105, "loss": 2.1586, "step": 63380 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019726752825245886, "loss": 2.3578, "step": 63385 }, { "epoch": 0.15, "grad_norm": 2.515625, "learning_rate": 0.00019726709911846834, "loss": 2.3054, "step": 63390 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001972666699512496, "loss": 2.0784, "step": 63395 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001972662407508028, "loss": 2.1742, "step": 63400 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019726581151712806, "loss": 2.1619, "step": 63405 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001972653822502256, "loss": 2.3087, "step": 63410 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.0001972649529500955, "loss": 2.1819, "step": 63415 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019726452361673794, "loss": 2.1315, "step": 63420 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019726409425015304, "loss": 2.2289, "step": 63425 }, { "epoch": 0.15, "grad_norm": 1.609375, "learning_rate": 0.00019726366485034094, "loss": 2.072, "step": 63430 }, { "epoch": 0.15, "grad_norm": 1.6484375, "learning_rate": 0.00019726323541730185, "loss": 2.1678, "step": 63435 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019726280595103589, "loss": 2.171, "step": 63440 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019726237645154314, "loss": 2.2107, "step": 63445 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.0001972619469188238, "loss": 2.2866, "step": 63450 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019726151735287805, "loss": 1.9403, "step": 63455 }, { "epoch": 0.15, "grad_norm": 2.46875, "learning_rate": 0.000197261087753706, "loss": 2.1348, "step": 63460 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019726065812130777, "loss": 2.172, "step": 63465 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019726022845568356, "loss": 2.1781, "step": 63470 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019725979875683347, "loss": 2.1882, "step": 63475 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019725936902475767, "loss": 1.9686, "step": 63480 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.00019725893925945633, "loss": 2.1855, "step": 63485 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019725850946092957, "loss": 1.9139, "step": 63490 }, { "epoch": 0.15, "grad_norm": 2.34375, "learning_rate": 0.0001972580796291775, "loss": 2.3191, "step": 63495 }, { "epoch": 0.15, "grad_norm": 1.5078125, "learning_rate": 0.00019725764976420037, "loss": 2.0368, "step": 63500 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.0001972572198659982, "loss": 2.3537, "step": 63505 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.00019725678993457124, "loss": 2.1866, "step": 63510 }, { "epoch": 0.15, "grad_norm": 2.359375, "learning_rate": 0.00019725635996991958, "loss": 2.1928, "step": 63515 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019725592997204338, "loss": 2.2591, "step": 63520 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001972554999409428, "loss": 2.0466, "step": 63525 }, { "epoch": 0.15, "grad_norm": 2.453125, "learning_rate": 0.000197255069876618, "loss": 2.1154, "step": 63530 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.00019725463977906906, "loss": 2.0081, "step": 63535 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.0001972542096482962, "loss": 2.1867, "step": 63540 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019725377948429953, "loss": 2.069, "step": 63545 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001972533492870792, "loss": 2.1633, "step": 63550 }, { "epoch": 0.15, "grad_norm": 2.234375, "learning_rate": 0.00019725291905663538, "loss": 2.2339, "step": 63555 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019725248879296818, "loss": 2.1426, "step": 63560 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019725205849607777, "loss": 2.3615, "step": 63565 }, { "epoch": 0.15, "grad_norm": 2.375, "learning_rate": 0.0001972516281659643, "loss": 2.0997, "step": 63570 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019725119780262791, "loss": 2.1677, "step": 63575 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019725076740606873, "loss": 2.3839, "step": 63580 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019725033697628693, "loss": 2.2286, "step": 63585 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.0001972499065132827, "loss": 2.2166, "step": 63590 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019724947601705605, "loss": 2.0923, "step": 63595 }, { "epoch": 0.15, "grad_norm": 2.234375, "learning_rate": 0.0001972490454876073, "loss": 2.0504, "step": 63600 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019724861492493646, "loss": 2.1926, "step": 63605 }, { "epoch": 0.15, "grad_norm": 1.5859375, "learning_rate": 0.00019724818432904372, "loss": 2.2268, "step": 63610 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019724775369992927, "loss": 2.0688, "step": 63615 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.0001972473230375932, "loss": 2.2708, "step": 63620 }, { "epoch": 0.15, "grad_norm": 1.5625, "learning_rate": 0.0001972468923420357, "loss": 2.2296, "step": 63625 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.0001972464616132569, "loss": 2.2507, "step": 63630 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019724603085125692, "loss": 2.2592, "step": 63635 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019724560005603597, "loss": 2.1265, "step": 63640 }, { "epoch": 0.15, "grad_norm": 1.59375, "learning_rate": 0.00019724516922759412, "loss": 2.23, "step": 63645 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019724473836593159, "loss": 2.2432, "step": 63650 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019724430747104847, "loss": 2.0774, "step": 63655 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019724387654294497, "loss": 2.3395, "step": 63660 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019724344558162113, "loss": 2.0602, "step": 63665 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.0001972430145870772, "loss": 2.1134, "step": 63670 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.00019724258355931332, "loss": 2.3135, "step": 63675 }, { "epoch": 0.15, "grad_norm": 2.40625, "learning_rate": 0.0001972421524983296, "loss": 2.2898, "step": 63680 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019724172140412615, "loss": 2.2211, "step": 63685 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019724129027670322, "loss": 1.9966, "step": 63690 }, { "epoch": 0.15, "grad_norm": 1.671875, "learning_rate": 0.00019724085911606087, "loss": 2.1233, "step": 63695 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019724042792219928, "loss": 2.1313, "step": 63700 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019723999669511862, "loss": 2.3118, "step": 63705 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.000197239565434819, "loss": 2.1194, "step": 63710 }, { "epoch": 0.15, "grad_norm": 1.7109375, "learning_rate": 0.00019723913414130057, "loss": 2.1842, "step": 63715 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.0001972387028145635, "loss": 2.2057, "step": 63720 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019723827145460792, "loss": 2.289, "step": 63725 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019723784006143397, "loss": 2.0188, "step": 63730 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019723740863504183, "loss": 2.1016, "step": 63735 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.0001972369771754316, "loss": 2.1919, "step": 63740 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019723654568260353, "loss": 2.3114, "step": 63745 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001972361141565576, "loss": 2.1164, "step": 63750 }, { "epoch": 0.15, "grad_norm": 1.6796875, "learning_rate": 0.00019723568259729412, "loss": 2.0728, "step": 63755 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019723525100481313, "loss": 2.2039, "step": 63760 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019723481937911484, "loss": 2.1269, "step": 63765 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019723438772019933, "loss": 2.2199, "step": 63770 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019723395602806683, "loss": 2.2382, "step": 63775 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019723352430271743, "loss": 2.0266, "step": 63780 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019723309254415127, "loss": 2.1127, "step": 63785 }, { "epoch": 0.15, "grad_norm": 1.6953125, "learning_rate": 0.00019723266075236858, "loss": 2.2071, "step": 63790 }, { "epoch": 0.15, "grad_norm": 2.890625, "learning_rate": 0.00019723222892736942, "loss": 2.2091, "step": 63795 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019723179706915395, "loss": 2.1375, "step": 63800 }, { "epoch": 0.15, "grad_norm": 1.703125, "learning_rate": 0.00019723136517772237, "loss": 2.2132, "step": 63805 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019723093325307476, "loss": 2.2059, "step": 63810 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019723050129521132, "loss": 2.1776, "step": 63815 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019723006930413216, "loss": 2.1964, "step": 63820 }, { "epoch": 0.15, "grad_norm": 1.6796875, "learning_rate": 0.00019722963727983745, "loss": 2.1156, "step": 63825 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019722920522232735, "loss": 2.0086, "step": 63830 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019722877313160198, "loss": 2.2398, "step": 63835 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.0001972283410076615, "loss": 2.1019, "step": 63840 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019722790885050604, "loss": 2.1899, "step": 63845 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019722747666013577, "loss": 2.1705, "step": 63850 }, { "epoch": 0.15, "grad_norm": 2.421875, "learning_rate": 0.00019722704443655083, "loss": 2.1732, "step": 63855 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.0001972266121797514, "loss": 2.1621, "step": 63860 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019722617988973754, "loss": 2.0918, "step": 63865 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.0001972257475665095, "loss": 2.3335, "step": 63870 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.00019722531521006737, "loss": 2.0699, "step": 63875 }, { "epoch": 0.15, "grad_norm": 2.34375, "learning_rate": 0.0001972248828204113, "loss": 2.239, "step": 63880 }, { "epoch": 0.15, "grad_norm": 2.421875, "learning_rate": 0.00019722445039754141, "loss": 1.9897, "step": 63885 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019722401794145794, "loss": 2.1089, "step": 63890 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019722358545216099, "loss": 2.0561, "step": 63895 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019722315292965067, "loss": 1.9856, "step": 63900 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019722272037392714, "loss": 2.0674, "step": 63905 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019722228778499062, "loss": 2.2146, "step": 63910 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019722185516284118, "loss": 2.2576, "step": 63915 }, { "epoch": 0.15, "grad_norm": 1.703125, "learning_rate": 0.00019722142250747898, "loss": 2.1643, "step": 63920 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019722098981890418, "loss": 2.0421, "step": 63925 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019722055709711693, "loss": 2.248, "step": 63930 }, { "epoch": 0.15, "grad_norm": 1.546875, "learning_rate": 0.00019722012434211737, "loss": 2.1191, "step": 63935 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019721969155390566, "loss": 2.3072, "step": 63940 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019721925873248191, "loss": 2.2174, "step": 63945 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.00019721882587784634, "loss": 2.2711, "step": 63950 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019721839298999906, "loss": 2.1211, "step": 63955 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019721796006894017, "loss": 2.1982, "step": 63960 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019721752711466988, "loss": 2.2634, "step": 63965 }, { "epoch": 0.15, "grad_norm": 1.671875, "learning_rate": 0.00019721709412718835, "loss": 2.2459, "step": 63970 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019721666110649567, "loss": 2.1569, "step": 63975 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.000197216228052592, "loss": 2.3613, "step": 63980 }, { "epoch": 0.15, "grad_norm": 1.6484375, "learning_rate": 0.00019721579496547755, "loss": 2.1319, "step": 63985 }, { "epoch": 0.15, "grad_norm": 2.6875, "learning_rate": 0.00019721536184515238, "loss": 2.2069, "step": 63990 }, { "epoch": 0.15, "grad_norm": 2.234375, "learning_rate": 0.0001972149286916167, "loss": 2.215, "step": 63995 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019721449550487061, "loss": 2.1253, "step": 64000 }, { "epoch": 0.15, "grad_norm": 2.71875, "learning_rate": 0.0001972140622849143, "loss": 2.1768, "step": 64005 }, { "epoch": 0.15, "grad_norm": 2.515625, "learning_rate": 0.00019721362903174792, "loss": 2.2761, "step": 64010 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.0001972131957453716, "loss": 2.0564, "step": 64015 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019721276242578548, "loss": 2.1384, "step": 64020 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.0001972123290729897, "loss": 2.2123, "step": 64025 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.00019721189568698447, "loss": 2.0981, "step": 64030 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019721146226776986, "loss": 2.1528, "step": 64035 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019721102881534606, "loss": 2.187, "step": 64040 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.0001972105953297132, "loss": 2.0242, "step": 64045 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019721016181087144, "loss": 2.4324, "step": 64050 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.00019720972825882093, "loss": 2.1662, "step": 64055 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019720929467356178, "loss": 2.0153, "step": 64060 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.0001972088610550942, "loss": 2.1215, "step": 64065 }, { "epoch": 0.15, "grad_norm": 2.203125, "learning_rate": 0.00019720842740341836, "loss": 2.0484, "step": 64070 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019720799371853432, "loss": 2.1201, "step": 64075 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019720756000044226, "loss": 2.1216, "step": 64080 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.0001972071262491423, "loss": 2.1434, "step": 64085 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.0001972066924646347, "loss": 2.1883, "step": 64090 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019720625864691946, "loss": 2.1797, "step": 64095 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019720582479599682, "loss": 1.9995, "step": 64100 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019720539091186692, "loss": 2.1543, "step": 64105 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.0001972049569945299, "loss": 2.1316, "step": 64110 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.0001972045230439859, "loss": 2.2284, "step": 64115 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019720408906023505, "loss": 2.2832, "step": 64120 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.00019720365504327756, "loss": 2.2241, "step": 64125 }, { "epoch": 0.15, "grad_norm": 2.359375, "learning_rate": 0.0001972032209931135, "loss": 2.2951, "step": 64130 }, { "epoch": 0.15, "grad_norm": 2.609375, "learning_rate": 0.00019720278690974307, "loss": 2.1988, "step": 64135 }, { "epoch": 0.15, "grad_norm": 1.4921875, "learning_rate": 0.0001972023527931664, "loss": 2.0926, "step": 64140 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019720191864338365, "loss": 2.1069, "step": 64145 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019720148446039496, "loss": 2.181, "step": 64150 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.0001972010502442005, "loss": 2.1819, "step": 64155 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019720061599480038, "loss": 2.1689, "step": 64160 }, { "epoch": 0.15, "grad_norm": 1.671875, "learning_rate": 0.00019720018171219477, "loss": 2.1904, "step": 64165 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.0001971997473963838, "loss": 2.0898, "step": 64170 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019719931304736766, "loss": 2.1846, "step": 64175 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019719887866514647, "loss": 2.3189, "step": 64180 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019719844424972034, "loss": 2.004, "step": 64185 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019719800980108948, "loss": 2.1881, "step": 64190 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019719757531925405, "loss": 1.9863, "step": 64195 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019719714080421413, "loss": 2.2438, "step": 64200 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.0001971967062559699, "loss": 2.1321, "step": 64205 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019719627167452152, "loss": 2.0484, "step": 64210 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019719583705986914, "loss": 2.0918, "step": 64215 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.0001971954024120129, "loss": 2.0101, "step": 64220 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019719496773095294, "loss": 2.1321, "step": 64225 }, { "epoch": 0.15, "grad_norm": 1.671875, "learning_rate": 0.00019719453301668942, "loss": 2.0815, "step": 64230 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.0001971940982692225, "loss": 2.095, "step": 64235 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.0001971936634885523, "loss": 2.1953, "step": 64240 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019719322867467899, "loss": 2.1869, "step": 64245 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019719279382760267, "loss": 2.3176, "step": 64250 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019719235894732356, "loss": 2.0954, "step": 64255 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019719192403384178, "loss": 2.3804, "step": 64260 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019719148908715748, "loss": 2.1314, "step": 64265 }, { "epoch": 0.15, "grad_norm": 1.6640625, "learning_rate": 0.0001971910541072708, "loss": 2.1522, "step": 64270 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019719061909418187, "loss": 1.9679, "step": 64275 }, { "epoch": 0.15, "grad_norm": 1.65625, "learning_rate": 0.0001971901840478909, "loss": 2.0541, "step": 64280 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.000197189748968398, "loss": 2.2452, "step": 64285 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019718931385570327, "loss": 2.01, "step": 64290 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.00019718887870980693, "loss": 2.1616, "step": 64295 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.00019718844353070915, "loss": 2.0428, "step": 64300 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019718800831840998, "loss": 2.2719, "step": 64305 }, { "epoch": 0.15, "grad_norm": 1.546875, "learning_rate": 0.00019718757307290964, "loss": 2.1555, "step": 64310 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.0001971871377942083, "loss": 2.0961, "step": 64315 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019718670248230601, "loss": 2.2036, "step": 64320 }, { "epoch": 0.15, "grad_norm": 2.484375, "learning_rate": 0.00019718626713720303, "loss": 2.1959, "step": 64325 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019718583175889942, "loss": 2.3517, "step": 64330 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.0001971853963473954, "loss": 2.0339, "step": 64335 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019718496090269106, "loss": 2.1113, "step": 64340 }, { "epoch": 0.15, "grad_norm": 1.6796875, "learning_rate": 0.0001971845254247866, "loss": 2.2514, "step": 64345 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019718408991368212, "loss": 2.1978, "step": 64350 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.00019718365436937777, "loss": 2.1705, "step": 64355 }, { "epoch": 0.15, "grad_norm": 2.671875, "learning_rate": 0.00019718321879187378, "loss": 2.2433, "step": 64360 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.0001971827831811702, "loss": 2.2221, "step": 64365 }, { "epoch": 0.15, "grad_norm": 1.625, "learning_rate": 0.00019718234753726726, "loss": 2.2732, "step": 64370 }, { "epoch": 0.15, "grad_norm": 1.5859375, "learning_rate": 0.000197181911860165, "loss": 2.0612, "step": 64375 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.0001971814761498637, "loss": 2.2694, "step": 64380 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019718104040636342, "loss": 2.1422, "step": 64385 }, { "epoch": 0.15, "grad_norm": 3.453125, "learning_rate": 0.00019718060462966435, "loss": 2.1049, "step": 64390 }, { "epoch": 0.15, "grad_norm": 1.7421875, "learning_rate": 0.00019718016881976658, "loss": 2.1799, "step": 64395 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019717973297667034, "loss": 2.2521, "step": 64400 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019717929710037573, "loss": 2.0526, "step": 64405 }, { "epoch": 0.15, "grad_norm": 2.4375, "learning_rate": 0.0001971788611908829, "loss": 2.082, "step": 64410 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019717842524819202, "loss": 2.0259, "step": 64415 }, { "epoch": 0.15, "grad_norm": 2.375, "learning_rate": 0.00019717798927230322, "loss": 2.1495, "step": 64420 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019717755326321667, "loss": 2.1388, "step": 64425 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019717711722093252, "loss": 2.116, "step": 64430 }, { "epoch": 0.15, "grad_norm": 1.6328125, "learning_rate": 0.00019717668114545086, "loss": 2.1799, "step": 64435 }, { "epoch": 0.15, "grad_norm": 1.578125, "learning_rate": 0.00019717624503677193, "loss": 2.2425, "step": 64440 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.0001971758088948958, "loss": 2.2389, "step": 64445 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019717537271982265, "loss": 2.0312, "step": 64450 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019717493651155263, "loss": 2.2399, "step": 64455 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.0001971745002700859, "loss": 2.0535, "step": 64460 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.0001971740639954226, "loss": 2.169, "step": 64465 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019717362768756287, "loss": 2.2213, "step": 64470 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019717319134650687, "loss": 2.2358, "step": 64475 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.00019717275497225476, "loss": 2.2602, "step": 64480 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019717231856480666, "loss": 2.3045, "step": 64485 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.00019717188212416272, "loss": 2.2497, "step": 64490 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019717144565032312, "loss": 2.2271, "step": 64495 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.000197171009143288, "loss": 2.1211, "step": 64500 }, { "epoch": 0.15, "grad_norm": 2.4375, "learning_rate": 0.00019717057260305748, "loss": 2.0951, "step": 64505 }, { "epoch": 0.15, "grad_norm": 2.390625, "learning_rate": 0.00019717013602963174, "loss": 2.0956, "step": 64510 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.0001971696994230109, "loss": 2.0215, "step": 64515 }, { "epoch": 0.15, "grad_norm": 1.6796875, "learning_rate": 0.00019716926278319518, "loss": 2.0771, "step": 64520 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019716882611018463, "loss": 2.1776, "step": 64525 }, { "epoch": 0.15, "grad_norm": 1.6953125, "learning_rate": 0.00019716838940397947, "loss": 2.2859, "step": 64530 }, { "epoch": 0.15, "grad_norm": 1.6640625, "learning_rate": 0.0001971679526645798, "loss": 2.1432, "step": 64535 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.00019716751589198583, "loss": 2.2655, "step": 64540 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019716707908619765, "loss": 2.2509, "step": 64545 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019716664224721545, "loss": 2.3871, "step": 64550 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019716620537503935, "loss": 1.9799, "step": 64555 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019716576846966953, "loss": 2.1296, "step": 64560 }, { "epoch": 0.15, "grad_norm": 1.75, "learning_rate": 0.0001971653315311061, "loss": 2.0325, "step": 64565 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019716489455934923, "loss": 2.2086, "step": 64570 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019716445755439907, "loss": 2.1249, "step": 64575 }, { "epoch": 0.15, "grad_norm": 1.9765625, "learning_rate": 0.0001971640205162558, "loss": 2.1818, "step": 64580 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.00019716358344491953, "loss": 2.1826, "step": 64585 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019716314634039036, "loss": 2.1744, "step": 64590 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019716270920266856, "loss": 2.1943, "step": 64595 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019716227203175419, "loss": 2.1102, "step": 64600 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019716183482764742, "loss": 2.1581, "step": 64605 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019716139759034845, "loss": 2.2646, "step": 64610 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019716096031985734, "loss": 2.2765, "step": 64615 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001971605230161743, "loss": 2.2937, "step": 64620 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019716008567929945, "loss": 2.1949, "step": 64625 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019715964830923296, "loss": 2.1844, "step": 64630 }, { "epoch": 0.15, "grad_norm": 2.375, "learning_rate": 0.00019715921090597498, "loss": 2.2043, "step": 64635 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019715877346952564, "loss": 2.2504, "step": 64640 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019715833599988513, "loss": 2.267, "step": 64645 }, { "epoch": 0.15, "grad_norm": 2.515625, "learning_rate": 0.00019715789849705353, "loss": 1.9792, "step": 64650 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019715746096103103, "loss": 2.1969, "step": 64655 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001971570233918178, "loss": 2.1147, "step": 64660 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019715658578941397, "loss": 2.2482, "step": 64665 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019715614815381968, "loss": 2.1726, "step": 64670 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.00019715571048503508, "loss": 2.3112, "step": 64675 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019715527278306035, "loss": 2.1019, "step": 64680 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.0001971548350478956, "loss": 2.112, "step": 64685 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019715439727954103, "loss": 2.0624, "step": 64690 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019715395947799672, "loss": 2.1539, "step": 64695 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019715352164326286, "loss": 2.1154, "step": 64700 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.0001971530837753396, "loss": 2.2924, "step": 64705 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.0001971526458742271, "loss": 2.2987, "step": 64710 }, { "epoch": 0.15, "grad_norm": 1.7890625, "learning_rate": 0.00019715220793992545, "loss": 2.1341, "step": 64715 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019715176997243486, "loss": 2.1056, "step": 64720 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.00019715133197175546, "loss": 2.1504, "step": 64725 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019715089393788744, "loss": 2.1454, "step": 64730 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.0001971504558708309, "loss": 2.1182, "step": 64735 }, { "epoch": 0.15, "grad_norm": 1.6875, "learning_rate": 0.00019715001777058597, "loss": 2.1548, "step": 64740 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019714957963715287, "loss": 2.1243, "step": 64745 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019714914147053167, "loss": 2.2101, "step": 64750 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.0001971487032707226, "loss": 2.2061, "step": 64755 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019714826503772575, "loss": 2.2152, "step": 64760 }, { "epoch": 0.15, "grad_norm": 3.59375, "learning_rate": 0.00019714782677154129, "loss": 2.1494, "step": 64765 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001971473884721694, "loss": 2.1347, "step": 64770 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019714695013961018, "loss": 2.1634, "step": 64775 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019714651177386378, "loss": 2.1003, "step": 64780 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.0001971460733749304, "loss": 2.0018, "step": 64785 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019714563494281016, "loss": 2.1715, "step": 64790 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019714519647750318, "loss": 2.2581, "step": 64795 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.00019714475797900967, "loss": 2.2041, "step": 64800 }, { "epoch": 0.15, "grad_norm": 1.609375, "learning_rate": 0.00019714431944732974, "loss": 2.3092, "step": 64805 }, { "epoch": 0.15, "grad_norm": 1.7265625, "learning_rate": 0.00019714388088246354, "loss": 2.1949, "step": 64810 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.0001971434422844112, "loss": 2.1882, "step": 64815 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019714300365317294, "loss": 2.2991, "step": 64820 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019714256498874888, "loss": 2.0226, "step": 64825 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019714212629113912, "loss": 2.2834, "step": 64830 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019714168756034388, "loss": 2.1034, "step": 64835 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019714124879636327, "loss": 2.2881, "step": 64840 }, { "epoch": 0.15, "grad_norm": 1.578125, "learning_rate": 0.0001971408099991974, "loss": 2.088, "step": 64845 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019714037116884653, "loss": 2.1405, "step": 64850 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019713993230531074, "loss": 2.3342, "step": 64855 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019713949340859017, "loss": 2.1251, "step": 64860 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.000197139054478685, "loss": 2.0948, "step": 64865 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019713861551559533, "loss": 1.9745, "step": 64870 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019713817651932137, "loss": 2.2554, "step": 64875 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.00019713773748986327, "loss": 2.1455, "step": 64880 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019713729842722115, "loss": 2.1829, "step": 64885 }, { "epoch": 0.15, "grad_norm": 3.46875, "learning_rate": 0.00019713685933139515, "loss": 2.3799, "step": 64890 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019713642020238546, "loss": 2.1201, "step": 64895 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019713598104019217, "loss": 2.1726, "step": 64900 }, { "epoch": 0.15, "grad_norm": 2.234375, "learning_rate": 0.0001971355418448155, "loss": 2.1959, "step": 64905 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.00019713510261625553, "loss": 2.3523, "step": 64910 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.0001971346633545125, "loss": 2.292, "step": 64915 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019713422405958646, "loss": 2.1198, "step": 64920 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.00019713378473147763, "loss": 2.2749, "step": 64925 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019713334537018614, "loss": 2.1668, "step": 64930 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019713290597571216, "loss": 2.3312, "step": 64935 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019713246654805576, "loss": 2.2056, "step": 64940 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.0001971320270872172, "loss": 2.0457, "step": 64945 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019713158759319653, "loss": 2.0878, "step": 64950 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.00019713114806599398, "loss": 2.2413, "step": 64955 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019713070850560965, "loss": 2.1207, "step": 64960 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019713026891204373, "loss": 2.2099, "step": 64965 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.00019712982928529632, "loss": 2.3455, "step": 64970 }, { "epoch": 0.15, "grad_norm": 1.6015625, "learning_rate": 0.00019712938962536766, "loss": 2.0466, "step": 64975 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019712894993225777, "loss": 1.9283, "step": 64980 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.0001971285102059669, "loss": 2.0275, "step": 64985 }, { "epoch": 0.15, "grad_norm": 1.640625, "learning_rate": 0.00019712807044649518, "loss": 2.1753, "step": 64990 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019712763065384272, "loss": 2.3642, "step": 64995 }, { "epoch": 0.15, "grad_norm": 2.09375, "learning_rate": 0.0001971271908280097, "loss": 2.0962, "step": 65000 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019712675096899632, "loss": 2.2113, "step": 65005 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019712631107680263, "loss": 1.9887, "step": 65010 }, { "epoch": 0.15, "grad_norm": 1.7421875, "learning_rate": 0.00019712587115142886, "loss": 2.1203, "step": 65015 }, { "epoch": 0.15, "grad_norm": 2.125, "learning_rate": 0.0001971254311928751, "loss": 2.326, "step": 65020 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019712499120114156, "loss": 2.3436, "step": 65025 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019712455117622835, "loss": 2.2529, "step": 65030 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019712411111813563, "loss": 2.1649, "step": 65035 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019712367102686355, "loss": 2.0353, "step": 65040 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019712323090241227, "loss": 2.3245, "step": 65045 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019712279074478192, "loss": 2.1537, "step": 65050 }, { "epoch": 0.15, "grad_norm": 1.8125, "learning_rate": 0.0001971223505539727, "loss": 2.2756, "step": 65055 }, { "epoch": 0.15, "grad_norm": 1.625, "learning_rate": 0.00019712191032998465, "loss": 2.3282, "step": 65060 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019712147007281807, "loss": 2.1883, "step": 65065 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.000197121029782473, "loss": 2.2857, "step": 65070 }, { "epoch": 0.15, "grad_norm": 1.5703125, "learning_rate": 0.00019712058945894962, "loss": 2.1774, "step": 65075 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.0001971201491022481, "loss": 2.0721, "step": 65080 }, { "epoch": 0.15, "grad_norm": 1.4453125, "learning_rate": 0.00019711970871236857, "loss": 2.1141, "step": 65085 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.0001971192682893112, "loss": 2.23, "step": 65090 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.0001971188278330761, "loss": 2.1727, "step": 65095 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019711838734366347, "loss": 2.1033, "step": 65100 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.00019711794682107344, "loss": 2.2952, "step": 65105 }, { "epoch": 0.15, "grad_norm": 1.8046875, "learning_rate": 0.00019711750626530612, "loss": 1.8866, "step": 65110 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019711706567636172, "loss": 2.0889, "step": 65115 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.0001971166250542404, "loss": 2.1207, "step": 65120 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.00019711618439894224, "loss": 2.1444, "step": 65125 }, { "epoch": 0.15, "grad_norm": 2.34375, "learning_rate": 0.00019711574371046746, "loss": 2.2974, "step": 65130 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019711530298881615, "loss": 2.0949, "step": 65135 }, { "epoch": 0.15, "grad_norm": 3.265625, "learning_rate": 0.0001971148622339885, "loss": 2.1856, "step": 65140 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019711442144598466, "loss": 2.2018, "step": 65145 }, { "epoch": 0.15, "grad_norm": 1.96875, "learning_rate": 0.00019711398062480478, "loss": 2.1913, "step": 65150 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019711353977044897, "loss": 2.1473, "step": 65155 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019711309888291746, "loss": 2.1974, "step": 65160 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.00019711265796221032, "loss": 2.2961, "step": 65165 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019711221700832775, "loss": 2.2219, "step": 65170 }, { "epoch": 0.15, "grad_norm": 1.6484375, "learning_rate": 0.00019711177602126987, "loss": 2.1695, "step": 65175 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.00019711133500103687, "loss": 2.1255, "step": 65180 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019711089394762885, "loss": 2.2096, "step": 65185 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019711045286104601, "loss": 2.1465, "step": 65190 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019711001174128846, "loss": 2.2796, "step": 65195 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019710957058835638, "loss": 1.9312, "step": 65200 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.00019710912940224993, "loss": 2.2727, "step": 65205 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019710868818296923, "loss": 2.2603, "step": 65210 }, { "epoch": 0.15, "grad_norm": 1.9609375, "learning_rate": 0.0001971082469305144, "loss": 2.181, "step": 65215 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019710780564488568, "loss": 2.204, "step": 65220 }, { "epoch": 0.15, "grad_norm": 2.265625, "learning_rate": 0.00019710736432608313, "loss": 2.1416, "step": 65225 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019710692297410698, "loss": 2.1469, "step": 65230 }, { "epoch": 0.15, "grad_norm": 1.609375, "learning_rate": 0.0001971064815889573, "loss": 1.9529, "step": 65235 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019710604017063435, "loss": 2.205, "step": 65240 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019710559871913817, "loss": 2.1577, "step": 65245 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019710515723446895, "loss": 2.208, "step": 65250 }, { "epoch": 0.15, "grad_norm": 2.640625, "learning_rate": 0.00019710471571662686, "loss": 2.3584, "step": 65255 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019710427416561206, "loss": 2.1188, "step": 65260 }, { "epoch": 0.15, "grad_norm": 1.7734375, "learning_rate": 0.00019710383258142463, "loss": 2.253, "step": 65265 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019710339096406482, "loss": 2.2867, "step": 65270 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001971029493135327, "loss": 2.295, "step": 65275 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.00019710250762982847, "loss": 2.2148, "step": 65280 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019710206591295226, "loss": 1.8485, "step": 65285 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019710162416290423, "loss": 2.2189, "step": 65290 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.0001971011823796845, "loss": 2.0984, "step": 65295 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019710074056329327, "loss": 2.3129, "step": 65300 }, { "epoch": 0.15, "grad_norm": 1.8359375, "learning_rate": 0.00019710029871373066, "loss": 2.0919, "step": 65305 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019709985683099678, "loss": 2.0991, "step": 65310 }, { "epoch": 0.15, "grad_norm": 1.4921875, "learning_rate": 0.0001970994149150919, "loss": 2.2749, "step": 65315 }, { "epoch": 0.15, "grad_norm": 1.921875, "learning_rate": 0.00019709897296601607, "loss": 2.1357, "step": 65320 }, { "epoch": 0.15, "grad_norm": 1.7109375, "learning_rate": 0.00019709853098376945, "loss": 2.2439, "step": 65325 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.00019709808896835223, "loss": 2.0509, "step": 65330 }, { "epoch": 0.15, "grad_norm": 1.71875, "learning_rate": 0.00019709764691976456, "loss": 1.9271, "step": 65335 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.00019709720483800653, "loss": 2.2374, "step": 65340 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019709676272307837, "loss": 2.1715, "step": 65345 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.00019709632057498015, "loss": 2.3221, "step": 65350 }, { "epoch": 0.15, "grad_norm": 1.7265625, "learning_rate": 0.00019709587839371212, "loss": 1.9611, "step": 65355 }, { "epoch": 0.15, "grad_norm": 2.171875, "learning_rate": 0.00019709543617927436, "loss": 2.1619, "step": 65360 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019709499393166704, "loss": 2.2812, "step": 65365 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019709455165089029, "loss": 2.1414, "step": 65370 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019709410933694426, "loss": 2.106, "step": 65375 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.00019709366698982916, "loss": 2.2644, "step": 65380 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019709322460954512, "loss": 2.0688, "step": 65385 }, { "epoch": 0.15, "grad_norm": 1.578125, "learning_rate": 0.00019709278219609224, "loss": 2.2125, "step": 65390 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.0001970923397494707, "loss": 2.2846, "step": 65395 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.0001970918972696807, "loss": 2.2923, "step": 65400 }, { "epoch": 0.15, "grad_norm": 2.65625, "learning_rate": 0.0001970914547567223, "loss": 2.1403, "step": 65405 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019709101221059572, "loss": 2.2358, "step": 65410 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.00019709056963130108, "loss": 2.1018, "step": 65415 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019709012701883856, "loss": 2.3465, "step": 65420 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019708968437320826, "loss": 2.2281, "step": 65425 }, { "epoch": 0.15, "grad_norm": 2.03125, "learning_rate": 0.0001970892416944104, "loss": 2.1904, "step": 65430 }, { "epoch": 0.15, "grad_norm": 2.34375, "learning_rate": 0.00019708879898244506, "loss": 1.9705, "step": 65435 }, { "epoch": 0.15, "grad_norm": 1.84375, "learning_rate": 0.00019708835623731243, "loss": 2.1177, "step": 65440 }, { "epoch": 0.15, "grad_norm": 1.953125, "learning_rate": 0.00019708791345901267, "loss": 2.0418, "step": 65445 }, { "epoch": 0.15, "grad_norm": 1.8828125, "learning_rate": 0.00019708747064754593, "loss": 2.276, "step": 65450 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001970870278029123, "loss": 2.2324, "step": 65455 }, { "epoch": 0.15, "grad_norm": 1.8515625, "learning_rate": 0.00019708658492511202, "loss": 2.0953, "step": 65460 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019708614201414522, "loss": 2.0988, "step": 65465 }, { "epoch": 0.15, "grad_norm": 2.8125, "learning_rate": 0.000197085699070012, "loss": 2.2525, "step": 65470 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019708525609271255, "loss": 2.1401, "step": 65475 }, { "epoch": 0.15, "grad_norm": 1.78125, "learning_rate": 0.00019708481308224702, "loss": 2.2261, "step": 65480 }, { "epoch": 0.15, "grad_norm": 2.296875, "learning_rate": 0.00019708437003861554, "loss": 2.1836, "step": 65485 }, { "epoch": 0.15, "grad_norm": 2.484375, "learning_rate": 0.0001970839269618183, "loss": 2.1272, "step": 65490 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019708348385185542, "loss": 2.0998, "step": 65495 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.00019708304070872708, "loss": 2.216, "step": 65500 }, { "epoch": 0.15, "grad_norm": 1.6328125, "learning_rate": 0.0001970825975324334, "loss": 2.1447, "step": 65505 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019708215432297455, "loss": 2.1565, "step": 65510 }, { "epoch": 0.15, "grad_norm": 2.3125, "learning_rate": 0.00019708171108035065, "loss": 2.3753, "step": 65515 }, { "epoch": 0.15, "grad_norm": 2.671875, "learning_rate": 0.00019708126780456188, "loss": 2.1005, "step": 65520 }, { "epoch": 0.15, "grad_norm": 2.421875, "learning_rate": 0.00019708082449560843, "loss": 2.1995, "step": 65525 }, { "epoch": 0.15, "grad_norm": 2.0, "learning_rate": 0.0001970803811534904, "loss": 2.2186, "step": 65530 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.0001970799377782079, "loss": 2.333, "step": 65535 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019707949436976118, "loss": 2.1714, "step": 65540 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019707905092815033, "loss": 2.0954, "step": 65545 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019707860745337552, "loss": 2.2774, "step": 65550 }, { "epoch": 0.15, "grad_norm": 2.359375, "learning_rate": 0.00019707816394543687, "loss": 2.2444, "step": 65555 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019707772040433459, "loss": 2.1134, "step": 65560 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.00019707727683006878, "loss": 2.26, "step": 65565 }, { "epoch": 0.15, "grad_norm": 2.359375, "learning_rate": 0.00019707683322263964, "loss": 2.1374, "step": 65570 }, { "epoch": 0.15, "grad_norm": 1.5859375, "learning_rate": 0.00019707638958204728, "loss": 2.2068, "step": 65575 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019707594590829186, "loss": 2.1179, "step": 65580 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019707550220137354, "loss": 2.3435, "step": 65585 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.00019707505846129245, "loss": 2.1886, "step": 65590 }, { "epoch": 0.15, "grad_norm": 2.53125, "learning_rate": 0.0001970746146880488, "loss": 2.308, "step": 65595 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019707417088164265, "loss": 2.251, "step": 65600 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019707372704207425, "loss": 2.2907, "step": 65605 }, { "epoch": 0.15, "grad_norm": 2.15625, "learning_rate": 0.0001970732831693437, "loss": 2.3678, "step": 65610 }, { "epoch": 0.15, "grad_norm": 1.9375, "learning_rate": 0.0001970728392634511, "loss": 2.1252, "step": 65615 }, { "epoch": 0.15, "grad_norm": 1.9140625, "learning_rate": 0.00019707239532439669, "loss": 2.1429, "step": 65620 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001970719513521806, "loss": 2.2439, "step": 65625 }, { "epoch": 0.15, "grad_norm": 1.9296875, "learning_rate": 0.00019707150734680297, "loss": 2.2294, "step": 65630 }, { "epoch": 0.15, "grad_norm": 2.0625, "learning_rate": 0.00019707106330826398, "loss": 2.2741, "step": 65635 }, { "epoch": 0.15, "grad_norm": 1.8203125, "learning_rate": 0.0001970706192365637, "loss": 2.314, "step": 65640 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019707017513170236, "loss": 2.1935, "step": 65645 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.0001970697309936801, "loss": 1.9465, "step": 65650 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019706928682249703, "loss": 2.1797, "step": 65655 }, { "epoch": 0.15, "grad_norm": 1.859375, "learning_rate": 0.00019706884261815334, "loss": 2.1588, "step": 65660 }, { "epoch": 0.15, "grad_norm": 2.234375, "learning_rate": 0.0001970683983806492, "loss": 2.2971, "step": 65665 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019706795410998472, "loss": 2.2119, "step": 65670 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019706750980616006, "loss": 2.0986, "step": 65675 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019706706546917537, "loss": 2.1775, "step": 65680 }, { "epoch": 0.15, "grad_norm": 2.25, "learning_rate": 0.00019706662109903082, "loss": 2.1959, "step": 65685 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019706617669572655, "loss": 2.2058, "step": 65690 }, { "epoch": 0.15, "grad_norm": 1.734375, "learning_rate": 0.0001970657322592627, "loss": 1.9707, "step": 65695 }, { "epoch": 0.15, "grad_norm": 2.328125, "learning_rate": 0.00019706528778963946, "loss": 2.0014, "step": 65700 }, { "epoch": 0.15, "grad_norm": 1.8671875, "learning_rate": 0.00019706484328685698, "loss": 2.334, "step": 65705 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.00019706439875091534, "loss": 2.2494, "step": 65710 }, { "epoch": 0.15, "grad_norm": 2.046875, "learning_rate": 0.00019706395418181476, "loss": 2.1142, "step": 65715 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.00019706350957955534, "loss": 2.1236, "step": 65720 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019706306494413731, "loss": 2.2344, "step": 65725 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019706262027556078, "loss": 2.2232, "step": 65730 }, { "epoch": 0.15, "grad_norm": 2.109375, "learning_rate": 0.00019706217557382587, "loss": 2.0583, "step": 65735 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019706173083893278, "loss": 2.062, "step": 65740 }, { "epoch": 0.15, "grad_norm": 2.34375, "learning_rate": 0.00019706128607088164, "loss": 2.3112, "step": 65745 }, { "epoch": 0.15, "grad_norm": 1.6953125, "learning_rate": 0.0001970608412696726, "loss": 2.0975, "step": 65750 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.0001970603964353058, "loss": 2.1917, "step": 65755 }, { "epoch": 0.15, "grad_norm": 1.828125, "learning_rate": 0.00019705995156778144, "loss": 2.2803, "step": 65760 }, { "epoch": 0.15, "grad_norm": 1.7578125, "learning_rate": 0.00019705950666709963, "loss": 2.3118, "step": 65765 }, { "epoch": 0.15, "grad_norm": 1.796875, "learning_rate": 0.0001970590617332605, "loss": 2.1734, "step": 65770 }, { "epoch": 0.15, "grad_norm": 1.90625, "learning_rate": 0.00019705861676626426, "loss": 2.2319, "step": 65775 }, { "epoch": 0.15, "grad_norm": 2.1875, "learning_rate": 0.00019705817176611103, "loss": 2.2668, "step": 65780 }, { "epoch": 0.15, "grad_norm": 1.9921875, "learning_rate": 0.00019705772673280102, "loss": 2.2508, "step": 65785 }, { "epoch": 0.15, "grad_norm": 2.140625, "learning_rate": 0.00019705728166633427, "loss": 1.9855, "step": 65790 }, { "epoch": 0.15, "grad_norm": 1.984375, "learning_rate": 0.000197056836566711, "loss": 2.4017, "step": 65795 }, { "epoch": 0.15, "grad_norm": 1.9453125, "learning_rate": 0.00019705639143393136, "loss": 2.1436, "step": 65800 }, { "epoch": 0.15, "grad_norm": 2.390625, "learning_rate": 0.0001970559462679955, "loss": 2.2015, "step": 65805 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019705550106890356, "loss": 2.2593, "step": 65810 }, { "epoch": 0.15, "grad_norm": 2.28125, "learning_rate": 0.0001970550558366557, "loss": 2.1124, "step": 65815 }, { "epoch": 0.15, "grad_norm": 1.875, "learning_rate": 0.0001970546105712521, "loss": 2.3429, "step": 65820 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019705416527269286, "loss": 2.3874, "step": 65825 }, { "epoch": 0.15, "grad_norm": 1.765625, "learning_rate": 0.00019705371994097816, "loss": 2.1899, "step": 65830 }, { "epoch": 0.15, "grad_norm": 2.015625, "learning_rate": 0.00019705327457610815, "loss": 2.3438, "step": 65835 }, { "epoch": 0.15, "grad_norm": 1.890625, "learning_rate": 0.000197052829178083, "loss": 2.2586, "step": 65840 }, { "epoch": 0.15, "grad_norm": 2.21875, "learning_rate": 0.0001970523837469028, "loss": 2.4291, "step": 65845 }, { "epoch": 0.15, "grad_norm": 2.359375, "learning_rate": 0.00019705193828256778, "loss": 2.277, "step": 65850 }, { "epoch": 0.15, "grad_norm": 1.8984375, "learning_rate": 0.00019705149278507805, "loss": 2.2881, "step": 65855 }, { "epoch": 0.15, "grad_norm": 2.078125, "learning_rate": 0.00019705104725443377, "loss": 2.2544, "step": 65860 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.0001970506016906351, "loss": 2.1046, "step": 65865 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019705015609368216, "loss": 2.2488, "step": 65870 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019704971046357515, "loss": 2.2648, "step": 65875 }, { "epoch": 0.16, "grad_norm": 2.359375, "learning_rate": 0.0001970492648003142, "loss": 2.3166, "step": 65880 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019704881910389944, "loss": 2.0416, "step": 65885 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.00019704837337433106, "loss": 2.0845, "step": 65890 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.0001970479276116092, "loss": 2.2772, "step": 65895 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.000197047481815734, "loss": 2.0274, "step": 65900 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019704703598670559, "loss": 2.2368, "step": 65905 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.0001970465901245242, "loss": 2.0963, "step": 65910 }, { "epoch": 0.16, "grad_norm": 1.59375, "learning_rate": 0.0001970461442291899, "loss": 2.1911, "step": 65915 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019704569830070293, "loss": 2.0328, "step": 65920 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019704525233906335, "loss": 2.1127, "step": 65925 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019704480634427133, "loss": 2.2446, "step": 65930 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.00019704436031632707, "loss": 2.241, "step": 65935 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001970439142552307, "loss": 2.2941, "step": 65940 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.00019704346816098238, "loss": 2.0802, "step": 65945 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.00019704302203358222, "loss": 2.1507, "step": 65950 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019704257587303042, "loss": 2.1152, "step": 65955 }, { "epoch": 0.16, "grad_norm": 1.6953125, "learning_rate": 0.00019704212967932712, "loss": 2.0844, "step": 65960 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019704168345247247, "loss": 2.1694, "step": 65965 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019704123719246658, "loss": 2.3668, "step": 65970 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019704079089930972, "loss": 2.2499, "step": 65975 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.0001970403445730019, "loss": 2.1351, "step": 65980 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019703989821354337, "loss": 2.1738, "step": 65985 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019703945182093424, "loss": 2.0101, "step": 65990 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019703900539517469, "loss": 2.1569, "step": 65995 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.00019703855893626483, "loss": 2.3257, "step": 66000 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019703811244420485, "loss": 2.3854, "step": 66005 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.0001970376659189949, "loss": 1.9951, "step": 66010 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.0001970372193606351, "loss": 2.1876, "step": 66015 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019703677276912567, "loss": 2.1005, "step": 66020 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.00019703632614446667, "loss": 2.0861, "step": 66025 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019703587948665833, "loss": 2.2325, "step": 66030 }, { "epoch": 0.16, "grad_norm": 1.5859375, "learning_rate": 0.00019703543279570075, "loss": 2.0339, "step": 66035 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019703498607159415, "loss": 2.2209, "step": 66040 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.0001970345393143386, "loss": 2.1471, "step": 66045 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019703409252393427, "loss": 2.2587, "step": 66050 }, { "epoch": 0.16, "grad_norm": 2.5, "learning_rate": 0.0001970336457003814, "loss": 2.3703, "step": 66055 }, { "epoch": 0.16, "grad_norm": 1.640625, "learning_rate": 0.00019703319884368004, "loss": 2.0242, "step": 66060 }, { "epoch": 0.16, "grad_norm": 1.75, "learning_rate": 0.00019703275195383037, "loss": 2.2044, "step": 66065 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.00019703230503083258, "loss": 2.2044, "step": 66070 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.0001970318580746868, "loss": 2.2094, "step": 66075 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019703141108539312, "loss": 2.3189, "step": 66080 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019703096406295179, "loss": 2.2119, "step": 66085 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019703051700736292, "loss": 2.1639, "step": 66090 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019703006991862666, "loss": 2.2271, "step": 66095 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019702962279674317, "loss": 2.059, "step": 66100 }, { "epoch": 0.16, "grad_norm": 2.359375, "learning_rate": 0.00019702917564171261, "loss": 1.9414, "step": 66105 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019702872845353512, "loss": 2.1326, "step": 66110 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019702828123221083, "loss": 2.145, "step": 66115 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019702783397773996, "loss": 2.0457, "step": 66120 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.0001970273866901226, "loss": 2.3129, "step": 66125 }, { "epoch": 0.16, "grad_norm": 1.78125, "learning_rate": 0.00019702693936935893, "loss": 2.1109, "step": 66130 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019702649201544913, "loss": 2.1975, "step": 66135 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019702604462839326, "loss": 2.2456, "step": 66140 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019702559720819157, "loss": 2.1197, "step": 66145 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.0001970251497548442, "loss": 1.7935, "step": 66150 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019702470226835124, "loss": 2.2522, "step": 66155 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.0001970242547487129, "loss": 2.0129, "step": 66160 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001970238071959293, "loss": 2.1826, "step": 66165 }, { "epoch": 0.16, "grad_norm": 2.5625, "learning_rate": 0.00019702335961000063, "loss": 2.1775, "step": 66170 }, { "epoch": 0.16, "grad_norm": 2.546875, "learning_rate": 0.000197022911990927, "loss": 2.2033, "step": 66175 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019702246433870858, "loss": 2.0704, "step": 66180 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.00019702201665334554, "loss": 2.2001, "step": 66185 }, { "epoch": 0.16, "grad_norm": 1.8359375, "learning_rate": 0.000197021568934838, "loss": 1.9994, "step": 66190 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019702112118318615, "loss": 2.1798, "step": 66195 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019702067339839014, "loss": 2.0484, "step": 66200 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.00019702022558045008, "loss": 1.9272, "step": 66205 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019701977772936617, "loss": 2.0698, "step": 66210 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019701932984513853, "loss": 2.1799, "step": 66215 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001970188819277673, "loss": 2.1047, "step": 66220 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019701843397725274, "loss": 2.1361, "step": 66225 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019701798599359486, "loss": 2.1893, "step": 66230 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.0001970175379767939, "loss": 2.0876, "step": 66235 }, { "epoch": 0.16, "grad_norm": 2.40625, "learning_rate": 0.00019701708992684997, "loss": 2.1756, "step": 66240 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019701664184376328, "loss": 2.1153, "step": 66245 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019701619372753393, "loss": 2.0996, "step": 66250 }, { "epoch": 0.16, "grad_norm": 1.9921875, "learning_rate": 0.00019701574557816207, "loss": 2.1443, "step": 66255 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.00019701529739564789, "loss": 2.2353, "step": 66260 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.0001970148491799915, "loss": 2.1999, "step": 66265 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.00019701440093119313, "loss": 2.3196, "step": 66270 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.0001970139526492528, "loss": 2.3517, "step": 66275 }, { "epoch": 0.16, "grad_norm": 2.328125, "learning_rate": 0.0001970135043341708, "loss": 2.164, "step": 66280 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019701305598594725, "loss": 2.4036, "step": 66285 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019701260760458222, "loss": 2.0507, "step": 66290 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.00019701215919007596, "loss": 1.8922, "step": 66295 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.0001970117107424286, "loss": 2.2322, "step": 66300 }, { "epoch": 0.16, "grad_norm": 1.6171875, "learning_rate": 0.00019701126226164025, "loss": 2.0807, "step": 66305 }, { "epoch": 0.16, "grad_norm": 2.640625, "learning_rate": 0.0001970108137477111, "loss": 2.1304, "step": 66310 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019701036520064128, "loss": 2.3534, "step": 66315 }, { "epoch": 0.16, "grad_norm": 2.265625, "learning_rate": 0.000197009916620431, "loss": 2.0951, "step": 66320 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019700946800708035, "loss": 2.1857, "step": 66325 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019700901936058952, "loss": 2.2183, "step": 66330 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019700857068095863, "loss": 2.0949, "step": 66335 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019700812196818786, "loss": 2.3175, "step": 66340 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019700767322227737, "loss": 2.2057, "step": 66345 }, { "epoch": 0.16, "grad_norm": 1.703125, "learning_rate": 0.0001970072244432273, "loss": 2.1454, "step": 66350 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019700677563103778, "loss": 2.183, "step": 66355 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.000197006326785709, "loss": 1.9345, "step": 66360 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.0001970058779072411, "loss": 2.1002, "step": 66365 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019700542899563422, "loss": 2.1598, "step": 66370 }, { "epoch": 0.16, "grad_norm": 2.6875, "learning_rate": 0.00019700498005088853, "loss": 2.2743, "step": 66375 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.0001970045310730042, "loss": 2.0556, "step": 66380 }, { "epoch": 0.16, "grad_norm": 1.9921875, "learning_rate": 0.00019700408206198134, "loss": 2.2143, "step": 66385 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019700363301782014, "loss": 2.0918, "step": 66390 }, { "epoch": 0.16, "grad_norm": 2.421875, "learning_rate": 0.00019700318394052073, "loss": 2.1472, "step": 66395 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019700273483008328, "loss": 2.0836, "step": 66400 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019700228568650795, "loss": 2.1096, "step": 66405 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019700183650979484, "loss": 2.0824, "step": 66410 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019700138729994418, "loss": 2.3117, "step": 66415 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019700093805695608, "loss": 1.9544, "step": 66420 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.0001970004887808307, "loss": 2.0878, "step": 66425 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019700003947156818, "loss": 2.1192, "step": 66430 }, { "epoch": 0.16, "grad_norm": 1.5859375, "learning_rate": 0.00019699959012916868, "loss": 2.1384, "step": 66435 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019699914075363238, "loss": 2.0112, "step": 66440 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019699869134495943, "loss": 2.0358, "step": 66445 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.00019699824190314992, "loss": 2.2378, "step": 66450 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019699779242820408, "loss": 2.3212, "step": 66455 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019699734292012204, "loss": 2.275, "step": 66460 }, { "epoch": 0.16, "grad_norm": 1.53125, "learning_rate": 0.00019699689337890396, "loss": 2.0527, "step": 66465 }, { "epoch": 0.16, "grad_norm": 2.265625, "learning_rate": 0.00019699644380454995, "loss": 2.1725, "step": 66470 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019699599419706023, "loss": 2.1571, "step": 66475 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.0001969955445564349, "loss": 2.1641, "step": 66480 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.0001969950948826741, "loss": 2.01, "step": 66485 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019699464517577807, "loss": 2.096, "step": 66490 }, { "epoch": 0.16, "grad_norm": 2.375, "learning_rate": 0.0001969941954357469, "loss": 2.2528, "step": 66495 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019699374566258074, "loss": 2.1472, "step": 66500 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019699329585627976, "loss": 2.0899, "step": 66505 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.00019699284601684412, "loss": 2.2279, "step": 66510 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019699239614427396, "loss": 2.1298, "step": 66515 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019699194623856942, "loss": 2.1478, "step": 66520 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.0001969914962997307, "loss": 2.2284, "step": 66525 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.0001969910463277579, "loss": 2.1062, "step": 66530 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019699059632265122, "loss": 2.1995, "step": 66535 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001969901462844108, "loss": 2.3245, "step": 66540 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019698969621303677, "loss": 2.0251, "step": 66545 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.0001969892461085293, "loss": 2.2068, "step": 66550 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019698879597088856, "loss": 2.1677, "step": 66555 }, { "epoch": 0.16, "grad_norm": 2.3125, "learning_rate": 0.00019698834580011466, "loss": 2.2653, "step": 66560 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.0001969878955962078, "loss": 2.3519, "step": 66565 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.0001969874453591681, "loss": 2.3535, "step": 66570 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019698699508899576, "loss": 2.1524, "step": 66575 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.00019698654478569088, "loss": 2.3252, "step": 66580 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019698609444925366, "loss": 2.2412, "step": 66585 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019698564407968423, "loss": 2.1361, "step": 66590 }, { "epoch": 0.16, "grad_norm": 1.75, "learning_rate": 0.0001969851936769827, "loss": 2.0742, "step": 66595 }, { "epoch": 0.16, "grad_norm": 1.6796875, "learning_rate": 0.0001969847432411493, "loss": 2.1006, "step": 66600 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019698429277218416, "loss": 2.2626, "step": 66605 }, { "epoch": 0.16, "grad_norm": 2.625, "learning_rate": 0.00019698384227008744, "loss": 2.1249, "step": 66610 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019698339173485928, "loss": 2.1736, "step": 66615 }, { "epoch": 0.16, "grad_norm": 1.8359375, "learning_rate": 0.0001969829411664998, "loss": 2.0958, "step": 66620 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.0001969824905650092, "loss": 2.1372, "step": 66625 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.00019698203993038762, "loss": 2.2213, "step": 66630 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.0001969815892626352, "loss": 2.3095, "step": 66635 }, { "epoch": 0.16, "grad_norm": 1.5078125, "learning_rate": 0.00019698113856175215, "loss": 2.0835, "step": 66640 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.0001969806878277386, "loss": 2.1932, "step": 66645 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019698023706059463, "loss": 2.1956, "step": 66650 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019697978626032048, "loss": 2.1923, "step": 66655 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019697933542691626, "loss": 2.1735, "step": 66660 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019697888456038216, "loss": 2.1899, "step": 66665 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001969784336607183, "loss": 2.2244, "step": 66670 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019697798272792487, "loss": 2.2155, "step": 66675 }, { "epoch": 0.16, "grad_norm": 1.7734375, "learning_rate": 0.000196977531762002, "loss": 2.1224, "step": 66680 }, { "epoch": 0.16, "grad_norm": 1.8515625, "learning_rate": 0.0001969770807629498, "loss": 2.1852, "step": 66685 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019697662973076853, "loss": 2.1045, "step": 66690 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.00019697617866545826, "loss": 2.2694, "step": 66695 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019697572756701917, "loss": 2.3225, "step": 66700 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.0001969752764354514, "loss": 2.0625, "step": 66705 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019697482527075515, "loss": 2.2679, "step": 66710 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.0001969743740729305, "loss": 2.1801, "step": 66715 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019697392284197767, "loss": 2.0509, "step": 66720 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.0001969734715778968, "loss": 2.1584, "step": 66725 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.000196973020280688, "loss": 2.0517, "step": 66730 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019697256895035147, "loss": 2.2886, "step": 66735 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.00019697211758688737, "loss": 2.2697, "step": 66740 }, { "epoch": 0.16, "grad_norm": 1.6796875, "learning_rate": 0.00019697166619029582, "loss": 2.1983, "step": 66745 }, { "epoch": 0.16, "grad_norm": 4.84375, "learning_rate": 0.000196971214760577, "loss": 2.2519, "step": 66750 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019697076329773104, "loss": 1.9437, "step": 66755 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.0001969703118017581, "loss": 2.3955, "step": 66760 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019696986027265836, "loss": 2.1523, "step": 66765 }, { "epoch": 0.16, "grad_norm": 2.671875, "learning_rate": 0.00019696940871043194, "loss": 2.0245, "step": 66770 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019696895711507906, "loss": 2.1284, "step": 66775 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019696850548659976, "loss": 2.0912, "step": 66780 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019696805382499432, "loss": 2.0957, "step": 66785 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.0001969676021302628, "loss": 2.2507, "step": 66790 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.0001969671504024054, "loss": 2.0199, "step": 66795 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019696669864142225, "loss": 2.4233, "step": 66800 }, { "epoch": 0.16, "grad_norm": 2.265625, "learning_rate": 0.0001969662468473135, "loss": 2.3142, "step": 66805 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019696579502007937, "loss": 2.1115, "step": 66810 }, { "epoch": 0.16, "grad_norm": 3.15625, "learning_rate": 0.00019696534315971994, "loss": 2.1367, "step": 66815 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.0001969648912662354, "loss": 2.1068, "step": 66820 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.0001969644393396259, "loss": 2.2349, "step": 66825 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019696398737989156, "loss": 2.1661, "step": 66830 }, { "epoch": 0.16, "grad_norm": 2.3125, "learning_rate": 0.0001969635353870326, "loss": 2.352, "step": 66835 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001969630833610491, "loss": 2.0426, "step": 66840 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019696263130194127, "loss": 2.2433, "step": 66845 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019696217920970926, "loss": 2.0047, "step": 66850 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019696172708435317, "loss": 2.1496, "step": 66855 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019696127492587322, "loss": 2.0398, "step": 66860 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019696082273426956, "loss": 2.2795, "step": 66865 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.0001969603705095423, "loss": 2.2245, "step": 66870 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.0001969599182516916, "loss": 2.1593, "step": 66875 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019695946596071768, "loss": 1.8791, "step": 66880 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.0001969590136366206, "loss": 2.2135, "step": 66885 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019695856127940062, "loss": 2.1063, "step": 66890 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.0001969581088890578, "loss": 2.1495, "step": 66895 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019695765646559235, "loss": 2.1837, "step": 66900 }, { "epoch": 0.16, "grad_norm": 2.40625, "learning_rate": 0.0001969572040090044, "loss": 2.1535, "step": 66905 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.0001969567515192941, "loss": 2.1434, "step": 66910 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019695629899646163, "loss": 2.0968, "step": 66915 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.0001969558464405071, "loss": 2.0337, "step": 66920 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.0001969553938514307, "loss": 2.0941, "step": 66925 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019695494122923263, "loss": 2.1726, "step": 66930 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019695448857391293, "loss": 2.1612, "step": 66935 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019695403588547187, "loss": 2.1399, "step": 66940 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001969535831639095, "loss": 2.1752, "step": 66945 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019695313040922607, "loss": 2.1092, "step": 66950 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019695267762142167, "loss": 2.2299, "step": 66955 }, { "epoch": 0.16, "grad_norm": 1.6015625, "learning_rate": 0.00019695222480049652, "loss": 1.9726, "step": 66960 }, { "epoch": 0.16, "grad_norm": 2.53125, "learning_rate": 0.0001969517719464507, "loss": 2.345, "step": 66965 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019695131905928437, "loss": 2.0707, "step": 66970 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.00019695086613899773, "loss": 2.2946, "step": 66975 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.0001969504131855909, "loss": 2.1738, "step": 66980 }, { "epoch": 0.16, "grad_norm": 2.375, "learning_rate": 0.0001969499601990641, "loss": 2.1343, "step": 66985 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019694950717941742, "loss": 2.3791, "step": 66990 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.000196949054126651, "loss": 2.2324, "step": 66995 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019694860104076505, "loss": 2.2169, "step": 67000 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019694814792175968, "loss": 1.917, "step": 67005 }, { "epoch": 0.16, "grad_norm": 1.6484375, "learning_rate": 0.00019694769476963508, "loss": 2.1309, "step": 67010 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.00019694724158439138, "loss": 2.0683, "step": 67015 }, { "epoch": 0.16, "grad_norm": 1.71875, "learning_rate": 0.00019694678836602874, "loss": 2.1652, "step": 67020 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019694633511454734, "loss": 2.3558, "step": 67025 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.0001969458818299473, "loss": 2.0306, "step": 67030 }, { "epoch": 0.16, "grad_norm": 2.546875, "learning_rate": 0.00019694542851222879, "loss": 2.1081, "step": 67035 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.00019694497516139196, "loss": 2.1704, "step": 67040 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.00019694452177743696, "loss": 2.1185, "step": 67045 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019694406836036398, "loss": 2.0825, "step": 67050 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.00019694361491017314, "loss": 2.3416, "step": 67055 }, { "epoch": 0.16, "grad_norm": 1.78125, "learning_rate": 0.0001969431614268646, "loss": 2.0886, "step": 67060 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.0001969427079104385, "loss": 2.1198, "step": 67065 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019694225436089503, "loss": 2.1951, "step": 67070 }, { "epoch": 0.16, "grad_norm": 2.453125, "learning_rate": 0.00019694180077823434, "loss": 2.1815, "step": 67075 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019694134716245656, "loss": 2.1754, "step": 67080 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019694089351356185, "loss": 2.1374, "step": 67085 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019694043983155038, "loss": 1.9974, "step": 67090 }, { "epoch": 0.16, "grad_norm": 1.515625, "learning_rate": 0.00019693998611642228, "loss": 2.019, "step": 67095 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019693953236817776, "loss": 2.2284, "step": 67100 }, { "epoch": 0.16, "grad_norm": 2.53125, "learning_rate": 0.0001969390785868169, "loss": 2.1529, "step": 67105 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019693862477233992, "loss": 2.265, "step": 67110 }, { "epoch": 0.16, "grad_norm": 2.453125, "learning_rate": 0.00019693817092474692, "loss": 2.1866, "step": 67115 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.0001969377170440381, "loss": 2.1938, "step": 67120 }, { "epoch": 0.16, "grad_norm": 2.265625, "learning_rate": 0.00019693726313021358, "loss": 2.2685, "step": 67125 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019693680918327357, "loss": 2.1813, "step": 67130 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019693635520321817, "loss": 2.1775, "step": 67135 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019693590119004755, "loss": 1.9721, "step": 67140 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019693544714376187, "loss": 2.1905, "step": 67145 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019693499306436128, "loss": 2.0386, "step": 67150 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019693453895184594, "loss": 2.1123, "step": 67155 }, { "epoch": 0.16, "grad_norm": 2.53125, "learning_rate": 0.00019693408480621601, "loss": 2.2327, "step": 67160 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019693363062747162, "loss": 2.0201, "step": 67165 }, { "epoch": 0.16, "grad_norm": 1.734375, "learning_rate": 0.00019693317641561296, "loss": 2.2037, "step": 67170 }, { "epoch": 0.16, "grad_norm": 1.59375, "learning_rate": 0.00019693272217064016, "loss": 2.0645, "step": 67175 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.0001969322678925534, "loss": 2.3747, "step": 67180 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.0001969318135813528, "loss": 2.3293, "step": 67185 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019693135923703854, "loss": 2.116, "step": 67190 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.00019693090485961077, "loss": 2.0641, "step": 67195 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019693045044906965, "loss": 2.1189, "step": 67200 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019692999600541533, "loss": 2.1307, "step": 67205 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019692954152864797, "loss": 2.1031, "step": 67210 }, { "epoch": 0.16, "grad_norm": 1.71875, "learning_rate": 0.00019692908701876772, "loss": 2.2169, "step": 67215 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019692863247577474, "loss": 2.3257, "step": 67220 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019692817789966914, "loss": 2.1299, "step": 67225 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.00019692772329045117, "loss": 2.244, "step": 67230 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.0001969272686481209, "loss": 2.259, "step": 67235 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.00019692681397267853, "loss": 2.3287, "step": 67240 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.0001969263592641242, "loss": 2.2288, "step": 67245 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019692590452245808, "loss": 2.2818, "step": 67250 }, { "epoch": 0.16, "grad_norm": 1.625, "learning_rate": 0.0001969254497476803, "loss": 2.1605, "step": 67255 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019692499493979102, "loss": 2.2518, "step": 67260 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.0001969245400987904, "loss": 2.2379, "step": 67265 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019692408522467862, "loss": 2.1812, "step": 67270 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.0001969236303174558, "loss": 2.1775, "step": 67275 }, { "epoch": 0.16, "grad_norm": 1.734375, "learning_rate": 0.00019692317537712214, "loss": 2.397, "step": 67280 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019692272040367772, "loss": 2.0773, "step": 67285 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019692226539712277, "loss": 2.175, "step": 67290 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.0001969218103574574, "loss": 2.1269, "step": 67295 }, { "epoch": 0.16, "grad_norm": 1.9921875, "learning_rate": 0.0001969213552846818, "loss": 2.2534, "step": 67300 }, { "epoch": 0.16, "grad_norm": 2.75, "learning_rate": 0.0001969209001787961, "loss": 2.3639, "step": 67305 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019692044503980048, "loss": 1.994, "step": 67310 }, { "epoch": 0.16, "grad_norm": 1.765625, "learning_rate": 0.00019691998986769504, "loss": 2.2034, "step": 67315 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.00019691953466248, "loss": 2.2289, "step": 67320 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019691907942415547, "loss": 2.1634, "step": 67325 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019691862415272164, "loss": 2.1645, "step": 67330 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019691816884817866, "loss": 2.2803, "step": 67335 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019691771351052663, "loss": 2.2511, "step": 67340 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019691725813976581, "loss": 2.1457, "step": 67345 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019691680273589625, "loss": 2.1545, "step": 67350 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.0001969163472989182, "loss": 2.2788, "step": 67355 }, { "epoch": 0.16, "grad_norm": 1.765625, "learning_rate": 0.0001969158918288317, "loss": 2.1164, "step": 67360 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019691543632563706, "loss": 2.2432, "step": 67365 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001969149807893343, "loss": 2.228, "step": 67370 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019691452521992364, "loss": 2.149, "step": 67375 }, { "epoch": 0.16, "grad_norm": 2.5625, "learning_rate": 0.0001969140696174052, "loss": 2.1251, "step": 67380 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019691361398177916, "loss": 2.2932, "step": 67385 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.0001969131583130457, "loss": 2.1449, "step": 67390 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019691270261120488, "loss": 2.2872, "step": 67395 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.000196912246876257, "loss": 2.1859, "step": 67400 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.0001969117911082021, "loss": 2.1329, "step": 67405 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019691133530704038, "loss": 2.2151, "step": 67410 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.000196910879472772, "loss": 2.038, "step": 67415 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019691042360539708, "loss": 2.1331, "step": 67420 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019690996770491584, "loss": 2.0234, "step": 67425 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.00019690951177132838, "loss": 2.0117, "step": 67430 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.0001969090558046349, "loss": 2.0832, "step": 67435 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019690859980483547, "loss": 1.9985, "step": 67440 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019690814377193035, "loss": 2.25, "step": 67445 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019690768770591963, "loss": 2.1645, "step": 67450 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.0001969072316068035, "loss": 2.0469, "step": 67455 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.0001969067754745821, "loss": 1.9662, "step": 67460 }, { "epoch": 0.16, "grad_norm": 2.4375, "learning_rate": 0.00019690631930925555, "loss": 2.332, "step": 67465 }, { "epoch": 0.16, "grad_norm": 2.484375, "learning_rate": 0.0001969058631108241, "loss": 2.4059, "step": 67470 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001969054068792878, "loss": 2.2044, "step": 67475 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.0001969049506146469, "loss": 2.1886, "step": 67480 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019690449431690148, "loss": 2.467, "step": 67485 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019690403798605175, "loss": 2.1577, "step": 67490 }, { "epoch": 0.16, "grad_norm": 1.7734375, "learning_rate": 0.00019690358162209782, "loss": 2.2604, "step": 67495 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001969031252250399, "loss": 2.1876, "step": 67500 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001969026687948781, "loss": 2.0495, "step": 67505 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019690221233161257, "loss": 2.2692, "step": 67510 }, { "epoch": 0.16, "grad_norm": 1.7734375, "learning_rate": 0.00019690175583524352, "loss": 2.2293, "step": 67515 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019690129930577106, "loss": 2.0586, "step": 67520 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019690084274319537, "loss": 2.2751, "step": 67525 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.0001969003861475166, "loss": 2.151, "step": 67530 }, { "epoch": 0.16, "grad_norm": 1.6953125, "learning_rate": 0.00019689992951873485, "loss": 2.1835, "step": 67535 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019689947285685035, "loss": 2.1849, "step": 67540 }, { "epoch": 0.16, "grad_norm": 2.328125, "learning_rate": 0.00019689901616186327, "loss": 2.0824, "step": 67545 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019689855943377368, "loss": 2.2409, "step": 67550 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.0001968981026725818, "loss": 1.8253, "step": 67555 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.00019689764587828778, "loss": 2.3776, "step": 67560 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019689718905089177, "loss": 2.2942, "step": 67565 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.0001968967321903939, "loss": 2.0462, "step": 67570 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.00019689627529679437, "loss": 2.3065, "step": 67575 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.0001968958183700933, "loss": 2.1725, "step": 67580 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019689536141029087, "loss": 2.1411, "step": 67585 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019689490441738726, "loss": 2.1669, "step": 67590 }, { "epoch": 0.16, "grad_norm": 1.6484375, "learning_rate": 0.00019689444739138254, "loss": 2.1185, "step": 67595 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019689399033227695, "loss": 2.2924, "step": 67600 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.0001968935332400706, "loss": 2.1412, "step": 67605 }, { "epoch": 0.16, "grad_norm": 1.5078125, "learning_rate": 0.00019689307611476368, "loss": 2.2556, "step": 67610 }, { "epoch": 0.16, "grad_norm": 2.359375, "learning_rate": 0.0001968926189563563, "loss": 2.1996, "step": 67615 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019689216176484867, "loss": 2.1234, "step": 67620 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.0001968917045402409, "loss": 2.1533, "step": 67625 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.0001968912472825332, "loss": 2.0935, "step": 67630 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019689078999172565, "loss": 2.1748, "step": 67635 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019689033266781847, "loss": 2.2659, "step": 67640 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001968898753108118, "loss": 2.2605, "step": 67645 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019688941792070578, "loss": 2.2231, "step": 67650 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.0001968889604975006, "loss": 2.1002, "step": 67655 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019688850304119636, "loss": 2.2638, "step": 67660 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019688804555179328, "loss": 2.2426, "step": 67665 }, { "epoch": 0.16, "grad_norm": 1.71875, "learning_rate": 0.00019688758802929147, "loss": 2.0945, "step": 67670 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.0001968871304736911, "loss": 2.0971, "step": 67675 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019688667288499234, "loss": 2.2625, "step": 67680 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019688621526319533, "loss": 2.3748, "step": 67685 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.0001968857576083002, "loss": 2.3047, "step": 67690 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.0001968852999203072, "loss": 2.052, "step": 67695 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019688484219921638, "loss": 2.1406, "step": 67700 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019688438444502796, "loss": 2.0912, "step": 67705 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019688392665774207, "loss": 2.1111, "step": 67710 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019688346883735887, "loss": 2.1835, "step": 67715 }, { "epoch": 0.16, "grad_norm": 1.78125, "learning_rate": 0.00019688301098387854, "loss": 2.1356, "step": 67720 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019688255309730123, "loss": 2.3053, "step": 67725 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019688209517762704, "loss": 2.16, "step": 67730 }, { "epoch": 0.16, "grad_norm": 1.6796875, "learning_rate": 0.00019688163722485618, "loss": 2.1222, "step": 67735 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019688117923898882, "loss": 1.967, "step": 67740 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.00019688072122002505, "loss": 2.1212, "step": 67745 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.0001968802631679651, "loss": 2.198, "step": 67750 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019687980508280908, "loss": 2.083, "step": 67755 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019687934696455717, "loss": 2.2194, "step": 67760 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001968788888132095, "loss": 2.1859, "step": 67765 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019687843062876628, "loss": 2.0271, "step": 67770 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.0001968779724112276, "loss": 2.24, "step": 67775 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001968775141605937, "loss": 2.2562, "step": 67780 }, { "epoch": 0.16, "grad_norm": 1.734375, "learning_rate": 0.00019687705587686464, "loss": 2.0078, "step": 67785 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.0001968765975600406, "loss": 2.044, "step": 67790 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019687613921012177, "loss": 2.0863, "step": 67795 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019687568082710831, "loss": 2.2402, "step": 67800 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019687522241100037, "loss": 2.2292, "step": 67805 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.0001968747639617981, "loss": 2.1322, "step": 67810 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.0001968743054795016, "loss": 2.1485, "step": 67815 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019687384696411112, "loss": 2.1956, "step": 67820 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.00019687338841562678, "loss": 1.9805, "step": 67825 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019687292983404871, "loss": 2.0961, "step": 67830 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.00019687247121937714, "loss": 2.1006, "step": 67835 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019687201257161214, "loss": 2.096, "step": 67840 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019687155389075392, "loss": 2.0901, "step": 67845 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.0001968710951768026, "loss": 2.1772, "step": 67850 }, { "epoch": 0.16, "grad_norm": 1.6015625, "learning_rate": 0.00019687063642975837, "loss": 2.2087, "step": 67855 }, { "epoch": 0.16, "grad_norm": 1.703125, "learning_rate": 0.00019687017764962136, "loss": 2.0998, "step": 67860 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019686971883639177, "loss": 1.9349, "step": 67865 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.0001968692599900697, "loss": 2.0981, "step": 67870 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019686880111065532, "loss": 2.3142, "step": 67875 }, { "epoch": 0.16, "grad_norm": 2.484375, "learning_rate": 0.00019686834219814886, "loss": 2.3852, "step": 67880 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.00019686788325255035, "loss": 2.1514, "step": 67885 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019686742427386003, "loss": 2.1939, "step": 67890 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.00019686696526207808, "loss": 2.1376, "step": 67895 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.0001968665062172046, "loss": 2.0238, "step": 67900 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019686604713923975, "loss": 2.128, "step": 67905 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.00019686558802818372, "loss": 2.1216, "step": 67910 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019686512888403664, "loss": 2.0601, "step": 67915 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019686466970679866, "loss": 2.3039, "step": 67920 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019686421049646996, "loss": 2.1204, "step": 67925 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.0001968637512530507, "loss": 2.0902, "step": 67930 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.000196863291976541, "loss": 2.2233, "step": 67935 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.0001968628326669411, "loss": 2.0488, "step": 67940 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019686237332425103, "loss": 2.2732, "step": 67945 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019686191394847107, "loss": 2.3722, "step": 67950 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001968614545396013, "loss": 2.1744, "step": 67955 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019686099509764188, "loss": 2.0933, "step": 67960 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019686053562259303, "loss": 2.1736, "step": 67965 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019686007611445484, "loss": 2.2615, "step": 67970 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001968596165732275, "loss": 2.2396, "step": 67975 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.00019685915699891115, "loss": 2.1187, "step": 67980 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.00019685869739150596, "loss": 2.0235, "step": 67985 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019685823775101208, "loss": 2.1271, "step": 67990 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.00019685777807742968, "loss": 2.0942, "step": 67995 }, { "epoch": 0.16, "grad_norm": 1.5390625, "learning_rate": 0.0001968573183707589, "loss": 2.045, "step": 68000 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.0001968568586309999, "loss": 2.1701, "step": 68005 }, { "epoch": 0.16, "grad_norm": 1.5078125, "learning_rate": 0.00019685639885815283, "loss": 2.0852, "step": 68010 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001968559390522179, "loss": 2.1484, "step": 68015 }, { "epoch": 0.16, "grad_norm": 2.421875, "learning_rate": 0.00019685547921319518, "loss": 2.2264, "step": 68020 }, { "epoch": 0.16, "grad_norm": 1.6484375, "learning_rate": 0.00019685501934108487, "loss": 1.9561, "step": 68025 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019685455943588715, "loss": 2.2416, "step": 68030 }, { "epoch": 0.16, "grad_norm": 1.78125, "learning_rate": 0.00019685409949760216, "loss": 2.2418, "step": 68035 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019685363952623005, "loss": 2.0931, "step": 68040 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019685317952177096, "loss": 2.0534, "step": 68045 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.0001968527194842251, "loss": 2.1519, "step": 68050 }, { "epoch": 0.16, "grad_norm": 1.5546875, "learning_rate": 0.00019685225941359257, "loss": 2.1074, "step": 68055 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019685179930987355, "loss": 2.033, "step": 68060 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.0001968513391730682, "loss": 2.314, "step": 68065 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019685087900317666, "loss": 2.2217, "step": 68070 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019685041880019918, "loss": 2.1375, "step": 68075 }, { "epoch": 0.16, "grad_norm": 1.8359375, "learning_rate": 0.00019684995856413578, "loss": 2.2213, "step": 68080 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019684949829498668, "loss": 2.2692, "step": 68085 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019684903799275204, "loss": 2.2113, "step": 68090 }, { "epoch": 0.16, "grad_norm": 6.34375, "learning_rate": 0.000196848577657432, "loss": 2.3516, "step": 68095 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019684811728902673, "loss": 2.1805, "step": 68100 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.0001968476568875364, "loss": 2.0982, "step": 68105 }, { "epoch": 0.16, "grad_norm": 8.3125, "learning_rate": 0.00019684719645296115, "loss": 2.1887, "step": 68110 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019684673598530114, "loss": 2.2989, "step": 68115 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019684627548455653, "loss": 2.4703, "step": 68120 }, { "epoch": 0.16, "grad_norm": 2.34375, "learning_rate": 0.00019684581495072747, "loss": 2.1333, "step": 68125 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019684535438381414, "loss": 2.3355, "step": 68130 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019684489378381668, "loss": 2.2381, "step": 68135 }, { "epoch": 0.16, "grad_norm": 1.6953125, "learning_rate": 0.00019684443315073522, "loss": 2.2259, "step": 68140 }, { "epoch": 0.16, "grad_norm": 2.84375, "learning_rate": 0.00019684397248456996, "loss": 2.3495, "step": 68145 }, { "epoch": 0.16, "grad_norm": 1.5703125, "learning_rate": 0.00019684351178532106, "loss": 2.1683, "step": 68150 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.00019684305105298864, "loss": 2.1465, "step": 68155 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.0001968425902875729, "loss": 2.2024, "step": 68160 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019684212948907395, "loss": 2.2885, "step": 68165 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019684166865749196, "loss": 2.2254, "step": 68170 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019684120779282714, "loss": 2.2142, "step": 68175 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019684074689507958, "loss": 1.9928, "step": 68180 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019684028596424948, "loss": 2.1059, "step": 68185 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.00019683982500033698, "loss": 2.1044, "step": 68190 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019683936400334223, "loss": 2.3863, "step": 68195 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001968389029732654, "loss": 2.1052, "step": 68200 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019683844191010664, "loss": 2.0762, "step": 68205 }, { "epoch": 0.16, "grad_norm": 1.5234375, "learning_rate": 0.0001968379808138661, "loss": 2.0405, "step": 68210 }, { "epoch": 0.16, "grad_norm": 1.71875, "learning_rate": 0.000196837519684544, "loss": 2.2559, "step": 68215 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.0001968370585221404, "loss": 2.2353, "step": 68220 }, { "epoch": 0.16, "grad_norm": 2.59375, "learning_rate": 0.00019683659732665552, "loss": 2.0825, "step": 68225 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001968361360980895, "loss": 2.0713, "step": 68230 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.0001968356748364425, "loss": 2.4125, "step": 68235 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.0001968352135417147, "loss": 2.2074, "step": 68240 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.0001968347522139062, "loss": 2.1938, "step": 68245 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.0001968342908530172, "loss": 2.0403, "step": 68250 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019683382945904785, "loss": 1.9932, "step": 68255 }, { "epoch": 0.16, "grad_norm": 2.375, "learning_rate": 0.0001968333680319983, "loss": 2.0712, "step": 68260 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.00019683290657186874, "loss": 2.3092, "step": 68265 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.0001968324450786593, "loss": 2.2367, "step": 68270 }, { "epoch": 0.16, "grad_norm": 2.65625, "learning_rate": 0.00019683198355237012, "loss": 2.1786, "step": 68275 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019683152199300138, "loss": 2.0503, "step": 68280 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019683106040055325, "loss": 2.4033, "step": 68285 }, { "epoch": 0.16, "grad_norm": 1.78125, "learning_rate": 0.00019683059877502584, "loss": 2.2113, "step": 68290 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.0001968301371164194, "loss": 2.2278, "step": 68295 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.000196829675424734, "loss": 2.2733, "step": 68300 }, { "epoch": 0.16, "grad_norm": 1.8515625, "learning_rate": 0.00019682921369996984, "loss": 2.1354, "step": 68305 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019682875194212705, "loss": 2.0723, "step": 68310 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.0001968282901512058, "loss": 1.9275, "step": 68315 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019682782832720626, "loss": 2.0474, "step": 68320 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019682736647012855, "loss": 2.0635, "step": 68325 }, { "epoch": 0.16, "grad_norm": 2.4375, "learning_rate": 0.00019682690457997287, "loss": 2.2102, "step": 68330 }, { "epoch": 0.16, "grad_norm": 2.75, "learning_rate": 0.0001968264426567394, "loss": 2.0606, "step": 68335 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.0001968259807004282, "loss": 2.0661, "step": 68340 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019682551871103953, "loss": 2.0465, "step": 68345 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.00019682505668857349, "loss": 2.0191, "step": 68350 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019682459463303027, "loss": 2.3058, "step": 68355 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019682413254441, "loss": 2.1694, "step": 68360 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019682367042271286, "loss": 1.9454, "step": 68365 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.000196823208267939, "loss": 2.3562, "step": 68370 }, { "epoch": 0.16, "grad_norm": 1.8515625, "learning_rate": 0.00019682274608008856, "loss": 2.0922, "step": 68375 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019682228385916171, "loss": 2.2166, "step": 68380 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019682182160515863, "loss": 2.1159, "step": 68385 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019682135931807946, "loss": 2.3398, "step": 68390 }, { "epoch": 0.16, "grad_norm": 2.5625, "learning_rate": 0.00019682089699792435, "loss": 2.0929, "step": 68395 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.00019682043464469348, "loss": 2.1047, "step": 68400 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019681997225838696, "loss": 1.9804, "step": 68405 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.000196819509839005, "loss": 2.1389, "step": 68410 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019681904738654774, "loss": 2.2275, "step": 68415 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019681858490101534, "loss": 2.1387, "step": 68420 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019681812238240797, "loss": 2.2496, "step": 68425 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.00019681765983072577, "loss": 2.0962, "step": 68430 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.0001968171972459689, "loss": 2.0615, "step": 68435 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001968167346281375, "loss": 1.9486, "step": 68440 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019681627197723175, "loss": 2.2356, "step": 68445 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.0001968158092932518, "loss": 2.1459, "step": 68450 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019681534657619784, "loss": 2.1933, "step": 68455 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019681488382607, "loss": 2.3263, "step": 68460 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.0001968144210428684, "loss": 1.9345, "step": 68465 }, { "epoch": 0.16, "grad_norm": 2.3125, "learning_rate": 0.0001968139582265933, "loss": 2.1111, "step": 68470 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019681349537724473, "loss": 2.0236, "step": 68475 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.00019681303249482294, "loss": 2.273, "step": 68480 }, { "epoch": 0.16, "grad_norm": 2.703125, "learning_rate": 0.00019681256957932808, "loss": 2.1415, "step": 68485 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019681210663076028, "loss": 2.2389, "step": 68490 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.0001968116436491197, "loss": 2.194, "step": 68495 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019681118063440654, "loss": 2.1562, "step": 68500 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019681071758662086, "loss": 2.1433, "step": 68505 }, { "epoch": 0.16, "grad_norm": 1.734375, "learning_rate": 0.00019681025450576294, "loss": 2.0859, "step": 68510 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019680979139183286, "loss": 2.1056, "step": 68515 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.0001968093282448308, "loss": 2.2305, "step": 68520 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019680886506475692, "loss": 2.2616, "step": 68525 }, { "epoch": 0.16, "grad_norm": 2.34375, "learning_rate": 0.00019680840185161137, "loss": 2.1856, "step": 68530 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019680793860539435, "loss": 2.2178, "step": 68535 }, { "epoch": 0.16, "grad_norm": 2.9375, "learning_rate": 0.00019680747532610594, "loss": 2.1712, "step": 68540 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019680701201374635, "loss": 2.2445, "step": 68545 }, { "epoch": 0.16, "grad_norm": 2.375, "learning_rate": 0.00019680654866831575, "loss": 2.1791, "step": 68550 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019680608528981425, "loss": 2.136, "step": 68555 }, { "epoch": 0.16, "grad_norm": 1.8359375, "learning_rate": 0.00019680562187824205, "loss": 2.087, "step": 68560 }, { "epoch": 0.16, "grad_norm": 2.390625, "learning_rate": 0.0001968051584335993, "loss": 2.1157, "step": 68565 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.00019680469495588613, "loss": 1.9956, "step": 68570 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.00019680423144510272, "loss": 2.0143, "step": 68575 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019680376790124927, "loss": 2.2748, "step": 68580 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.00019680330432432586, "loss": 2.1165, "step": 68585 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019680284071433267, "loss": 2.1658, "step": 68590 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.00019680237707126992, "loss": 2.0775, "step": 68595 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019680191339513768, "loss": 2.1695, "step": 68600 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.0001968014496859362, "loss": 2.0406, "step": 68605 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019680098594366556, "loss": 2.0929, "step": 68610 }, { "epoch": 0.16, "grad_norm": 2.40625, "learning_rate": 0.00019680052216832595, "loss": 2.2001, "step": 68615 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019680005835991752, "loss": 2.1429, "step": 68620 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.00019679959451844044, "loss": 2.1427, "step": 68625 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019679913064389487, "loss": 2.2287, "step": 68630 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019679866673628094, "loss": 2.1949, "step": 68635 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019679820279559884, "loss": 2.1531, "step": 68640 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019679773882184872, "loss": 2.2605, "step": 68645 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019679727481503075, "loss": 2.1164, "step": 68650 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019679681077514505, "loss": 2.1173, "step": 68655 }, { "epoch": 0.16, "grad_norm": 2.421875, "learning_rate": 0.00019679634670219184, "loss": 2.0935, "step": 68660 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.0001967958825961712, "loss": 2.0172, "step": 68665 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019679541845708334, "loss": 2.1944, "step": 68670 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019679495428492842, "loss": 2.2806, "step": 68675 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.0001967944900797066, "loss": 2.204, "step": 68680 }, { "epoch": 0.16, "grad_norm": 2.65625, "learning_rate": 0.00019679402584141804, "loss": 2.1259, "step": 68685 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019679356157006287, "loss": 2.2517, "step": 68690 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019679309726564124, "loss": 2.2278, "step": 68695 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019679263292815335, "loss": 2.2274, "step": 68700 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019679216855759935, "loss": 2.2206, "step": 68705 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.00019679170415397933, "loss": 2.1911, "step": 68710 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019679123971729358, "loss": 2.2215, "step": 68715 }, { "epoch": 0.16, "grad_norm": 1.78125, "learning_rate": 0.00019679077524754218, "loss": 1.9863, "step": 68720 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019679031074472527, "loss": 2.3104, "step": 68725 }, { "epoch": 0.16, "grad_norm": 2.359375, "learning_rate": 0.00019678984620884304, "loss": 2.2336, "step": 68730 }, { "epoch": 0.16, "grad_norm": 2.5625, "learning_rate": 0.00019678938163989566, "loss": 2.2033, "step": 68735 }, { "epoch": 0.16, "grad_norm": 1.6796875, "learning_rate": 0.00019678891703788325, "loss": 2.1893, "step": 68740 }, { "epoch": 0.16, "grad_norm": 2.75, "learning_rate": 0.00019678845240280599, "loss": 2.134, "step": 68745 }, { "epoch": 0.16, "grad_norm": 1.5859375, "learning_rate": 0.00019678798773466408, "loss": 2.2066, "step": 68750 }, { "epoch": 0.16, "grad_norm": 1.546875, "learning_rate": 0.00019678752303345758, "loss": 2.1465, "step": 68755 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.00019678705829918674, "loss": 2.1604, "step": 68760 }, { "epoch": 0.16, "grad_norm": 2.40625, "learning_rate": 0.00019678659353185166, "loss": 2.2433, "step": 68765 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019678612873145256, "loss": 1.9594, "step": 68770 }, { "epoch": 0.16, "grad_norm": 2.4375, "learning_rate": 0.00019678566389798956, "loss": 2.1989, "step": 68775 }, { "epoch": 0.16, "grad_norm": 2.484375, "learning_rate": 0.00019678519903146283, "loss": 2.325, "step": 68780 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019678473413187249, "loss": 2.2772, "step": 68785 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019678426919921872, "loss": 2.1486, "step": 68790 }, { "epoch": 0.16, "grad_norm": 2.46875, "learning_rate": 0.00019678380423350172, "loss": 2.221, "step": 68795 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.00019678333923472157, "loss": 2.2157, "step": 68800 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.00019678287420287854, "loss": 2.0629, "step": 68805 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.0001967824091379727, "loss": 2.0503, "step": 68810 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019678194404000422, "loss": 2.3825, "step": 68815 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019678147890897328, "loss": 2.1149, "step": 68820 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019678101374488006, "loss": 2.1033, "step": 68825 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.00019678054854772463, "loss": 2.1509, "step": 68830 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019678008331750727, "loss": 2.1933, "step": 68835 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019677961805422803, "loss": 2.2232, "step": 68840 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019677915275788713, "loss": 2.2709, "step": 68845 }, { "epoch": 0.16, "grad_norm": 2.546875, "learning_rate": 0.00019677868742848476, "loss": 2.1533, "step": 68850 }, { "epoch": 0.16, "grad_norm": 2.59375, "learning_rate": 0.000196778222066021, "loss": 2.0477, "step": 68855 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019677775667049604, "loss": 2.0675, "step": 68860 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019677729124191004, "loss": 2.1063, "step": 68865 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019677682578026317, "loss": 2.215, "step": 68870 }, { "epoch": 0.16, "grad_norm": 2.53125, "learning_rate": 0.00019677636028555558, "loss": 2.1896, "step": 68875 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019677589475778745, "loss": 2.3158, "step": 68880 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.00019677542919695887, "loss": 2.1482, "step": 68885 }, { "epoch": 0.16, "grad_norm": 3.125, "learning_rate": 0.0001967749636030701, "loss": 1.9667, "step": 68890 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019677449797612123, "loss": 2.0601, "step": 68895 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019677403231611242, "loss": 2.2768, "step": 68900 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019677356662304386, "loss": 2.2604, "step": 68905 }, { "epoch": 0.16, "grad_norm": 2.359375, "learning_rate": 0.0001967731008969157, "loss": 2.1654, "step": 68910 }, { "epoch": 0.16, "grad_norm": 2.453125, "learning_rate": 0.0001967726351377281, "loss": 2.2445, "step": 68915 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.0001967721693454812, "loss": 2.1775, "step": 68920 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019677170352017516, "loss": 2.288, "step": 68925 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019677123766181017, "loss": 2.1354, "step": 68930 }, { "epoch": 0.16, "grad_norm": 1.765625, "learning_rate": 0.00019677077177038634, "loss": 2.0532, "step": 68935 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.0001967703058459039, "loss": 2.1373, "step": 68940 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019676983988836295, "loss": 2.2005, "step": 68945 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.00019676937389776367, "loss": 2.0515, "step": 68950 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.0001967689078741062, "loss": 2.305, "step": 68955 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019676844181739075, "loss": 2.17, "step": 68960 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019676797572761742, "loss": 2.1988, "step": 68965 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019676750960478638, "loss": 2.1336, "step": 68970 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019676704344889784, "loss": 2.1415, "step": 68975 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.0001967665772599519, "loss": 2.1866, "step": 68980 }, { "epoch": 0.16, "grad_norm": 1.703125, "learning_rate": 0.00019676611103794874, "loss": 2.0856, "step": 68985 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019676564478288854, "loss": 2.2822, "step": 68990 }, { "epoch": 0.16, "grad_norm": 1.5390625, "learning_rate": 0.00019676517849477146, "loss": 2.2471, "step": 68995 }, { "epoch": 0.16, "grad_norm": 1.765625, "learning_rate": 0.0001967647121735976, "loss": 2.2987, "step": 69000 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019676424581936716, "loss": 2.1596, "step": 69005 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019676377943208032, "loss": 2.1073, "step": 69010 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019676331301173722, "loss": 2.0766, "step": 69015 }, { "epoch": 0.16, "grad_norm": 2.65625, "learning_rate": 0.00019676284655833798, "loss": 2.2334, "step": 69020 }, { "epoch": 0.16, "grad_norm": 1.5625, "learning_rate": 0.00019676238007188283, "loss": 2.0503, "step": 69025 }, { "epoch": 0.16, "grad_norm": 1.8359375, "learning_rate": 0.0001967619135523719, "loss": 1.9945, "step": 69030 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019676144699980533, "loss": 2.3179, "step": 69035 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.0001967609804141833, "loss": 2.2894, "step": 69040 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019676051379550598, "loss": 2.3105, "step": 69045 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.00019676004714377348, "loss": 2.2292, "step": 69050 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.000196759580458986, "loss": 1.9585, "step": 69055 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.00019675911374114372, "loss": 2.2624, "step": 69060 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019675864699024675, "loss": 2.1455, "step": 69065 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019675818020629526, "loss": 2.1554, "step": 69070 }, { "epoch": 0.16, "grad_norm": 1.75, "learning_rate": 0.00019675771338928947, "loss": 2.1209, "step": 69075 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019675724653922942, "loss": 2.2823, "step": 69080 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019675677965611538, "loss": 2.2214, "step": 69085 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.0001967563127399475, "loss": 2.2237, "step": 69090 }, { "epoch": 0.16, "grad_norm": 1.8515625, "learning_rate": 0.00019675584579072585, "loss": 2.189, "step": 69095 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019675537880845068, "loss": 2.1912, "step": 69100 }, { "epoch": 0.16, "grad_norm": 2.703125, "learning_rate": 0.00019675491179312212, "loss": 2.1429, "step": 69105 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001967544447447403, "loss": 2.2345, "step": 69110 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019675397766330543, "loss": 2.0689, "step": 69115 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019675351054881764, "loss": 2.2554, "step": 69120 }, { "epoch": 0.16, "grad_norm": 2.453125, "learning_rate": 0.00019675304340127712, "loss": 2.2631, "step": 69125 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.00019675257622068398, "loss": 2.3276, "step": 69130 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019675210900703838, "loss": 2.2978, "step": 69135 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019675164176034056, "loss": 2.116, "step": 69140 }, { "epoch": 0.16, "grad_norm": 2.390625, "learning_rate": 0.0001967511744805906, "loss": 2.0876, "step": 69145 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019675070716778867, "loss": 2.2631, "step": 69150 }, { "epoch": 0.16, "grad_norm": 1.703125, "learning_rate": 0.00019675023982193497, "loss": 2.0624, "step": 69155 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.00019674977244302962, "loss": 2.3807, "step": 69160 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019674930503107277, "loss": 2.2748, "step": 69165 }, { "epoch": 0.16, "grad_norm": 2.46875, "learning_rate": 0.00019674883758606461, "loss": 2.2375, "step": 69170 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019674837010800534, "loss": 2.3147, "step": 69175 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.00019674790259689503, "loss": 2.225, "step": 69180 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.0001967474350527339, "loss": 2.1518, "step": 69185 }, { "epoch": 0.16, "grad_norm": 1.8515625, "learning_rate": 0.00019674696747552206, "loss": 2.3046, "step": 69190 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019674649986525973, "loss": 2.2059, "step": 69195 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019674603222194704, "loss": 2.2956, "step": 69200 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019674556454558415, "loss": 2.2147, "step": 69205 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.0001967450968361712, "loss": 2.3019, "step": 69210 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.0001967446290937084, "loss": 2.0878, "step": 69215 }, { "epoch": 0.16, "grad_norm": 2.328125, "learning_rate": 0.00019674416131819586, "loss": 2.112, "step": 69220 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.0001967436935096338, "loss": 2.1646, "step": 69225 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.0001967432256680223, "loss": 2.2867, "step": 69230 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019674275779336156, "loss": 2.1698, "step": 69235 }, { "epoch": 0.16, "grad_norm": 1.75, "learning_rate": 0.00019674228988565172, "loss": 2.2831, "step": 69240 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.000196741821944893, "loss": 2.0502, "step": 69245 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019674135397108552, "loss": 2.1527, "step": 69250 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.0001967408859642294, "loss": 2.2983, "step": 69255 }, { "epoch": 0.16, "grad_norm": 2.375, "learning_rate": 0.00019674041792432488, "loss": 2.0754, "step": 69260 }, { "epoch": 0.16, "grad_norm": 1.703125, "learning_rate": 0.00019673994985137206, "loss": 2.0223, "step": 69265 }, { "epoch": 0.16, "grad_norm": 1.765625, "learning_rate": 0.0001967394817453711, "loss": 2.1631, "step": 69270 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.0001967390136063222, "loss": 1.9227, "step": 69275 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019673854543422552, "loss": 2.1169, "step": 69280 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019673807722908116, "loss": 2.1053, "step": 69285 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019673760899088934, "loss": 2.3484, "step": 69290 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.0001967371407196502, "loss": 2.3169, "step": 69295 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019673667241536388, "loss": 2.2691, "step": 69300 }, { "epoch": 0.16, "grad_norm": 1.7734375, "learning_rate": 0.00019673620407803058, "loss": 2.2172, "step": 69305 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019673573570765042, "loss": 2.4412, "step": 69310 }, { "epoch": 0.16, "grad_norm": 1.6328125, "learning_rate": 0.0001967352673042236, "loss": 2.232, "step": 69315 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019673479886775025, "loss": 2.1616, "step": 69320 }, { "epoch": 0.16, "grad_norm": 2.546875, "learning_rate": 0.00019673433039823053, "loss": 2.241, "step": 69325 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.00019673386189566458, "loss": 2.1353, "step": 69330 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019673339336005264, "loss": 2.0179, "step": 69335 }, { "epoch": 0.16, "grad_norm": 2.203125, "learning_rate": 0.0001967329247913948, "loss": 2.284, "step": 69340 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019673245618969122, "loss": 2.0428, "step": 69345 }, { "epoch": 0.16, "grad_norm": 1.796875, "learning_rate": 0.00019673198755494208, "loss": 2.2188, "step": 69350 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019673151888714756, "loss": 2.0259, "step": 69355 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.0001967310501863078, "loss": 2.0259, "step": 69360 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019673058145242296, "loss": 2.173, "step": 69365 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.0001967301126854932, "loss": 2.1247, "step": 69370 }, { "epoch": 0.16, "grad_norm": 1.7734375, "learning_rate": 0.00019672964388551867, "loss": 2.2015, "step": 69375 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019672917505249952, "loss": 2.2884, "step": 69380 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019672870618643595, "loss": 2.3305, "step": 69385 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.0001967282372873281, "loss": 2.09, "step": 69390 }, { "epoch": 0.16, "grad_norm": 2.609375, "learning_rate": 0.00019672776835517612, "loss": 2.2131, "step": 69395 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.0001967272993899802, "loss": 2.0158, "step": 69400 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019672683039174046, "loss": 2.2307, "step": 69405 }, { "epoch": 0.16, "grad_norm": 2.328125, "learning_rate": 0.0001967263613604571, "loss": 2.0473, "step": 69410 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019672589229613024, "loss": 2.0702, "step": 69415 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019672542319876008, "loss": 2.1854, "step": 69420 }, { "epoch": 0.16, "grad_norm": 1.7578125, "learning_rate": 0.00019672495406834674, "loss": 2.1116, "step": 69425 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.00019672448490489042, "loss": 2.3565, "step": 69430 }, { "epoch": 0.16, "grad_norm": 2.453125, "learning_rate": 0.00019672401570839125, "loss": 2.1679, "step": 69435 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019672354647884942, "loss": 2.1935, "step": 69440 }, { "epoch": 0.16, "grad_norm": 2.25, "learning_rate": 0.00019672307721626506, "loss": 2.1923, "step": 69445 }, { "epoch": 0.16, "grad_norm": 1.7265625, "learning_rate": 0.00019672260792063834, "loss": 2.124, "step": 69450 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.00019672213859196942, "loss": 2.317, "step": 69455 }, { "epoch": 0.16, "grad_norm": 2.421875, "learning_rate": 0.00019672166923025847, "loss": 2.0825, "step": 69460 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019672119983550564, "loss": 2.1604, "step": 69465 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.0001967207304077111, "loss": 2.2356, "step": 69470 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.000196720260946875, "loss": 2.0242, "step": 69475 }, { "epoch": 0.16, "grad_norm": 1.84375, "learning_rate": 0.0001967197914529975, "loss": 2.0939, "step": 69480 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019671932192607877, "loss": 2.1793, "step": 69485 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019671885236611898, "loss": 2.2544, "step": 69490 }, { "epoch": 0.16, "grad_norm": 2.328125, "learning_rate": 0.00019671838277311824, "loss": 2.2198, "step": 69495 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019671791314707676, "loss": 2.2914, "step": 69500 }, { "epoch": 0.16, "grad_norm": 1.8125, "learning_rate": 0.0001967174434879947, "loss": 2.313, "step": 69505 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.0001967169737958722, "loss": 2.0923, "step": 69510 }, { "epoch": 0.16, "grad_norm": 2.375, "learning_rate": 0.00019671650407070943, "loss": 2.2087, "step": 69515 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019671603431250655, "loss": 2.3547, "step": 69520 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019671556452126372, "loss": 2.0387, "step": 69525 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019671509469698108, "loss": 2.1509, "step": 69530 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019671462483965884, "loss": 2.2884, "step": 69535 }, { "epoch": 0.16, "grad_norm": 2.296875, "learning_rate": 0.00019671415494929711, "loss": 2.2759, "step": 69540 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.0001967136850258961, "loss": 2.0568, "step": 69545 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.0001967132150694559, "loss": 2.1849, "step": 69550 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019671274507997673, "loss": 2.0922, "step": 69555 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.00019671227505745873, "loss": 1.9837, "step": 69560 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019671180500190207, "loss": 2.0344, "step": 69565 }, { "epoch": 0.16, "grad_norm": 2.421875, "learning_rate": 0.00019671133491330688, "loss": 2.1309, "step": 69570 }, { "epoch": 0.16, "grad_norm": 1.703125, "learning_rate": 0.0001967108647916734, "loss": 2.2737, "step": 69575 }, { "epoch": 0.16, "grad_norm": 1.6171875, "learning_rate": 0.00019671039463700166, "loss": 2.1808, "step": 69580 }, { "epoch": 0.16, "grad_norm": 2.109375, "learning_rate": 0.00019670992444929195, "loss": 2.1564, "step": 69585 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019670945422854433, "loss": 2.1211, "step": 69590 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.00019670898397475904, "loss": 1.9648, "step": 69595 }, { "epoch": 0.16, "grad_norm": 2.390625, "learning_rate": 0.0001967085136879362, "loss": 2.2379, "step": 69600 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.000196708043368076, "loss": 2.1788, "step": 69605 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019670757301517856, "loss": 2.1729, "step": 69610 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019670710262924406, "loss": 2.2073, "step": 69615 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.00019670663221027263, "loss": 2.1709, "step": 69620 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.0001967061617582645, "loss": 2.1904, "step": 69625 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.00019670569127321977, "loss": 2.1728, "step": 69630 }, { "epoch": 0.16, "grad_norm": 1.65625, "learning_rate": 0.00019670522075513865, "loss": 2.0407, "step": 69635 }, { "epoch": 0.16, "grad_norm": 2.046875, "learning_rate": 0.00019670475020402124, "loss": 2.1643, "step": 69640 }, { "epoch": 0.16, "grad_norm": 2.09375, "learning_rate": 0.00019670427961986775, "loss": 2.0097, "step": 69645 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.0001967038090026783, "loss": 2.0138, "step": 69650 }, { "epoch": 0.16, "grad_norm": 1.8984375, "learning_rate": 0.00019670333835245313, "loss": 2.1434, "step": 69655 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019670286766919232, "loss": 2.1356, "step": 69660 }, { "epoch": 0.16, "grad_norm": 1.6484375, "learning_rate": 0.000196702396952896, "loss": 1.8064, "step": 69665 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019670192620356445, "loss": 2.1596, "step": 69670 }, { "epoch": 0.16, "grad_norm": 1.671875, "learning_rate": 0.00019670145542119777, "loss": 2.2624, "step": 69675 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.0001967009846057961, "loss": 2.303, "step": 69680 }, { "epoch": 0.16, "grad_norm": 1.90625, "learning_rate": 0.0001967005137573596, "loss": 2.164, "step": 69685 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.0001967000428758885, "loss": 2.1232, "step": 69690 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.0001966995719613829, "loss": 2.047, "step": 69695 }, { "epoch": 0.16, "grad_norm": 1.8203125, "learning_rate": 0.00019669910101384296, "loss": 2.0215, "step": 69700 }, { "epoch": 0.16, "grad_norm": 1.6953125, "learning_rate": 0.00019669863003326883, "loss": 2.3103, "step": 69705 }, { "epoch": 0.16, "grad_norm": 2.328125, "learning_rate": 0.0001966981590196607, "loss": 2.3136, "step": 69710 }, { "epoch": 0.16, "grad_norm": 2.484375, "learning_rate": 0.00019669768797301874, "loss": 2.3438, "step": 69715 }, { "epoch": 0.16, "grad_norm": 2.21875, "learning_rate": 0.0001966972168933431, "loss": 2.1412, "step": 69720 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019669674578063395, "loss": 2.2616, "step": 69725 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.0001966962746348914, "loss": 2.1075, "step": 69730 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.00019669580345611567, "loss": 2.0472, "step": 69735 }, { "epoch": 0.16, "grad_norm": 2.28125, "learning_rate": 0.0001966953322443069, "loss": 2.0146, "step": 69740 }, { "epoch": 0.16, "grad_norm": 2.34375, "learning_rate": 0.00019669486099946527, "loss": 2.1186, "step": 69745 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019669438972159088, "loss": 2.1855, "step": 69750 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019669391841068397, "loss": 2.0128, "step": 69755 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.00019669344706674466, "loss": 2.153, "step": 69760 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.0001966929756897731, "loss": 1.9935, "step": 69765 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.00019669250427976947, "loss": 2.0265, "step": 69770 }, { "epoch": 0.16, "grad_norm": 1.9765625, "learning_rate": 0.00019669203283673392, "loss": 2.1377, "step": 69775 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019669156136066661, "loss": 2.2163, "step": 69780 }, { "epoch": 0.16, "grad_norm": 2.4375, "learning_rate": 0.00019669108985156773, "loss": 2.0893, "step": 69785 }, { "epoch": 0.16, "grad_norm": 1.953125, "learning_rate": 0.0001966906183094374, "loss": 1.8388, "step": 69790 }, { "epoch": 0.16, "grad_norm": 1.96875, "learning_rate": 0.00019669014673427584, "loss": 2.3582, "step": 69795 }, { "epoch": 0.16, "grad_norm": 1.6484375, "learning_rate": 0.00019668967512608316, "loss": 2.1493, "step": 69800 }, { "epoch": 0.16, "grad_norm": 2.53125, "learning_rate": 0.0001966892034848595, "loss": 2.2237, "step": 69805 }, { "epoch": 0.16, "grad_norm": 2.390625, "learning_rate": 0.00019668873181060507, "loss": 2.2671, "step": 69810 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.00019668826010332005, "loss": 2.1917, "step": 69815 }, { "epoch": 0.16, "grad_norm": 1.9609375, "learning_rate": 0.0001966877883630045, "loss": 2.196, "step": 69820 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.0001966873165896587, "loss": 2.1644, "step": 69825 }, { "epoch": 0.16, "grad_norm": 2.59375, "learning_rate": 0.00019668684478328278, "loss": 2.154, "step": 69830 }, { "epoch": 0.16, "grad_norm": 1.6171875, "learning_rate": 0.00019668637294387682, "loss": 2.1264, "step": 69835 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019668590107144106, "loss": 2.2799, "step": 69840 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019668542916597564, "loss": 2.1751, "step": 69845 }, { "epoch": 0.16, "grad_norm": 2.4375, "learning_rate": 0.00019668495722748074, "loss": 2.2395, "step": 69850 }, { "epoch": 0.16, "grad_norm": 2.359375, "learning_rate": 0.00019668448525595654, "loss": 2.035, "step": 69855 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.0001966840132514031, "loss": 2.2257, "step": 69860 }, { "epoch": 0.16, "grad_norm": 3.71875, "learning_rate": 0.00019668354121382068, "loss": 2.0145, "step": 69865 }, { "epoch": 0.16, "grad_norm": 1.5859375, "learning_rate": 0.0001966830691432094, "loss": 2.0635, "step": 69870 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.00019668259703956942, "loss": 2.2402, "step": 69875 }, { "epoch": 0.16, "grad_norm": 2.1875, "learning_rate": 0.00019668212490290095, "loss": 2.3375, "step": 69880 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019668165273320406, "loss": 2.1879, "step": 69885 }, { "epoch": 0.16, "grad_norm": 1.71875, "learning_rate": 0.000196681180530479, "loss": 1.9893, "step": 69890 }, { "epoch": 0.16, "grad_norm": 1.890625, "learning_rate": 0.00019668070829472587, "loss": 2.1614, "step": 69895 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.0001966802360259449, "loss": 2.1431, "step": 69900 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.00019667976372413616, "loss": 2.2639, "step": 69905 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019667929138929987, "loss": 2.2079, "step": 69910 }, { "epoch": 0.16, "grad_norm": 1.71875, "learning_rate": 0.0001966788190214362, "loss": 2.1886, "step": 69915 }, { "epoch": 0.16, "grad_norm": 2.234375, "learning_rate": 0.0001966783466205453, "loss": 2.2062, "step": 69920 }, { "epoch": 0.16, "grad_norm": 2.0625, "learning_rate": 0.00019667787418662728, "loss": 2.1698, "step": 69925 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019667740171968237, "loss": 2.0863, "step": 69930 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019667692921971074, "loss": 1.9792, "step": 69935 }, { "epoch": 0.16, "grad_norm": 2.03125, "learning_rate": 0.00019667645668671247, "loss": 2.3586, "step": 69940 }, { "epoch": 0.16, "grad_norm": 1.859375, "learning_rate": 0.00019667598412068777, "loss": 2.2341, "step": 69945 }, { "epoch": 0.16, "grad_norm": 1.984375, "learning_rate": 0.0001966755115216368, "loss": 2.2742, "step": 69950 }, { "epoch": 0.16, "grad_norm": 1.8828125, "learning_rate": 0.00019667503888955975, "loss": 1.9169, "step": 69955 }, { "epoch": 0.16, "grad_norm": 2.015625, "learning_rate": 0.00019667456622445673, "loss": 1.8572, "step": 69960 }, { "epoch": 0.16, "grad_norm": 1.828125, "learning_rate": 0.00019667409352632794, "loss": 2.1511, "step": 69965 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019667362079517353, "loss": 2.2483, "step": 69970 }, { "epoch": 0.16, "grad_norm": 1.8046875, "learning_rate": 0.00019667314803099365, "loss": 2.0556, "step": 69975 }, { "epoch": 0.16, "grad_norm": 2.125, "learning_rate": 0.00019667267523378846, "loss": 2.0681, "step": 69980 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019667220240355814, "loss": 2.2115, "step": 69985 }, { "epoch": 0.16, "grad_norm": 1.6640625, "learning_rate": 0.00019667172954030283, "loss": 1.9242, "step": 69990 }, { "epoch": 0.16, "grad_norm": 2.140625, "learning_rate": 0.00019667125664402273, "loss": 2.1793, "step": 69995 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019667078371471796, "loss": 2.1723, "step": 70000 }, { "epoch": 0.16, "grad_norm": 2.875, "learning_rate": 0.00019667031075238872, "loss": 2.1955, "step": 70005 }, { "epoch": 0.16, "grad_norm": 1.6015625, "learning_rate": 0.00019666983775703511, "loss": 1.9941, "step": 70010 }, { "epoch": 0.16, "grad_norm": 1.7421875, "learning_rate": 0.00019666936472865733, "loss": 2.2153, "step": 70015 }, { "epoch": 0.16, "grad_norm": 1.9375, "learning_rate": 0.00019666889166725557, "loss": 2.0139, "step": 70020 }, { "epoch": 0.16, "grad_norm": 1.8359375, "learning_rate": 0.00019666841857282993, "loss": 2.045, "step": 70025 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019666794544538063, "loss": 2.2981, "step": 70030 }, { "epoch": 0.16, "grad_norm": 1.8671875, "learning_rate": 0.00019666747228490783, "loss": 2.2302, "step": 70035 }, { "epoch": 0.16, "grad_norm": 1.6953125, "learning_rate": 0.00019666699909141162, "loss": 2.054, "step": 70040 }, { "epoch": 0.16, "grad_norm": 2.15625, "learning_rate": 0.00019666652586489223, "loss": 2.184, "step": 70045 }, { "epoch": 0.16, "grad_norm": 1.9453125, "learning_rate": 0.00019666605260534984, "loss": 2.0039, "step": 70050 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.0001966655793127845, "loss": 1.9859, "step": 70055 }, { "epoch": 0.16, "grad_norm": 1.875, "learning_rate": 0.0001966651059871965, "loss": 2.1868, "step": 70060 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019666463262858592, "loss": 2.1762, "step": 70065 }, { "epoch": 0.16, "grad_norm": 1.9296875, "learning_rate": 0.00019666415923695296, "loss": 2.2212, "step": 70070 }, { "epoch": 0.16, "grad_norm": 2.078125, "learning_rate": 0.00019666368581229776, "loss": 2.1477, "step": 70075 }, { "epoch": 0.16, "grad_norm": 2.0, "learning_rate": 0.0001966632123546205, "loss": 2.2436, "step": 70080 }, { "epoch": 0.16, "grad_norm": 1.921875, "learning_rate": 0.00019666273886392133, "loss": 2.265, "step": 70085 }, { "epoch": 0.16, "grad_norm": 1.7890625, "learning_rate": 0.00019666226534020042, "loss": 2.2699, "step": 70090 }, { "epoch": 0.16, "grad_norm": 1.6875, "learning_rate": 0.00019666179178345794, "loss": 2.2498, "step": 70095 }, { "epoch": 0.16, "grad_norm": 1.7109375, "learning_rate": 0.000196661318193694, "loss": 2.2544, "step": 70100 }, { "epoch": 0.16, "grad_norm": 1.9140625, "learning_rate": 0.00019666084457090882, "loss": 2.127, "step": 70105 }, { "epoch": 0.16, "grad_norm": 2.171875, "learning_rate": 0.00019666037091510256, "loss": 2.1302, "step": 70110 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019665989722627536, "loss": 1.9577, "step": 70115 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019665942350442737, "loss": 2.0527, "step": 70120 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019665894974955878, "loss": 2.1131, "step": 70125 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019665847596166974, "loss": 2.1929, "step": 70130 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.0001966580021407604, "loss": 2.1292, "step": 70135 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019665752828683096, "loss": 2.208, "step": 70140 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.0001966570543998815, "loss": 2.1148, "step": 70145 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019665658047991226, "loss": 2.2065, "step": 70150 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.0001966561065269234, "loss": 2.2068, "step": 70155 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019665563254091505, "loss": 2.1994, "step": 70160 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.0001966551585218874, "loss": 2.222, "step": 70165 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019665468446984058, "loss": 2.2331, "step": 70170 }, { "epoch": 0.17, "grad_norm": 1.6015625, "learning_rate": 0.00019665421038477478, "loss": 2.1724, "step": 70175 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019665373626669011, "loss": 2.0304, "step": 70180 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.0001966532621155868, "loss": 2.3463, "step": 70185 }, { "epoch": 0.17, "grad_norm": 2.53125, "learning_rate": 0.000196652787931465, "loss": 2.2465, "step": 70190 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019665231371432485, "loss": 2.1261, "step": 70195 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019665183946416647, "loss": 2.1146, "step": 70200 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019665136518099013, "loss": 2.2185, "step": 70205 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.0001966508908647959, "loss": 2.1137, "step": 70210 }, { "epoch": 0.17, "grad_norm": 2.671875, "learning_rate": 0.00019665041651558397, "loss": 2.2721, "step": 70215 }, { "epoch": 0.17, "grad_norm": 1.7265625, "learning_rate": 0.00019664994213335448, "loss": 2.2501, "step": 70220 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019664946771810767, "loss": 2.1578, "step": 70225 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.0001966489932698436, "loss": 2.1067, "step": 70230 }, { "epoch": 0.17, "grad_norm": 2.421875, "learning_rate": 0.00019664851878856252, "loss": 2.0396, "step": 70235 }, { "epoch": 0.17, "grad_norm": 1.7421875, "learning_rate": 0.00019664804427426454, "loss": 2.3544, "step": 70240 }, { "epoch": 0.17, "grad_norm": 1.703125, "learning_rate": 0.00019664756972694982, "loss": 2.1853, "step": 70245 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.00019664709514661857, "loss": 2.1616, "step": 70250 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019664662053327087, "loss": 2.128, "step": 70255 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019664614588690697, "loss": 2.1298, "step": 70260 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.000196645671207527, "loss": 2.2945, "step": 70265 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.0001966451964951311, "loss": 2.2247, "step": 70270 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019664472174971948, "loss": 2.04, "step": 70275 }, { "epoch": 0.17, "grad_norm": 1.609375, "learning_rate": 0.00019664424697129224, "loss": 2.0014, "step": 70280 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019664377215984955, "loss": 2.2993, "step": 70285 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.0001966432973153916, "loss": 2.055, "step": 70290 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019664282243791857, "loss": 2.2774, "step": 70295 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.0001966423475274306, "loss": 2.2498, "step": 70300 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019664187258392786, "loss": 2.2183, "step": 70305 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019664139760741046, "loss": 2.0982, "step": 70310 }, { "epoch": 0.17, "grad_norm": 2.609375, "learning_rate": 0.00019664092259787864, "loss": 2.2561, "step": 70315 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019664044755533252, "loss": 2.3197, "step": 70320 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019663997247977227, "loss": 2.3856, "step": 70325 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019663949737119807, "loss": 2.104, "step": 70330 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019663902222961002, "loss": 2.2267, "step": 70335 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.0001966385470550084, "loss": 2.095, "step": 70340 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019663807184739325, "loss": 2.2808, "step": 70345 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.0001966375966067648, "loss": 2.1896, "step": 70350 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019663712133312318, "loss": 2.0802, "step": 70355 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019663664602646857, "loss": 2.1703, "step": 70360 }, { "epoch": 0.17, "grad_norm": 1.6875, "learning_rate": 0.00019663617068680112, "loss": 2.2247, "step": 70365 }, { "epoch": 0.17, "grad_norm": 1.6640625, "learning_rate": 0.00019663569531412102, "loss": 2.0902, "step": 70370 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001966352199084284, "loss": 2.3306, "step": 70375 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019663474446972345, "loss": 2.2487, "step": 70380 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.0001966342689980063, "loss": 2.0767, "step": 70385 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019663379349327715, "loss": 2.2283, "step": 70390 }, { "epoch": 0.17, "grad_norm": 1.640625, "learning_rate": 0.00019663331795553615, "loss": 2.1494, "step": 70395 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.0001966328423847834, "loss": 2.2253, "step": 70400 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019663236678101918, "loss": 1.9454, "step": 70405 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019663189114424356, "loss": 2.0632, "step": 70410 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019663141547445677, "loss": 2.1846, "step": 70415 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001966309397716589, "loss": 2.2919, "step": 70420 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019663046403585016, "loss": 2.1967, "step": 70425 }, { "epoch": 0.17, "grad_norm": 1.5703125, "learning_rate": 0.0001966299882670307, "loss": 2.3267, "step": 70430 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019662951246520068, "loss": 2.3169, "step": 70435 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.00019662903663036026, "loss": 2.0559, "step": 70440 }, { "epoch": 0.17, "grad_norm": 2.4375, "learning_rate": 0.00019662856076250963, "loss": 2.1344, "step": 70445 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019662808486164892, "loss": 2.3389, "step": 70450 }, { "epoch": 0.17, "grad_norm": 1.625, "learning_rate": 0.00019662760892777832, "loss": 1.9037, "step": 70455 }, { "epoch": 0.17, "grad_norm": 1.9765625, "learning_rate": 0.00019662713296089793, "loss": 2.185, "step": 70460 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.000196626656961008, "loss": 2.095, "step": 70465 }, { "epoch": 0.17, "grad_norm": 1.625, "learning_rate": 0.00019662618092810866, "loss": 2.2548, "step": 70470 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019662570486220004, "loss": 2.2248, "step": 70475 }, { "epoch": 0.17, "grad_norm": 1.515625, "learning_rate": 0.00019662522876328232, "loss": 2.1316, "step": 70480 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019662475263135568, "loss": 2.1339, "step": 70485 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019662427646642027, "loss": 2.0802, "step": 70490 }, { "epoch": 0.17, "grad_norm": 1.7890625, "learning_rate": 0.00019662380026847623, "loss": 2.132, "step": 70495 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.0001966233240375238, "loss": 2.3502, "step": 70500 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019662284777356308, "loss": 2.1735, "step": 70505 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019662237147659423, "loss": 2.4195, "step": 70510 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.0001966218951466174, "loss": 2.2372, "step": 70515 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019662141878363284, "loss": 2.2289, "step": 70520 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.00019662094238764058, "loss": 2.261, "step": 70525 }, { "epoch": 0.17, "grad_norm": 1.7421875, "learning_rate": 0.0001966204659586409, "loss": 2.1077, "step": 70530 }, { "epoch": 0.17, "grad_norm": 2.546875, "learning_rate": 0.0001966199894966339, "loss": 2.2043, "step": 70535 }, { "epoch": 0.17, "grad_norm": 1.625, "learning_rate": 0.00019661951300161976, "loss": 2.2309, "step": 70540 }, { "epoch": 0.17, "grad_norm": 1.53125, "learning_rate": 0.00019661903647359865, "loss": 2.277, "step": 70545 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.00019661855991257073, "loss": 2.2821, "step": 70550 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.00019661808331853612, "loss": 2.1709, "step": 70555 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019661760669149506, "loss": 2.1425, "step": 70560 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019661713003144766, "loss": 2.1293, "step": 70565 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.0001966166533383941, "loss": 2.0649, "step": 70570 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019661617661233453, "loss": 2.0287, "step": 70575 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019661569985326914, "loss": 2.0442, "step": 70580 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.00019661522306119804, "loss": 2.3172, "step": 70585 }, { "epoch": 0.17, "grad_norm": 2.59375, "learning_rate": 0.00019661474623612146, "loss": 2.0703, "step": 70590 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.0001966142693780395, "loss": 2.2968, "step": 70595 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.0001966137924869524, "loss": 2.1055, "step": 70600 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.00019661331556286024, "loss": 2.1651, "step": 70605 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019661283860576326, "loss": 2.3595, "step": 70610 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019661236161566152, "loss": 2.1395, "step": 70615 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019661188459255528, "loss": 2.1093, "step": 70620 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019661140753644466, "loss": 2.2526, "step": 70625 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019661093044732986, "loss": 2.1728, "step": 70630 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019661045332521098, "loss": 2.0725, "step": 70635 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019660997617008822, "loss": 2.2535, "step": 70640 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019660949898196175, "loss": 2.153, "step": 70645 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.0001966090217608317, "loss": 2.0969, "step": 70650 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.0001966085445066983, "loss": 2.0533, "step": 70655 }, { "epoch": 0.17, "grad_norm": 1.734375, "learning_rate": 0.00019660806721956165, "loss": 2.2118, "step": 70660 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.0001966075898994219, "loss": 2.2687, "step": 70665 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.0001966071125462793, "loss": 2.1713, "step": 70670 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019660663516013393, "loss": 2.1512, "step": 70675 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019660615774098598, "loss": 2.2348, "step": 70680 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.0001966056802888356, "loss": 2.0843, "step": 70685 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019660520280368298, "loss": 1.9157, "step": 70690 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.0001966047252855283, "loss": 2.2796, "step": 70695 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019660424773437166, "loss": 2.2207, "step": 70700 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019660377015021324, "loss": 2.2637, "step": 70705 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019660329253305326, "loss": 2.1161, "step": 70710 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019660281488289184, "loss": 2.133, "step": 70715 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019660233719972914, "loss": 2.1788, "step": 70720 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019660185948356534, "loss": 2.1081, "step": 70725 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019660138173440057, "loss": 2.1398, "step": 70730 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.000196600903952235, "loss": 2.2038, "step": 70735 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019660042613706884, "loss": 2.0924, "step": 70740 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019659994828890223, "loss": 2.0603, "step": 70745 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.0001965994704077353, "loss": 2.2561, "step": 70750 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019659899249356824, "loss": 2.2183, "step": 70755 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019659851454640123, "loss": 2.0951, "step": 70760 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019659803656623442, "loss": 2.3397, "step": 70765 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019659755855306793, "loss": 2.3771, "step": 70770 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.000196597080506902, "loss": 2.2213, "step": 70775 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019659660242773676, "loss": 2.1699, "step": 70780 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019659612431557235, "loss": 2.2714, "step": 70785 }, { "epoch": 0.17, "grad_norm": 1.734375, "learning_rate": 0.00019659564617040897, "loss": 2.2615, "step": 70790 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.00019659516799224672, "loss": 2.033, "step": 70795 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019659468978108584, "loss": 2.0727, "step": 70800 }, { "epoch": 0.17, "grad_norm": 2.375, "learning_rate": 0.00019659421153692648, "loss": 2.1574, "step": 70805 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019659373325976875, "loss": 2.0908, "step": 70810 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019659325494961286, "loss": 2.1344, "step": 70815 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019659277660645898, "loss": 2.3275, "step": 70820 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019659229823030725, "loss": 2.3591, "step": 70825 }, { "epoch": 0.17, "grad_norm": 1.7265625, "learning_rate": 0.00019659181982115782, "loss": 2.2678, "step": 70830 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.0001965913413790109, "loss": 2.1794, "step": 70835 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.00019659086290386662, "loss": 2.228, "step": 70840 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019659038439572512, "loss": 2.2872, "step": 70845 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019658990585458662, "loss": 2.3882, "step": 70850 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019658942728045125, "loss": 2.2305, "step": 70855 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.0001965889486733192, "loss": 2.2772, "step": 70860 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019658847003319057, "loss": 2.1346, "step": 70865 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.0001965879913600656, "loss": 2.2905, "step": 70870 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.0001965875126539444, "loss": 2.0722, "step": 70875 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019658703391482718, "loss": 2.1816, "step": 70880 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019658655514271406, "loss": 2.1566, "step": 70885 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019658607633760524, "loss": 2.1523, "step": 70890 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019658559749950082, "loss": 2.0919, "step": 70895 }, { "epoch": 0.17, "grad_norm": 1.6640625, "learning_rate": 0.00019658511862840106, "loss": 2.2015, "step": 70900 }, { "epoch": 0.17, "grad_norm": 1.640625, "learning_rate": 0.00019658463972430603, "loss": 2.1385, "step": 70905 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019658416078721595, "loss": 2.2402, "step": 70910 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019658368181713098, "loss": 2.2032, "step": 70915 }, { "epoch": 0.17, "grad_norm": 1.6640625, "learning_rate": 0.00019658320281405126, "loss": 2.2561, "step": 70920 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019658272377797697, "loss": 2.0039, "step": 70925 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.0001965822447089083, "loss": 2.1202, "step": 70930 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019658176560684534, "loss": 2.1764, "step": 70935 }, { "epoch": 0.17, "grad_norm": 1.5234375, "learning_rate": 0.00019658128647178833, "loss": 2.0441, "step": 70940 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019658080730373736, "loss": 2.1519, "step": 70945 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019658032810269266, "loss": 2.2867, "step": 70950 }, { "epoch": 0.17, "grad_norm": 1.84375, "learning_rate": 0.00019657984886865438, "loss": 2.0657, "step": 70955 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.00019657936960162267, "loss": 2.2434, "step": 70960 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.00019657889030159765, "loss": 2.0074, "step": 70965 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019657841096857958, "loss": 2.2028, "step": 70970 }, { "epoch": 0.17, "grad_norm": 1.625, "learning_rate": 0.00019657793160256858, "loss": 2.1471, "step": 70975 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019657745220356477, "loss": 2.1923, "step": 70980 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019657697277156836, "loss": 2.0971, "step": 70985 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019657649330657951, "loss": 2.178, "step": 70990 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.00019657601380859834, "loss": 2.239, "step": 70995 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.00019657553427762512, "loss": 2.2204, "step": 71000 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019657505471365992, "loss": 2.1598, "step": 71005 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.0001965745751167029, "loss": 2.2048, "step": 71010 }, { "epoch": 0.17, "grad_norm": 2.40625, "learning_rate": 0.0001965740954867543, "loss": 2.2403, "step": 71015 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019657361582381424, "loss": 2.1325, "step": 71020 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019657313612788283, "loss": 2.0632, "step": 71025 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.0001965726563989603, "loss": 2.2317, "step": 71030 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019657217663704684, "loss": 2.1014, "step": 71035 }, { "epoch": 0.17, "grad_norm": 1.7265625, "learning_rate": 0.00019657169684214254, "loss": 2.141, "step": 71040 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.00019657121701424762, "loss": 2.1351, "step": 71045 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.0001965707371533622, "loss": 2.1627, "step": 71050 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.00019657025725948647, "loss": 2.2133, "step": 71055 }, { "epoch": 0.17, "grad_norm": 1.6328125, "learning_rate": 0.0001965697773326206, "loss": 1.9774, "step": 71060 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.0001965692973727647, "loss": 2.3519, "step": 71065 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.000196568817379919, "loss": 2.2675, "step": 71070 }, { "epoch": 0.17, "grad_norm": 2.484375, "learning_rate": 0.00019656833735408365, "loss": 2.2046, "step": 71075 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019656785729525882, "loss": 2.3382, "step": 71080 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019656737720344464, "loss": 2.0647, "step": 71085 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019656689707864129, "loss": 2.0496, "step": 71090 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019656641692084893, "loss": 2.3572, "step": 71095 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019656593673006775, "loss": 2.1521, "step": 71100 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.0001965654565062979, "loss": 2.2393, "step": 71105 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019656497624953953, "loss": 2.284, "step": 71110 }, { "epoch": 0.17, "grad_norm": 2.375, "learning_rate": 0.00019656449595979278, "loss": 2.251, "step": 71115 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019656401563705788, "loss": 2.1106, "step": 71120 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019656353528133496, "loss": 2.1494, "step": 71125 }, { "epoch": 0.17, "grad_norm": 2.609375, "learning_rate": 0.0001965630548926242, "loss": 2.2602, "step": 71130 }, { "epoch": 0.17, "grad_norm": 2.59375, "learning_rate": 0.0001965625744709257, "loss": 2.1028, "step": 71135 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.0001965620940162397, "loss": 2.0131, "step": 71140 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019656161352856635, "loss": 2.2629, "step": 71145 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.0001965611330079058, "loss": 2.3184, "step": 71150 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019656065245425818, "loss": 2.1086, "step": 71155 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019656017186762373, "loss": 2.2278, "step": 71160 }, { "epoch": 0.17, "grad_norm": 1.9765625, "learning_rate": 0.00019655969124800256, "loss": 2.0583, "step": 71165 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019655921059539484, "loss": 2.2312, "step": 71170 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019655872990980076, "loss": 2.037, "step": 71175 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019655824919122044, "loss": 2.1356, "step": 71180 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001965577684396541, "loss": 2.2795, "step": 71185 }, { "epoch": 0.17, "grad_norm": 1.6015625, "learning_rate": 0.0001965572876551019, "loss": 2.0762, "step": 71190 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.00019655680683756393, "loss": 2.2584, "step": 71195 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.0001965563259870404, "loss": 2.1063, "step": 71200 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.0001965558451035315, "loss": 2.1717, "step": 71205 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.00019655536418703738, "loss": 2.1281, "step": 71210 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019655488323755814, "loss": 2.1822, "step": 71215 }, { "epoch": 0.17, "grad_norm": 2.359375, "learning_rate": 0.00019655440225509408, "loss": 2.084, "step": 71220 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019655392123964525, "loss": 2.0582, "step": 71225 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019655344019121182, "loss": 2.0981, "step": 71230 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019655295910979404, "loss": 2.0835, "step": 71235 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019655247799539198, "loss": 2.118, "step": 71240 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019655199684800586, "loss": 2.1084, "step": 71245 }, { "epoch": 0.17, "grad_norm": 1.6875, "learning_rate": 0.0001965515156676358, "loss": 2.1904, "step": 71250 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019655103445428203, "loss": 2.3886, "step": 71255 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019655055320794466, "loss": 2.2125, "step": 71260 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019655007192862387, "loss": 2.0783, "step": 71265 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.0001965495906163198, "loss": 2.1695, "step": 71270 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019654910927103267, "loss": 2.2456, "step": 71275 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.0001965486278927626, "loss": 2.2723, "step": 71280 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019654814648150976, "loss": 2.2181, "step": 71285 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.00019654766503727434, "loss": 2.0224, "step": 71290 }, { "epoch": 0.17, "grad_norm": 1.5390625, "learning_rate": 0.0001965471835600565, "loss": 2.1948, "step": 71295 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019654670204985637, "loss": 2.1643, "step": 71300 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.00019654622050667412, "loss": 2.2135, "step": 71305 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019654573893050997, "loss": 2.154, "step": 71310 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019654525732136401, "loss": 2.3102, "step": 71315 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.00019654477567923647, "loss": 2.1063, "step": 71320 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019654429400412743, "loss": 2.1655, "step": 71325 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.00019654381229603716, "loss": 2.1032, "step": 71330 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.00019654333055496576, "loss": 2.2022, "step": 71335 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019654284878091338, "loss": 2.047, "step": 71340 }, { "epoch": 0.17, "grad_norm": 1.5859375, "learning_rate": 0.00019654236697388026, "loss": 2.0085, "step": 71345 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019654188513386646, "loss": 2.2207, "step": 71350 }, { "epoch": 0.17, "grad_norm": 2.359375, "learning_rate": 0.00019654140326087226, "loss": 2.0807, "step": 71355 }, { "epoch": 0.17, "grad_norm": 2.453125, "learning_rate": 0.00019654092135489774, "loss": 2.121, "step": 71360 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.0001965404394159431, "loss": 2.0382, "step": 71365 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019653995744400846, "loss": 1.9449, "step": 71370 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019653947543909404, "loss": 2.1458, "step": 71375 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019653899340119999, "loss": 2.1504, "step": 71380 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019653851133032645, "loss": 1.9515, "step": 71385 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019653802922647365, "loss": 2.2598, "step": 71390 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019653754708964167, "loss": 2.1337, "step": 71395 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019653706491983073, "loss": 2.0754, "step": 71400 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019653658271704096, "loss": 2.1264, "step": 71405 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019653610048127256, "loss": 2.0982, "step": 71410 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019653561821252567, "loss": 2.2251, "step": 71415 }, { "epoch": 0.17, "grad_norm": 1.7890625, "learning_rate": 0.00019653513591080045, "loss": 2.2132, "step": 71420 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.0001965346535760971, "loss": 2.087, "step": 71425 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019653417120841574, "loss": 2.2901, "step": 71430 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019653368880775656, "loss": 2.1639, "step": 71435 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.00019653320637411974, "loss": 2.1327, "step": 71440 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.00019653272390750541, "loss": 2.0907, "step": 71445 }, { "epoch": 0.17, "grad_norm": 1.65625, "learning_rate": 0.00019653224140791375, "loss": 2.1611, "step": 71450 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019653175887534495, "loss": 2.1345, "step": 71455 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.0001965312763097991, "loss": 2.1718, "step": 71460 }, { "epoch": 0.17, "grad_norm": 1.7109375, "learning_rate": 0.00019653079371127646, "loss": 2.1954, "step": 71465 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019653031107977716, "loss": 2.2303, "step": 71470 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.0001965298284153013, "loss": 2.0622, "step": 71475 }, { "epoch": 0.17, "grad_norm": 1.65625, "learning_rate": 0.00019652934571784913, "loss": 2.232, "step": 71480 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.0001965288629874208, "loss": 2.2732, "step": 71485 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019652838022401646, "loss": 2.1548, "step": 71490 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.00019652789742763624, "loss": 2.3026, "step": 71495 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019652741459828037, "loss": 2.4381, "step": 71500 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019652693173594896, "loss": 2.1532, "step": 71505 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.0001965264488406422, "loss": 2.1515, "step": 71510 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.0001965259659123603, "loss": 2.2273, "step": 71515 }, { "epoch": 0.17, "grad_norm": 2.515625, "learning_rate": 0.00019652548295110334, "loss": 2.1355, "step": 71520 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019652499995687151, "loss": 2.2971, "step": 71525 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019652451692966504, "loss": 2.1811, "step": 71530 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.000196524033869484, "loss": 2.2308, "step": 71535 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.0001965235507763286, "loss": 2.0821, "step": 71540 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019652306765019904, "loss": 2.1191, "step": 71545 }, { "epoch": 0.17, "grad_norm": 1.7265625, "learning_rate": 0.00019652258449109544, "loss": 2.1576, "step": 71550 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019652210129901795, "loss": 2.1689, "step": 71555 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019652161807396677, "loss": 2.261, "step": 71560 }, { "epoch": 0.17, "grad_norm": 1.671875, "learning_rate": 0.00019652113481594206, "loss": 2.2869, "step": 71565 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.000196520651524944, "loss": 2.1315, "step": 71570 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.0001965201682009727, "loss": 2.1477, "step": 71575 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.00019651968484402836, "loss": 2.2012, "step": 71580 }, { "epoch": 0.17, "grad_norm": 1.703125, "learning_rate": 0.00019651920145411116, "loss": 2.2124, "step": 71585 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019651871803122125, "loss": 2.2583, "step": 71590 }, { "epoch": 0.17, "grad_norm": 2.515625, "learning_rate": 0.0001965182345753588, "loss": 2.2869, "step": 71595 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.00019651775108652398, "loss": 2.1872, "step": 71600 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019651726756471694, "loss": 2.2036, "step": 71605 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019651678400993782, "loss": 2.0282, "step": 71610 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019651630042218682, "loss": 1.9749, "step": 71615 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019651581680146414, "loss": 2.2007, "step": 71620 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.0001965153331477699, "loss": 2.1246, "step": 71625 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019651484946110425, "loss": 2.1231, "step": 71630 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019651436574146737, "loss": 2.373, "step": 71635 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.00019651388198885944, "loss": 2.1743, "step": 71640 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019651339820328065, "loss": 1.9887, "step": 71645 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.0001965129143847311, "loss": 2.0745, "step": 71650 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.000196512430533211, "loss": 2.2065, "step": 71655 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.0001965119466487205, "loss": 2.169, "step": 71660 }, { "epoch": 0.17, "grad_norm": 2.359375, "learning_rate": 0.00019651146273125977, "loss": 2.0366, "step": 71665 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019651097878082898, "loss": 2.1677, "step": 71670 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019651049479742826, "loss": 2.0627, "step": 71675 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019651001078105785, "loss": 2.3165, "step": 71680 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019650952673171782, "loss": 2.2074, "step": 71685 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.0001965090426494084, "loss": 2.1415, "step": 71690 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.00019650855853412978, "loss": 2.1208, "step": 71695 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019650807438588203, "loss": 2.1716, "step": 71700 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.0001965075902046654, "loss": 2.0953, "step": 71705 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019650710599048005, "loss": 2.1712, "step": 71710 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.0001965066217433261, "loss": 2.2348, "step": 71715 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019650613746320373, "loss": 2.0776, "step": 71720 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019650565315011313, "loss": 2.0611, "step": 71725 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019650516880405442, "loss": 2.1344, "step": 71730 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019650468442502782, "loss": 2.0923, "step": 71735 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019650420001303344, "loss": 2.2647, "step": 71740 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.0001965037155680715, "loss": 2.1782, "step": 71745 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.00019650323109014214, "loss": 2.2388, "step": 71750 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.00019650274657924554, "loss": 2.1232, "step": 71755 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019650226203538185, "loss": 2.2085, "step": 71760 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.0001965017774585512, "loss": 2.1798, "step": 71765 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.0001965012928487538, "loss": 2.1624, "step": 71770 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019650080820598985, "loss": 2.1816, "step": 71775 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019650032353025945, "loss": 2.2173, "step": 71780 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.00019649983882156276, "loss": 2.1526, "step": 71785 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019649935407990002, "loss": 2.1869, "step": 71790 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019649886930527135, "loss": 2.0414, "step": 71795 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001964983844976769, "loss": 2.2105, "step": 71800 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019649789965711683, "loss": 2.1976, "step": 71805 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.00019649741478359136, "loss": 2.1412, "step": 71810 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.0001964969298771006, "loss": 2.0737, "step": 71815 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019649644493764475, "loss": 2.1415, "step": 71820 }, { "epoch": 0.17, "grad_norm": 1.734375, "learning_rate": 0.00019649595996522397, "loss": 2.1901, "step": 71825 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.0001964954749598384, "loss": 2.1787, "step": 71830 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019649498992148825, "loss": 2.0472, "step": 71835 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019649450485017365, "loss": 2.0506, "step": 71840 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.0001964940197458948, "loss": 2.2466, "step": 71845 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019649353460865179, "loss": 2.246, "step": 71850 }, { "epoch": 0.17, "grad_norm": 1.6328125, "learning_rate": 0.00019649304943844488, "loss": 2.3202, "step": 71855 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.0001964925642352742, "loss": 2.1017, "step": 71860 }, { "epoch": 0.17, "grad_norm": 2.328125, "learning_rate": 0.00019649207899913987, "loss": 2.3179, "step": 71865 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019649159373004213, "loss": 2.2388, "step": 71870 }, { "epoch": 0.17, "grad_norm": 2.453125, "learning_rate": 0.00019649110842798108, "loss": 2.2316, "step": 71875 }, { "epoch": 0.17, "grad_norm": 1.734375, "learning_rate": 0.00019649062309295693, "loss": 2.2408, "step": 71880 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019649013772496984, "loss": 2.2632, "step": 71885 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.00019648965232402, "loss": 2.1658, "step": 71890 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.0001964891668901075, "loss": 2.1708, "step": 71895 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.00019648868142323254, "loss": 2.0619, "step": 71900 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019648819592339535, "loss": 2.1262, "step": 71905 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019648771039059603, "loss": 2.0971, "step": 71910 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019648722482483474, "loss": 2.0499, "step": 71915 }, { "epoch": 0.17, "grad_norm": 1.7890625, "learning_rate": 0.00019648673922611164, "loss": 2.1659, "step": 71920 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.00019648625359442695, "loss": 2.2133, "step": 71925 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019648576792978081, "loss": 2.0113, "step": 71930 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019648528223217338, "loss": 2.4443, "step": 71935 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019648479650160482, "loss": 2.066, "step": 71940 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019648431073807532, "loss": 2.2831, "step": 71945 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019648382494158502, "loss": 2.2426, "step": 71950 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019648333911213408, "loss": 2.1157, "step": 71955 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019648285324972271, "loss": 2.0362, "step": 71960 }, { "epoch": 0.17, "grad_norm": 2.375, "learning_rate": 0.000196482367354351, "loss": 2.0731, "step": 71965 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.0001964818814260192, "loss": 2.0664, "step": 71970 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019648139546472746, "loss": 2.1586, "step": 71975 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.0001964809094704759, "loss": 2.4888, "step": 71980 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019648042344326468, "loss": 2.1722, "step": 71985 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019647993738309405, "loss": 2.0691, "step": 71990 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.0001964794512899641, "loss": 2.0128, "step": 71995 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019647896516387502, "loss": 2.2623, "step": 72000 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.000196478479004827, "loss": 2.0826, "step": 72005 }, { "epoch": 0.17, "grad_norm": 1.7109375, "learning_rate": 0.00019647799281282012, "loss": 2.1639, "step": 72010 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019647750658785467, "loss": 2.2144, "step": 72015 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019647702032993074, "loss": 2.1964, "step": 72020 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019647653403904848, "loss": 2.2682, "step": 72025 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019647604771520812, "loss": 2.0878, "step": 72030 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019647556135840975, "loss": 2.231, "step": 72035 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.00019647507496865362, "loss": 2.1936, "step": 72040 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019647458854593985, "loss": 2.0899, "step": 72045 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.0001964741020902686, "loss": 2.1249, "step": 72050 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.00019647361560164007, "loss": 2.2265, "step": 72055 }, { "epoch": 0.17, "grad_norm": 2.359375, "learning_rate": 0.00019647312908005439, "loss": 2.2101, "step": 72060 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019647264252551172, "loss": 2.2031, "step": 72065 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019647215593801226, "loss": 2.1421, "step": 72070 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019647166931755617, "loss": 2.1866, "step": 72075 }, { "epoch": 0.17, "grad_norm": 1.671875, "learning_rate": 0.0001964711826641436, "loss": 2.2068, "step": 72080 }, { "epoch": 0.17, "grad_norm": 1.6484375, "learning_rate": 0.00019647069597777473, "loss": 2.1639, "step": 72085 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.0001964702092584497, "loss": 2.0176, "step": 72090 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001964697225061687, "loss": 2.346, "step": 72095 }, { "epoch": 0.17, "grad_norm": 2.421875, "learning_rate": 0.0001964692357209319, "loss": 2.2571, "step": 72100 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019646874890273948, "loss": 2.0959, "step": 72105 }, { "epoch": 0.17, "grad_norm": 2.703125, "learning_rate": 0.00019646826205159157, "loss": 2.0084, "step": 72110 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019646777516748836, "loss": 2.2677, "step": 72115 }, { "epoch": 0.17, "grad_norm": 1.6640625, "learning_rate": 0.00019646728825042998, "loss": 2.316, "step": 72120 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019646680130041666, "loss": 2.3698, "step": 72125 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019646631431744852, "loss": 2.1582, "step": 72130 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019646582730152573, "loss": 2.1187, "step": 72135 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019646534025264845, "loss": 2.1383, "step": 72140 }, { "epoch": 0.17, "grad_norm": 2.40625, "learning_rate": 0.0001964648531708169, "loss": 2.3781, "step": 72145 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019646436605603117, "loss": 2.1306, "step": 72150 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.0001964638789082915, "loss": 2.232, "step": 72155 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019646339172759797, "loss": 2.1367, "step": 72160 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019646290451395083, "loss": 2.3377, "step": 72165 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001964624172673502, "loss": 2.2072, "step": 72170 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.0001964619299877963, "loss": 2.071, "step": 72175 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.0001964614426752892, "loss": 2.1659, "step": 72180 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019646095532982916, "loss": 2.1292, "step": 72185 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019646046795141628, "loss": 2.2583, "step": 72190 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019645998054005075, "loss": 2.161, "step": 72195 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019645949309573277, "loss": 2.1161, "step": 72200 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019645900561846247, "loss": 2.035, "step": 72205 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019645851810824, "loss": 2.0922, "step": 72210 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019645803056506558, "loss": 2.3176, "step": 72215 }, { "epoch": 0.17, "grad_norm": 2.484375, "learning_rate": 0.00019645754298893935, "loss": 2.0893, "step": 72220 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019645705537986143, "loss": 2.1323, "step": 72225 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019645656773783208, "loss": 2.1494, "step": 72230 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019645608006285139, "loss": 2.0959, "step": 72235 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019645559235491957, "loss": 1.9663, "step": 72240 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.00019645510461403678, "loss": 1.9625, "step": 72245 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019645461684020314, "loss": 2.1455, "step": 72250 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.0001964541290334189, "loss": 2.3269, "step": 72255 }, { "epoch": 0.17, "grad_norm": 1.6875, "learning_rate": 0.00019645364119368414, "loss": 2.2242, "step": 72260 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019645315332099907, "loss": 2.1978, "step": 72265 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019645266541536388, "loss": 2.2647, "step": 72270 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.0001964521774767787, "loss": 2.222, "step": 72275 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001964516895052437, "loss": 2.0411, "step": 72280 }, { "epoch": 0.17, "grad_norm": 2.375, "learning_rate": 0.00019645120150075905, "loss": 2.2176, "step": 72285 }, { "epoch": 0.17, "grad_norm": 2.609375, "learning_rate": 0.0001964507134633249, "loss": 2.2752, "step": 72290 }, { "epoch": 0.17, "grad_norm": 2.375, "learning_rate": 0.00019645022539294146, "loss": 1.9846, "step": 72295 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019644973728960888, "loss": 2.1621, "step": 72300 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019644924915332733, "loss": 2.2408, "step": 72305 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019644876098409694, "loss": 2.2344, "step": 72310 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.0001964482727819179, "loss": 2.3042, "step": 72315 }, { "epoch": 0.17, "grad_norm": 1.640625, "learning_rate": 0.00019644778454679042, "loss": 2.0662, "step": 72320 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.0001964472962787146, "loss": 2.4216, "step": 72325 }, { "epoch": 0.17, "grad_norm": 2.40625, "learning_rate": 0.00019644680797769064, "loss": 2.153, "step": 72330 }, { "epoch": 0.17, "grad_norm": 1.6875, "learning_rate": 0.0001964463196437187, "loss": 2.2378, "step": 72335 }, { "epoch": 0.17, "grad_norm": 2.4375, "learning_rate": 0.00019644583127679895, "loss": 2.2227, "step": 72340 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019644534287693155, "loss": 2.1537, "step": 72345 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.0001964448544441167, "loss": 2.3003, "step": 72350 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.0001964443659783545, "loss": 2.1712, "step": 72355 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019644387747964515, "loss": 2.2205, "step": 72360 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019644338894798886, "loss": 2.0946, "step": 72365 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019644290038338573, "loss": 2.1574, "step": 72370 }, { "epoch": 0.17, "grad_norm": 1.9765625, "learning_rate": 0.00019644241178583597, "loss": 2.1878, "step": 72375 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019644192315533974, "loss": 2.1968, "step": 72380 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019644143449189718, "loss": 2.1127, "step": 72385 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.0001964409457955085, "loss": 2.1454, "step": 72390 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019644045706617385, "loss": 2.2315, "step": 72395 }, { "epoch": 0.17, "grad_norm": 1.40625, "learning_rate": 0.00019643996830389336, "loss": 2.0706, "step": 72400 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019643947950866722, "loss": 2.1793, "step": 72405 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019643899068049562, "loss": 2.0978, "step": 72410 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019643850181937875, "loss": 1.9436, "step": 72415 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.0001964380129253167, "loss": 2.0687, "step": 72420 }, { "epoch": 0.17, "grad_norm": 1.625, "learning_rate": 0.00019643752399830968, "loss": 2.2271, "step": 72425 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019643703503835784, "loss": 2.0571, "step": 72430 }, { "epoch": 0.17, "grad_norm": 1.6796875, "learning_rate": 0.0001964365460454614, "loss": 2.1797, "step": 72435 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019643605701962044, "loss": 2.1276, "step": 72440 }, { "epoch": 0.17, "grad_norm": 2.46875, "learning_rate": 0.0001964355679608352, "loss": 2.1128, "step": 72445 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.0001964350788691058, "loss": 2.2141, "step": 72450 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019643458974443246, "loss": 2.3301, "step": 72455 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.0001964341005868153, "loss": 2.1951, "step": 72460 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.00019643361139625451, "loss": 2.066, "step": 72465 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019643312217275022, "loss": 2.1907, "step": 72470 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.00019643263291630264, "loss": 2.2515, "step": 72475 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019643214362691195, "loss": 2.188, "step": 72480 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001964316543045783, "loss": 2.1771, "step": 72485 }, { "epoch": 0.17, "grad_norm": 1.6640625, "learning_rate": 0.0001964311649493018, "loss": 2.2445, "step": 72490 }, { "epoch": 0.17, "grad_norm": 3.0, "learning_rate": 0.0001964306755610827, "loss": 2.2226, "step": 72495 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.0001964301861399211, "loss": 1.9774, "step": 72500 }, { "epoch": 0.17, "grad_norm": 1.9765625, "learning_rate": 0.00019642969668581724, "loss": 2.0556, "step": 72505 }, { "epoch": 0.17, "grad_norm": 2.890625, "learning_rate": 0.00019642920719877121, "loss": 2.153, "step": 72510 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.00019642871767878322, "loss": 2.0187, "step": 72515 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019642822812585345, "loss": 2.1287, "step": 72520 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.000196427738539982, "loss": 2.08, "step": 72525 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.00019642724892116915, "loss": 2.1257, "step": 72530 }, { "epoch": 0.17, "grad_norm": 2.453125, "learning_rate": 0.00019642675926941498, "loss": 1.9804, "step": 72535 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019642626958471968, "loss": 2.3269, "step": 72540 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.0001964257798670834, "loss": 2.1984, "step": 72545 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019642529011650636, "loss": 2.0229, "step": 72550 }, { "epoch": 0.17, "grad_norm": 1.6171875, "learning_rate": 0.00019642480033298866, "loss": 2.1323, "step": 72555 }, { "epoch": 0.17, "grad_norm": 1.5390625, "learning_rate": 0.0001964243105165305, "loss": 2.1609, "step": 72560 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019642382066713206, "loss": 2.1362, "step": 72565 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.0001964233307847935, "loss": 2.0417, "step": 72570 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019642284086951497, "loss": 2.2022, "step": 72575 }, { "epoch": 0.17, "grad_norm": 1.6484375, "learning_rate": 0.00019642235092129666, "loss": 2.0364, "step": 72580 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019642186094013872, "loss": 2.0104, "step": 72585 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.0001964213709260413, "loss": 2.0329, "step": 72590 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019642088087900463, "loss": 2.1855, "step": 72595 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019642039079902882, "loss": 2.1722, "step": 72600 }, { "epoch": 0.17, "grad_norm": 1.6484375, "learning_rate": 0.00019641990068611405, "loss": 1.99, "step": 72605 }, { "epoch": 0.17, "grad_norm": 2.484375, "learning_rate": 0.00019641941054026048, "loss": 2.2372, "step": 72610 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019641892036146834, "loss": 2.2993, "step": 72615 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019641843014973773, "loss": 2.2143, "step": 72620 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019641793990506881, "loss": 2.1387, "step": 72625 }, { "epoch": 0.17, "grad_norm": 2.46875, "learning_rate": 0.00019641744962746178, "loss": 2.1557, "step": 72630 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019641695931691682, "loss": 2.1338, "step": 72635 }, { "epoch": 0.17, "grad_norm": 2.375, "learning_rate": 0.00019641646897343407, "loss": 2.22, "step": 72640 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.0001964159785970137, "loss": 2.106, "step": 72645 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019641548818765588, "loss": 2.0309, "step": 72650 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019641499774536078, "loss": 2.2174, "step": 72655 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019641450727012858, "loss": 2.2059, "step": 72660 }, { "epoch": 0.17, "grad_norm": 1.6953125, "learning_rate": 0.00019641401676195942, "loss": 2.0323, "step": 72665 }, { "epoch": 0.17, "grad_norm": 1.6484375, "learning_rate": 0.0001964135262208535, "loss": 2.1446, "step": 72670 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.00019641303564681096, "loss": 2.0883, "step": 72675 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019641254503983198, "loss": 2.2999, "step": 72680 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019641205439991676, "loss": 2.2195, "step": 72685 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019641156372706537, "loss": 1.8962, "step": 72690 }, { "epoch": 0.17, "grad_norm": 2.484375, "learning_rate": 0.00019641107302127808, "loss": 2.0309, "step": 72695 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.000196410582282555, "loss": 2.1454, "step": 72700 }, { "epoch": 0.17, "grad_norm": 3.34375, "learning_rate": 0.00019641009151089635, "loss": 2.4213, "step": 72705 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019640960070630223, "loss": 2.142, "step": 72710 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019640910986877286, "loss": 2.0575, "step": 72715 }, { "epoch": 0.17, "grad_norm": 3.59375, "learning_rate": 0.00019640861899830841, "loss": 2.1268, "step": 72720 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.000196408128094909, "loss": 2.0666, "step": 72725 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001964076371585748, "loss": 2.1235, "step": 72730 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019640714618930605, "loss": 2.2464, "step": 72735 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019640665518710283, "loss": 2.1717, "step": 72740 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019640616415196537, "loss": 2.1437, "step": 72745 }, { "epoch": 0.17, "grad_norm": 1.515625, "learning_rate": 0.00019640567308389382, "loss": 2.0328, "step": 72750 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019640518198288834, "loss": 2.2542, "step": 72755 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.0001964046908489491, "loss": 2.2317, "step": 72760 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019640419968207625, "loss": 2.0718, "step": 72765 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019640370848227, "loss": 2.2195, "step": 72770 }, { "epoch": 0.17, "grad_norm": 2.484375, "learning_rate": 0.0001964032172495305, "loss": 2.1638, "step": 72775 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.0001964027259838579, "loss": 2.2519, "step": 72780 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019640223468525239, "loss": 2.2681, "step": 72785 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.0001964017433537141, "loss": 2.1351, "step": 72790 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019640125198924327, "loss": 2.3419, "step": 72795 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019640076059184, "loss": 2.2919, "step": 72800 }, { "epoch": 0.17, "grad_norm": 1.578125, "learning_rate": 0.00019640026916150448, "loss": 2.0962, "step": 72805 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019639977769823688, "loss": 2.16, "step": 72810 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019639928620203737, "loss": 1.9961, "step": 72815 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019639879467290615, "loss": 1.8884, "step": 72820 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.0001963983031108433, "loss": 2.0915, "step": 72825 }, { "epoch": 0.17, "grad_norm": 1.4453125, "learning_rate": 0.00019639781151584905, "loss": 2.2628, "step": 72830 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.0001963973198879236, "loss": 2.1221, "step": 72835 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019639682822706703, "loss": 2.0749, "step": 72840 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.0001963963365332796, "loss": 2.063, "step": 72845 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.00019639584480656138, "loss": 2.2085, "step": 72850 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019639535304691262, "loss": 2.0787, "step": 72855 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019639486125433347, "loss": 2.0079, "step": 72860 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019639436942882406, "loss": 2.2501, "step": 72865 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019639387757038458, "loss": 2.2379, "step": 72870 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019639338567901523, "loss": 2.2019, "step": 72875 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019639289375471614, "loss": 2.1919, "step": 72880 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.0001963924017974875, "loss": 2.1027, "step": 72885 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019639190980732946, "loss": 2.2027, "step": 72890 }, { "epoch": 0.17, "grad_norm": 1.9765625, "learning_rate": 0.0001963914177842422, "loss": 2.1186, "step": 72895 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019639092572822588, "loss": 2.2738, "step": 72900 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019639043363928066, "loss": 2.2606, "step": 72905 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019638994151740673, "loss": 2.2454, "step": 72910 }, { "epoch": 0.17, "grad_norm": 2.53125, "learning_rate": 0.00019638944936260425, "loss": 2.1448, "step": 72915 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019638895717487338, "loss": 2.3923, "step": 72920 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.0001963884649542143, "loss": 2.199, "step": 72925 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.0001963879727006272, "loss": 2.2588, "step": 72930 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.00019638748041411218, "loss": 2.1992, "step": 72935 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019638698809466945, "loss": 2.0968, "step": 72940 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019638649574229918, "loss": 2.2476, "step": 72945 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019638600335700158, "loss": 2.1491, "step": 72950 }, { "epoch": 0.17, "grad_norm": 1.859375, "learning_rate": 0.0001963855109387767, "loss": 1.9864, "step": 72955 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019638501848762483, "loss": 2.3212, "step": 72960 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019638452600354608, "loss": 2.1923, "step": 72965 }, { "epoch": 0.17, "grad_norm": 2.625, "learning_rate": 0.00019638403348654063, "loss": 2.2408, "step": 72970 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.0001963835409366086, "loss": 2.3014, "step": 72975 }, { "epoch": 0.17, "grad_norm": 1.9765625, "learning_rate": 0.00019638304835375026, "loss": 2.2937, "step": 72980 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.0001963825557379657, "loss": 2.2842, "step": 72985 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019638206308925514, "loss": 2.2573, "step": 72990 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001963815704076187, "loss": 1.9986, "step": 72995 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019638107769305655, "loss": 2.1244, "step": 73000 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.0001963805849455689, "loss": 2.2638, "step": 73005 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019638009216515586, "loss": 2.0821, "step": 73010 }, { "epoch": 0.17, "grad_norm": 2.59375, "learning_rate": 0.00019637959935181769, "loss": 2.0274, "step": 73015 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019637910650555446, "loss": 2.1048, "step": 73020 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.0001963786136263664, "loss": 2.2425, "step": 73025 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019637812071425363, "loss": 2.2029, "step": 73030 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019637762776921638, "loss": 2.3768, "step": 73035 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019637713479125476, "loss": 1.9592, "step": 73040 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019637664178036895, "loss": 2.217, "step": 73045 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019637614873655915, "loss": 2.3342, "step": 73050 }, { "epoch": 0.17, "grad_norm": 1.4375, "learning_rate": 0.00019637565565982552, "loss": 2.1569, "step": 73055 }, { "epoch": 0.17, "grad_norm": 1.40625, "learning_rate": 0.00019637516255016822, "loss": 2.1078, "step": 73060 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019637466940758739, "loss": 2.1494, "step": 73065 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019637417623208326, "loss": 2.1544, "step": 73070 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019637368302365593, "loss": 2.1735, "step": 73075 }, { "epoch": 0.17, "grad_norm": 2.546875, "learning_rate": 0.0001963731897823056, "loss": 2.1891, "step": 73080 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019637269650803248, "loss": 2.2087, "step": 73085 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.00019637220320083666, "loss": 2.0242, "step": 73090 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.0001963717098607184, "loss": 2.0693, "step": 73095 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.00019637121648767773, "loss": 2.0158, "step": 73100 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.000196370723081715, "loss": 2.2206, "step": 73105 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019637022964283022, "loss": 2.1345, "step": 73110 }, { "epoch": 0.17, "grad_norm": 2.453125, "learning_rate": 0.00019636973617102364, "loss": 2.1969, "step": 73115 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.0001963692426662954, "loss": 2.103, "step": 73120 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019636874912864566, "loss": 2.1539, "step": 73125 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019636825555807466, "loss": 2.265, "step": 73130 }, { "epoch": 0.17, "grad_norm": 1.84375, "learning_rate": 0.0001963677619545825, "loss": 2.1443, "step": 73135 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019636726831816933, "loss": 2.1425, "step": 73140 }, { "epoch": 0.17, "grad_norm": 1.71875, "learning_rate": 0.0001963667746488354, "loss": 2.1862, "step": 73145 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019636628094658082, "loss": 2.0123, "step": 73150 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019636578721140576, "loss": 2.1771, "step": 73155 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019636529344331042, "loss": 2.4404, "step": 73160 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.0001963647996422949, "loss": 2.2538, "step": 73165 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019636430580835946, "loss": 2.1237, "step": 73170 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.0001963638119415042, "loss": 2.2505, "step": 73175 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.00019636331804172934, "loss": 2.1923, "step": 73180 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.000196362824109035, "loss": 2.0048, "step": 73185 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.0001963623301434214, "loss": 2.2721, "step": 73190 }, { "epoch": 0.17, "grad_norm": 2.40625, "learning_rate": 0.00019636183614488864, "loss": 2.2045, "step": 73195 }, { "epoch": 0.17, "grad_norm": 1.734375, "learning_rate": 0.00019636134211343696, "loss": 2.2569, "step": 73200 }, { "epoch": 0.17, "grad_norm": 1.6953125, "learning_rate": 0.0001963608480490665, "loss": 2.0045, "step": 73205 }, { "epoch": 0.17, "grad_norm": 1.9453125, "learning_rate": 0.00019636035395177742, "loss": 1.9768, "step": 73210 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019635985982156985, "loss": 2.1726, "step": 73215 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019635936565844407, "loss": 2.1461, "step": 73220 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019635887146240015, "loss": 2.1748, "step": 73225 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.0001963583772334383, "loss": 2.2877, "step": 73230 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.0001963578829715587, "loss": 2.2951, "step": 73235 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019635738867676146, "loss": 2.2005, "step": 73240 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.0001963568943490468, "loss": 2.2774, "step": 73245 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.0001963563999884149, "loss": 2.075, "step": 73250 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.0001963559055948659, "loss": 2.1565, "step": 73255 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019635541116839995, "loss": 2.2303, "step": 73260 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019635491670901727, "loss": 2.2476, "step": 73265 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.000196354422216718, "loss": 2.1069, "step": 73270 }, { "epoch": 0.17, "grad_norm": 2.515625, "learning_rate": 0.00019635392769150228, "loss": 2.2393, "step": 73275 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019635343313337037, "loss": 1.9639, "step": 73280 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019635293854232232, "loss": 2.2364, "step": 73285 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.00019635244391835838, "loss": 2.2464, "step": 73290 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.0001963519492614787, "loss": 2.0188, "step": 73295 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019635145457168348, "loss": 2.1892, "step": 73300 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019635095984897282, "loss": 2.1448, "step": 73305 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019635046509334695, "loss": 2.0891, "step": 73310 }, { "epoch": 0.17, "grad_norm": 1.7265625, "learning_rate": 0.00019634997030480597, "loss": 2.1818, "step": 73315 }, { "epoch": 0.17, "grad_norm": 1.6796875, "learning_rate": 0.0001963494754833501, "loss": 2.1188, "step": 73320 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019634898062897955, "loss": 2.1934, "step": 73325 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.0001963484857416944, "loss": 2.0756, "step": 73330 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019634799082149487, "loss": 2.0979, "step": 73335 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019634749586838111, "loss": 2.0998, "step": 73340 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.0001963470008823533, "loss": 2.0245, "step": 73345 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.00019634650586341165, "loss": 2.2688, "step": 73350 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.00019634601081155623, "loss": 2.2085, "step": 73355 }, { "epoch": 0.17, "grad_norm": 2.984375, "learning_rate": 0.0001963455157267873, "loss": 2.0938, "step": 73360 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019634502060910495, "loss": 2.2923, "step": 73365 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019634452545850946, "loss": 2.3177, "step": 73370 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.00019634403027500087, "loss": 2.1755, "step": 73375 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019634353505857942, "loss": 2.2208, "step": 73380 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.0001963430398092453, "loss": 2.2463, "step": 73385 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019634254452699864, "loss": 2.2438, "step": 73390 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019634204921183964, "loss": 2.1459, "step": 73395 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019634155386376843, "loss": 2.2152, "step": 73400 }, { "epoch": 0.17, "grad_norm": 1.9375, "learning_rate": 0.00019634105848278518, "loss": 2.2553, "step": 73405 }, { "epoch": 0.17, "grad_norm": 1.796875, "learning_rate": 0.0001963405630688901, "loss": 2.2953, "step": 73410 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019634006762208331, "loss": 2.1631, "step": 73415 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019633957214236505, "loss": 2.1681, "step": 73420 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.0001963390766297354, "loss": 2.1471, "step": 73425 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.0001963385810841946, "loss": 2.2827, "step": 73430 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.0001963380855057428, "loss": 2.1107, "step": 73435 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019633758989438013, "loss": 2.1637, "step": 73440 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.0001963370942501068, "loss": 2.2153, "step": 73445 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019633659857292302, "loss": 2.1557, "step": 73450 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019633610286282888, "loss": 2.1776, "step": 73455 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019633560711982458, "loss": 2.0881, "step": 73460 }, { "epoch": 0.17, "grad_norm": 2.171875, "learning_rate": 0.00019633511134391028, "loss": 1.985, "step": 73465 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019633461553508617, "loss": 2.2369, "step": 73470 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.0001963341196933524, "loss": 2.1029, "step": 73475 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019633362381870915, "loss": 2.1264, "step": 73480 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019633312791115658, "loss": 1.9669, "step": 73485 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019633263197069487, "loss": 2.1063, "step": 73490 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019633213599732418, "loss": 2.1438, "step": 73495 }, { "epoch": 0.17, "grad_norm": 1.7734375, "learning_rate": 0.0001963316399910447, "loss": 2.2038, "step": 73500 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019633114395185657, "loss": 2.2506, "step": 73505 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019633064787976, "loss": 2.1974, "step": 73510 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.0001963301517747551, "loss": 2.2714, "step": 73515 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.0001963296556368421, "loss": 2.0902, "step": 73520 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019632915946602114, "loss": 2.1979, "step": 73525 }, { "epoch": 0.17, "grad_norm": 2.390625, "learning_rate": 0.00019632866326229237, "loss": 2.106, "step": 73530 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019632816702565598, "loss": 2.0937, "step": 73535 }, { "epoch": 0.17, "grad_norm": 1.7421875, "learning_rate": 0.00019632767075611219, "loss": 2.2276, "step": 73540 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.0001963271744536611, "loss": 2.3735, "step": 73545 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.00019632667811830286, "loss": 2.1822, "step": 73550 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.0001963261817500377, "loss": 2.2343, "step": 73555 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.0001963256853488658, "loss": 2.1731, "step": 73560 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.00019632518891478726, "loss": 1.9869, "step": 73565 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.0001963246924478023, "loss": 2.3186, "step": 73570 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019632419594791108, "loss": 2.3031, "step": 73575 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019632369941511378, "loss": 2.1687, "step": 73580 }, { "epoch": 0.17, "grad_norm": 1.5078125, "learning_rate": 0.00019632320284941054, "loss": 2.0124, "step": 73585 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019632270625080157, "loss": 2.0862, "step": 73590 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.000196322209619287, "loss": 2.2334, "step": 73595 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019632171295486703, "loss": 2.0581, "step": 73600 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019632121625754177, "loss": 2.1284, "step": 73605 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019632071952731148, "loss": 2.0885, "step": 73610 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019632022276417626, "loss": 2.3443, "step": 73615 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019631972596813636, "loss": 2.1336, "step": 73620 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019631922913919186, "loss": 2.2466, "step": 73625 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.00019631873227734294, "loss": 2.0995, "step": 73630 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019631823538258982, "loss": 2.0901, "step": 73635 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019631773845493262, "loss": 2.2859, "step": 73640 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.00019631724149437156, "loss": 2.2066, "step": 73645 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.0001963167445009068, "loss": 2.1416, "step": 73650 }, { "epoch": 0.17, "grad_norm": 1.6953125, "learning_rate": 0.00019631624747453846, "loss": 2.1128, "step": 73655 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019631575041526673, "loss": 2.1171, "step": 73660 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019631525332309183, "loss": 1.9956, "step": 73665 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019631475619801388, "loss": 2.1287, "step": 73670 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019631425904003305, "loss": 2.0305, "step": 73675 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.0001963137618491495, "loss": 2.2567, "step": 73680 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019631326462536347, "loss": 2.1475, "step": 73685 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019631276736867506, "loss": 2.1648, "step": 73690 }, { "epoch": 0.17, "grad_norm": 1.8125, "learning_rate": 0.00019631227007908448, "loss": 2.3153, "step": 73695 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019631177275659188, "loss": 2.0449, "step": 73700 }, { "epoch": 0.17, "grad_norm": 1.8984375, "learning_rate": 0.0001963112754011974, "loss": 2.1661, "step": 73705 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019631077801290128, "loss": 2.2574, "step": 73710 }, { "epoch": 0.17, "grad_norm": 2.140625, "learning_rate": 0.00019631028059170362, "loss": 2.0927, "step": 73715 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019630978313760463, "loss": 2.2221, "step": 73720 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001963092856506045, "loss": 2.2672, "step": 73725 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019630878813070335, "loss": 2.1772, "step": 73730 }, { "epoch": 0.17, "grad_norm": 1.6171875, "learning_rate": 0.00019630829057790136, "loss": 2.2237, "step": 73735 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.00019630779299219874, "loss": 2.1076, "step": 73740 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.0001963072953735956, "loss": 2.1335, "step": 73745 }, { "epoch": 0.17, "grad_norm": 1.8359375, "learning_rate": 0.00019630679772209212, "loss": 2.2493, "step": 73750 }, { "epoch": 0.17, "grad_norm": 2.0, "learning_rate": 0.00019630630003768856, "loss": 2.1906, "step": 73755 }, { "epoch": 0.17, "grad_norm": 1.7578125, "learning_rate": 0.000196305802320385, "loss": 2.1006, "step": 73760 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.0001963053045701816, "loss": 2.2073, "step": 73765 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019630480678707856, "loss": 2.1052, "step": 73770 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019630430897107608, "loss": 2.2575, "step": 73775 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.0001963038111221743, "loss": 2.0211, "step": 73780 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.00019630331324037336, "loss": 2.1417, "step": 73785 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.0001963028153256735, "loss": 2.2107, "step": 73790 }, { "epoch": 0.17, "grad_norm": 3.0, "learning_rate": 0.00019630231737807484, "loss": 2.1845, "step": 73795 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019630181939757756, "loss": 2.0975, "step": 73800 }, { "epoch": 0.17, "grad_norm": 1.65625, "learning_rate": 0.00019630132138418182, "loss": 2.1225, "step": 73805 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019630082333788783, "loss": 2.2464, "step": 73810 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.0001963003252586957, "loss": 2.096, "step": 73815 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019629982714660567, "loss": 2.0572, "step": 73820 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019629932900161782, "loss": 2.139, "step": 73825 }, { "epoch": 0.17, "grad_norm": 1.90625, "learning_rate": 0.00019629883082373243, "loss": 2.1649, "step": 73830 }, { "epoch": 0.17, "grad_norm": 2.328125, "learning_rate": 0.00019629833261294958, "loss": 2.3015, "step": 73835 }, { "epoch": 0.17, "grad_norm": 1.7421875, "learning_rate": 0.00019629783436926948, "loss": 2.2877, "step": 73840 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.00019629733609269229, "loss": 2.0647, "step": 73845 }, { "epoch": 0.17, "grad_norm": 1.6953125, "learning_rate": 0.00019629683778321817, "loss": 2.275, "step": 73850 }, { "epoch": 0.17, "grad_norm": 1.6015625, "learning_rate": 0.00019629633944084733, "loss": 2.249, "step": 73855 }, { "epoch": 0.17, "grad_norm": 1.6171875, "learning_rate": 0.0001962958410655799, "loss": 1.9961, "step": 73860 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.00019629534265741606, "loss": 2.2958, "step": 73865 }, { "epoch": 0.17, "grad_norm": 1.7265625, "learning_rate": 0.000196294844216356, "loss": 2.0032, "step": 73870 }, { "epoch": 0.17, "grad_norm": 1.9140625, "learning_rate": 0.00019629434574239987, "loss": 2.18, "step": 73875 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019629384723554783, "loss": 2.1028, "step": 73880 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019629334869580007, "loss": 2.1527, "step": 73885 }, { "epoch": 0.17, "grad_norm": 1.734375, "learning_rate": 0.00019629285012315678, "loss": 2.0766, "step": 73890 }, { "epoch": 0.17, "grad_norm": 2.5625, "learning_rate": 0.00019629235151761808, "loss": 2.2748, "step": 73895 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.00019629185287918416, "loss": 2.1529, "step": 73900 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019629135420785523, "loss": 2.1168, "step": 73905 }, { "epoch": 0.17, "grad_norm": 1.8515625, "learning_rate": 0.0001962908555036314, "loss": 2.0175, "step": 73910 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019629035676651288, "loss": 2.1039, "step": 73915 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019628985799649985, "loss": 2.3507, "step": 73920 }, { "epoch": 0.17, "grad_norm": 1.609375, "learning_rate": 0.00019628935919359242, "loss": 2.4008, "step": 73925 }, { "epoch": 0.17, "grad_norm": 1.7421875, "learning_rate": 0.00019628886035779082, "loss": 2.3878, "step": 73930 }, { "epoch": 0.17, "grad_norm": 1.9296875, "learning_rate": 0.00019628836148909517, "loss": 2.4041, "step": 73935 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019628786258750572, "loss": 2.2205, "step": 73940 }, { "epoch": 0.17, "grad_norm": 1.6875, "learning_rate": 0.00019628736365302253, "loss": 2.3059, "step": 73945 }, { "epoch": 0.17, "grad_norm": 1.5625, "learning_rate": 0.00019628686468564586, "loss": 2.12, "step": 73950 }, { "epoch": 0.17, "grad_norm": 1.6015625, "learning_rate": 0.00019628636568537586, "loss": 1.9593, "step": 73955 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019628586665221268, "loss": 1.9618, "step": 73960 }, { "epoch": 0.17, "grad_norm": 2.46875, "learning_rate": 0.0001962853675861565, "loss": 2.0997, "step": 73965 }, { "epoch": 0.17, "grad_norm": 1.4765625, "learning_rate": 0.0001962848684872075, "loss": 2.1272, "step": 73970 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019628436935536584, "loss": 2.1493, "step": 73975 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.0001962838701906317, "loss": 2.1758, "step": 73980 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019628337099300523, "loss": 2.2684, "step": 73985 }, { "epoch": 0.17, "grad_norm": 1.953125, "learning_rate": 0.0001962828717624866, "loss": 2.3021, "step": 73990 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.000196282372499076, "loss": 2.0882, "step": 73995 }, { "epoch": 0.17, "grad_norm": 1.6953125, "learning_rate": 0.00019628187320277362, "loss": 2.2196, "step": 74000 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.0001962813738735796, "loss": 2.2341, "step": 74005 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019628087451149413, "loss": 2.1227, "step": 74010 }, { "epoch": 0.17, "grad_norm": 1.6953125, "learning_rate": 0.00019628037511651734, "loss": 2.2297, "step": 74015 }, { "epoch": 0.17, "grad_norm": 3.984375, "learning_rate": 0.00019627987568864944, "loss": 2.2817, "step": 74020 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019627937622789058, "loss": 2.2777, "step": 74025 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019627887673424096, "loss": 1.9626, "step": 74030 }, { "epoch": 0.17, "grad_norm": 2.3125, "learning_rate": 0.00019627837720770074, "loss": 2.1998, "step": 74035 }, { "epoch": 0.17, "grad_norm": 2.625, "learning_rate": 0.00019627787764827002, "loss": 2.3497, "step": 74040 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019627737805594909, "loss": 2.2135, "step": 74045 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.000196276878430738, "loss": 2.2389, "step": 74050 }, { "epoch": 0.17, "grad_norm": 2.453125, "learning_rate": 0.00019627637877263704, "loss": 2.1018, "step": 74055 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001962758790816463, "loss": 2.068, "step": 74060 }, { "epoch": 0.17, "grad_norm": 2.03125, "learning_rate": 0.000196275379357766, "loss": 2.2684, "step": 74065 }, { "epoch": 0.17, "grad_norm": 1.5703125, "learning_rate": 0.00019627487960099623, "loss": 2.1504, "step": 74070 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019627437981133725, "loss": 2.0905, "step": 74075 }, { "epoch": 0.17, "grad_norm": 1.65625, "learning_rate": 0.00019627387998878918, "loss": 2.2253, "step": 74080 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019627338013335224, "loss": 2.3393, "step": 74085 }, { "epoch": 0.17, "grad_norm": 2.109375, "learning_rate": 0.00019627288024502652, "loss": 2.1829, "step": 74090 }, { "epoch": 0.17, "grad_norm": 1.828125, "learning_rate": 0.00019627238032381229, "loss": 2.2665, "step": 74095 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019627188036970962, "loss": 2.2118, "step": 74100 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.00019627138038271876, "loss": 2.0226, "step": 74105 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019627088036283983, "loss": 2.3284, "step": 74110 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019627038031007303, "loss": 2.3501, "step": 74115 }, { "epoch": 0.17, "grad_norm": 1.7890625, "learning_rate": 0.00019626988022441855, "loss": 2.1443, "step": 74120 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.0001962693801058765, "loss": 2.0515, "step": 74125 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.0001962688799544471, "loss": 1.9916, "step": 74130 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.00019626837977013048, "loss": 2.0677, "step": 74135 }, { "epoch": 0.17, "grad_norm": 2.34375, "learning_rate": 0.00019626787955292688, "loss": 2.4322, "step": 74140 }, { "epoch": 0.17, "grad_norm": 2.015625, "learning_rate": 0.00019626737930283637, "loss": 1.9552, "step": 74145 }, { "epoch": 0.17, "grad_norm": 1.8828125, "learning_rate": 0.00019626687901985924, "loss": 2.3087, "step": 74150 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019626637870399553, "loss": 2.1934, "step": 74155 }, { "epoch": 0.17, "grad_norm": 1.8046875, "learning_rate": 0.00019626587835524552, "loss": 2.0899, "step": 74160 }, { "epoch": 0.17, "grad_norm": 2.421875, "learning_rate": 0.00019626537797360933, "loss": 2.1953, "step": 74165 }, { "epoch": 0.17, "grad_norm": 1.890625, "learning_rate": 0.00019626487755908717, "loss": 1.9662, "step": 74170 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019626437711167913, "loss": 2.24, "step": 74175 }, { "epoch": 0.17, "grad_norm": 1.640625, "learning_rate": 0.00019626387663138547, "loss": 2.2789, "step": 74180 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.0001962633761182063, "loss": 2.2423, "step": 74185 }, { "epoch": 0.17, "grad_norm": 1.8671875, "learning_rate": 0.00019626287557214183, "loss": 2.1374, "step": 74190 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019626237499319223, "loss": 2.3527, "step": 74195 }, { "epoch": 0.17, "grad_norm": 1.921875, "learning_rate": 0.00019626187438135764, "loss": 2.1892, "step": 74200 }, { "epoch": 0.17, "grad_norm": 2.21875, "learning_rate": 0.00019626137373663825, "loss": 1.998, "step": 74205 }, { "epoch": 0.17, "grad_norm": 1.9921875, "learning_rate": 0.00019626087305903421, "loss": 2.1483, "step": 74210 }, { "epoch": 0.17, "grad_norm": 2.1875, "learning_rate": 0.00019626037234854576, "loss": 2.2683, "step": 74215 }, { "epoch": 0.17, "grad_norm": 1.6875, "learning_rate": 0.000196259871605173, "loss": 2.06, "step": 74220 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019625937082891608, "loss": 2.1338, "step": 74225 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019625887001977526, "loss": 2.1906, "step": 74230 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019625836917775064, "loss": 2.0408, "step": 74235 }, { "epoch": 0.17, "grad_norm": 2.25, "learning_rate": 0.00019625786830284242, "loss": 1.9686, "step": 74240 }, { "epoch": 0.17, "grad_norm": 2.09375, "learning_rate": 0.00019625736739505077, "loss": 2.2001, "step": 74245 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019625686645437584, "loss": 2.3444, "step": 74250 }, { "epoch": 0.17, "grad_norm": 1.9609375, "learning_rate": 0.00019625636548081788, "loss": 2.2614, "step": 74255 }, { "epoch": 0.17, "grad_norm": 1.75, "learning_rate": 0.00019625586447437693, "loss": 1.9928, "step": 74260 }, { "epoch": 0.17, "grad_norm": 1.78125, "learning_rate": 0.00019625536343505326, "loss": 2.1026, "step": 74265 }, { "epoch": 0.17, "grad_norm": 2.234375, "learning_rate": 0.00019625486236284702, "loss": 2.1721, "step": 74270 }, { "epoch": 0.17, "grad_norm": 2.296875, "learning_rate": 0.00019625436125775836, "loss": 2.0554, "step": 74275 }, { "epoch": 0.17, "grad_norm": 2.15625, "learning_rate": 0.00019625386011978745, "loss": 2.1882, "step": 74280 }, { "epoch": 0.17, "grad_norm": 1.875, "learning_rate": 0.0001962533589489345, "loss": 2.1674, "step": 74285 }, { "epoch": 0.17, "grad_norm": 2.546875, "learning_rate": 0.00019625285774519966, "loss": 2.1669, "step": 74290 }, { "epoch": 0.17, "grad_norm": 1.984375, "learning_rate": 0.00019625235650858308, "loss": 2.1995, "step": 74295 }, { "epoch": 0.17, "grad_norm": 2.265625, "learning_rate": 0.00019625185523908497, "loss": 2.1551, "step": 74300 }, { "epoch": 0.17, "grad_norm": 2.078125, "learning_rate": 0.00019625135393670545, "loss": 2.1005, "step": 74305 }, { "epoch": 0.17, "grad_norm": 1.96875, "learning_rate": 0.00019625085260144474, "loss": 2.1136, "step": 74310 }, { "epoch": 0.17, "grad_norm": 2.0625, "learning_rate": 0.000196250351233303, "loss": 1.9622, "step": 74315 }, { "epoch": 0.17, "grad_norm": 2.125, "learning_rate": 0.00019624984983228038, "loss": 2.2235, "step": 74320 }, { "epoch": 0.17, "grad_norm": 2.28125, "learning_rate": 0.00019624934839837708, "loss": 2.2393, "step": 74325 }, { "epoch": 0.17, "grad_norm": 1.609375, "learning_rate": 0.00019624884693159324, "loss": 1.973, "step": 74330 }, { "epoch": 0.17, "grad_norm": 1.8203125, "learning_rate": 0.00019624834543192907, "loss": 2.0817, "step": 74335 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001962478438993847, "loss": 2.1684, "step": 74340 }, { "epoch": 0.17, "grad_norm": 2.046875, "learning_rate": 0.00019624734233396034, "loss": 2.0777, "step": 74345 }, { "epoch": 0.17, "grad_norm": 2.4375, "learning_rate": 0.00019624684073565613, "loss": 2.1144, "step": 74350 }, { "epoch": 0.17, "grad_norm": 1.765625, "learning_rate": 0.00019624633910447225, "loss": 2.1847, "step": 74355 }, { "epoch": 0.17, "grad_norm": 2.203125, "learning_rate": 0.0001962458374404089, "loss": 2.2759, "step": 74360 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.0001962453357434662, "loss": 2.2642, "step": 74365 }, { "epoch": 0.18, "grad_norm": 1.6171875, "learning_rate": 0.00019624483401364435, "loss": 1.9941, "step": 74370 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019624433225094351, "loss": 2.3556, "step": 74375 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.0001962438304553639, "loss": 2.154, "step": 74380 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019624332862690563, "loss": 2.3446, "step": 74385 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019624282676556888, "loss": 2.2479, "step": 74390 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019624232487135387, "loss": 2.2064, "step": 74395 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.0001962418229442607, "loss": 2.2057, "step": 74400 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019624132098428957, "loss": 2.2993, "step": 74405 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.0001962408189914407, "loss": 2.0498, "step": 74410 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019624031696571418, "loss": 2.3008, "step": 74415 }, { "epoch": 0.18, "grad_norm": 2.796875, "learning_rate": 0.00019623981490711026, "loss": 2.2443, "step": 74420 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.00019623931281562905, "loss": 2.2181, "step": 74425 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019623881069127073, "loss": 2.1177, "step": 74430 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019623830853403553, "loss": 2.2844, "step": 74435 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019623780634392356, "loss": 2.0899, "step": 74440 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.000196237304120935, "loss": 2.0438, "step": 74445 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019623680186507003, "loss": 2.2247, "step": 74450 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.00019623629957632883, "loss": 2.0609, "step": 74455 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.00019623579725471158, "loss": 2.0463, "step": 74460 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001962352949002184, "loss": 2.0361, "step": 74465 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019623479251284953, "loss": 2.3306, "step": 74470 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.0001962342900926051, "loss": 2.1953, "step": 74475 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.0001962337876394853, "loss": 2.1577, "step": 74480 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019623328515349026, "loss": 2.0839, "step": 74485 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.0001962327826346202, "loss": 2.1356, "step": 74490 }, { "epoch": 0.18, "grad_norm": 2.3125, "learning_rate": 0.0001962322800828753, "loss": 2.0569, "step": 74495 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001962317774982557, "loss": 2.0758, "step": 74500 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019623127488076157, "loss": 2.1698, "step": 74505 }, { "epoch": 0.18, "grad_norm": 2.671875, "learning_rate": 0.0001962307722303931, "loss": 2.1889, "step": 74510 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019623026954715042, "loss": 2.1923, "step": 74515 }, { "epoch": 0.18, "grad_norm": 2.453125, "learning_rate": 0.00019622976683103378, "loss": 2.0304, "step": 74520 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019622926408204328, "loss": 2.2675, "step": 74525 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019622876130017913, "loss": 2.4136, "step": 74530 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001962282584854415, "loss": 2.2477, "step": 74535 }, { "epoch": 0.18, "grad_norm": 2.359375, "learning_rate": 0.0001962277556378305, "loss": 2.0559, "step": 74540 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.0001962272527573464, "loss": 1.766, "step": 74545 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.0001962267498439893, "loss": 2.2658, "step": 74550 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019622624689775943, "loss": 2.1753, "step": 74555 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001962257439186569, "loss": 1.9489, "step": 74560 }, { "epoch": 0.18, "grad_norm": 1.5, "learning_rate": 0.00019622524090668196, "loss": 2.0108, "step": 74565 }, { "epoch": 0.18, "grad_norm": 2.65625, "learning_rate": 0.00019622473786183467, "loss": 2.2056, "step": 74570 }, { "epoch": 0.18, "grad_norm": 1.5859375, "learning_rate": 0.00019622423478411532, "loss": 2.0325, "step": 74575 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019622373167352397, "loss": 2.1246, "step": 74580 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019622322853006088, "loss": 2.3554, "step": 74585 }, { "epoch": 0.18, "grad_norm": 2.421875, "learning_rate": 0.00019622272535372619, "loss": 2.3002, "step": 74590 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019622222214452006, "loss": 2.2716, "step": 74595 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019622171890244268, "loss": 2.1406, "step": 74600 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.00019622121562749422, "loss": 2.1684, "step": 74605 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019622071231967483, "loss": 2.1559, "step": 74610 }, { "epoch": 0.18, "grad_norm": 1.71875, "learning_rate": 0.0001962202089789847, "loss": 2.1131, "step": 74615 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019621970560542402, "loss": 2.1956, "step": 74620 }, { "epoch": 0.18, "grad_norm": 1.625, "learning_rate": 0.00019621920219899295, "loss": 2.0941, "step": 74625 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.00019621869875969164, "loss": 2.1316, "step": 74630 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019621819528752027, "loss": 2.1601, "step": 74635 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019621769178247903, "loss": 2.137, "step": 74640 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.0001962171882445681, "loss": 2.1776, "step": 74645 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019621668467378762, "loss": 2.227, "step": 74650 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019621618107013779, "loss": 2.224, "step": 74655 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.0001962156774336187, "loss": 1.9092, "step": 74660 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019621517376423064, "loss": 2.3067, "step": 74665 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019621467006197376, "loss": 2.2214, "step": 74670 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019621416632684816, "loss": 2.0925, "step": 74675 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019621366255885407, "loss": 2.3149, "step": 74680 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019621315875799166, "loss": 2.1142, "step": 74685 }, { "epoch": 0.18, "grad_norm": 1.765625, "learning_rate": 0.00019621265492426106, "loss": 2.2172, "step": 74690 }, { "epoch": 0.18, "grad_norm": 1.71875, "learning_rate": 0.0001962121510576625, "loss": 2.0011, "step": 74695 }, { "epoch": 0.18, "grad_norm": 2.46875, "learning_rate": 0.0001962116471581961, "loss": 2.2973, "step": 74700 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019621114322586206, "loss": 2.154, "step": 74705 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.0001962106392606606, "loss": 2.0374, "step": 74710 }, { "epoch": 0.18, "grad_norm": 2.4375, "learning_rate": 0.00019621013526259178, "loss": 2.1364, "step": 74715 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019620963123165584, "loss": 2.0923, "step": 74720 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019620912716785297, "loss": 2.0101, "step": 74725 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.00019620862307118331, "loss": 2.2845, "step": 74730 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019620811894164702, "loss": 2.1716, "step": 74735 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.0001962076147792443, "loss": 2.1399, "step": 74740 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019620711058397533, "loss": 2.0572, "step": 74745 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019620660635584028, "loss": 1.9666, "step": 74750 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019620610209483927, "loss": 1.9789, "step": 74755 }, { "epoch": 0.18, "grad_norm": 1.6796875, "learning_rate": 0.0001962055978009725, "loss": 2.2139, "step": 74760 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.0001962050934742402, "loss": 2.3117, "step": 74765 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019620458911464247, "loss": 2.223, "step": 74770 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.0001962040847221795, "loss": 2.206, "step": 74775 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.0001962035802968515, "loss": 2.3431, "step": 74780 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019620307583865861, "loss": 2.1789, "step": 74785 }, { "epoch": 0.18, "grad_norm": 2.4375, "learning_rate": 0.00019620257134760096, "loss": 2.1448, "step": 74790 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.0001962020668236788, "loss": 1.8149, "step": 74795 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019620156226689227, "loss": 2.1732, "step": 74800 }, { "epoch": 0.18, "grad_norm": 3.015625, "learning_rate": 0.00019620105767724152, "loss": 2.1288, "step": 74805 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019620055305472674, "loss": 2.2751, "step": 74810 }, { "epoch": 0.18, "grad_norm": 1.65625, "learning_rate": 0.00019620004839934817, "loss": 2.2265, "step": 74815 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019619954371110585, "loss": 2.3454, "step": 74820 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019619903899000004, "loss": 2.1513, "step": 74825 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.00019619853423603092, "loss": 2.0276, "step": 74830 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.0001961980294491986, "loss": 2.0492, "step": 74835 }, { "epoch": 0.18, "grad_norm": 1.5546875, "learning_rate": 0.0001961975246295033, "loss": 2.0708, "step": 74840 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001961970197769452, "loss": 2.1807, "step": 74845 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019619651489152445, "loss": 2.185, "step": 74850 }, { "epoch": 0.18, "grad_norm": 2.515625, "learning_rate": 0.00019619600997324122, "loss": 2.1293, "step": 74855 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019619550502209568, "loss": 2.0737, "step": 74860 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019619500003808802, "loss": 2.046, "step": 74865 }, { "epoch": 0.18, "grad_norm": 1.5, "learning_rate": 0.0001961944950212184, "loss": 2.135, "step": 74870 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019619398997148697, "loss": 2.1901, "step": 74875 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.00019619348488889395, "loss": 2.0664, "step": 74880 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.0001961929797734395, "loss": 2.1772, "step": 74885 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019619247462512377, "loss": 2.1947, "step": 74890 }, { "epoch": 0.18, "grad_norm": 2.25, "learning_rate": 0.00019619196944394696, "loss": 2.2164, "step": 74895 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019619146422990923, "loss": 2.1572, "step": 74900 }, { "epoch": 0.18, "grad_norm": 1.7109375, "learning_rate": 0.00019619095898301074, "loss": 2.1842, "step": 74905 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019619045370325168, "loss": 2.1745, "step": 74910 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001961899483906322, "loss": 2.103, "step": 74915 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.0001961894430451525, "loss": 2.1558, "step": 74920 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019618893766681274, "loss": 2.1354, "step": 74925 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019618843225561308, "loss": 2.2765, "step": 74930 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.00019618792681155376, "loss": 2.1973, "step": 74935 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.00019618742133463486, "loss": 1.9672, "step": 74940 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.0001961869158248566, "loss": 2.0427, "step": 74945 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019618641028221914, "loss": 2.1138, "step": 74950 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019618590470672266, "loss": 2.2405, "step": 74955 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.00019618539909836732, "loss": 2.2168, "step": 74960 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019618489345715333, "loss": 2.1923, "step": 74965 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001961843877830808, "loss": 2.0227, "step": 74970 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019618388207614994, "loss": 2.1061, "step": 74975 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019618337633636097, "loss": 2.3356, "step": 74980 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019618287056371399, "loss": 2.1384, "step": 74985 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001961823647582092, "loss": 2.2647, "step": 74990 }, { "epoch": 0.18, "grad_norm": 2.59375, "learning_rate": 0.00019618185891984676, "loss": 2.2202, "step": 74995 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019618135304862684, "loss": 2.2446, "step": 75000 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019618084714454964, "loss": 2.169, "step": 75005 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001961803412076153, "loss": 1.9617, "step": 75010 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019617983523782405, "loss": 1.9934, "step": 75015 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019617932923517598, "loss": 2.3443, "step": 75020 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019617882319967133, "loss": 2.0723, "step": 75025 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019617831713131026, "loss": 2.2706, "step": 75030 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019617781103009293, "loss": 2.2172, "step": 75035 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001961773048960195, "loss": 2.2191, "step": 75040 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019617679872909017, "loss": 2.0363, "step": 75045 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.0001961762925293051, "loss": 2.4065, "step": 75050 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019617578629666445, "loss": 2.0778, "step": 75055 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019617528003116843, "loss": 2.0901, "step": 75060 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019617477373281718, "loss": 2.0387, "step": 75065 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.0001961742674016109, "loss": 2.1754, "step": 75070 }, { "epoch": 0.18, "grad_norm": 1.5859375, "learning_rate": 0.00019617376103754972, "loss": 2.1588, "step": 75075 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019617325464063384, "loss": 2.2877, "step": 75080 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019617274821086343, "loss": 2.1993, "step": 75085 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.00019617224174823866, "loss": 2.2186, "step": 75090 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019617173525275975, "loss": 2.2119, "step": 75095 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.0001961712287244268, "loss": 1.9826, "step": 75100 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019617072216324003, "loss": 2.3304, "step": 75105 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019617021556919958, "loss": 2.1736, "step": 75110 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019616970894230563, "loss": 2.1695, "step": 75115 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019616920228255838, "loss": 2.2693, "step": 75120 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019616869558995798, "loss": 2.2559, "step": 75125 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.0001961681888645046, "loss": 2.0824, "step": 75130 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019616768210619845, "loss": 2.2281, "step": 75135 }, { "epoch": 0.18, "grad_norm": 3.1875, "learning_rate": 0.0001961671753150397, "loss": 2.0404, "step": 75140 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019616666849102843, "loss": 2.211, "step": 75145 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.0001961661616341649, "loss": 2.1845, "step": 75150 }, { "epoch": 0.18, "grad_norm": 2.328125, "learning_rate": 0.00019616565474444927, "loss": 2.1742, "step": 75155 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019616514782188173, "loss": 2.3248, "step": 75160 }, { "epoch": 0.18, "grad_norm": 1.6328125, "learning_rate": 0.0001961646408664624, "loss": 2.1451, "step": 75165 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019616413387819152, "loss": 2.4083, "step": 75170 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.0001961636268570692, "loss": 2.1005, "step": 75175 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019616311980309565, "loss": 2.2886, "step": 75180 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019616261271627105, "loss": 1.933, "step": 75185 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019616210559659554, "loss": 2.1338, "step": 75190 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019616159844406932, "loss": 2.1775, "step": 75195 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019616109125869253, "loss": 2.215, "step": 75200 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.0001961605840404654, "loss": 2.101, "step": 75205 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019616007678938805, "loss": 1.9933, "step": 75210 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019615956950546067, "loss": 1.991, "step": 75215 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.00019615906218868346, "loss": 2.156, "step": 75220 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019615855483905655, "loss": 2.1195, "step": 75225 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019615804745658016, "loss": 2.0356, "step": 75230 }, { "epoch": 0.18, "grad_norm": 1.6015625, "learning_rate": 0.0001961575400412544, "loss": 2.0521, "step": 75235 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001961570325930795, "loss": 2.1044, "step": 75240 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001961565251120556, "loss": 2.2361, "step": 75245 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001961560175981829, "loss": 2.368, "step": 75250 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.00019615551005146156, "loss": 2.181, "step": 75255 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.00019615500247189175, "loss": 2.2106, "step": 75260 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019615449485947366, "loss": 2.1044, "step": 75265 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019615398721420743, "loss": 2.3533, "step": 75270 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019615347953609328, "loss": 2.2888, "step": 75275 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.00019615297182513133, "loss": 2.1608, "step": 75280 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.0001961524640813218, "loss": 1.9876, "step": 75285 }, { "epoch": 0.18, "grad_norm": 2.3125, "learning_rate": 0.00019615195630466484, "loss": 2.3164, "step": 75290 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019615144849516062, "loss": 2.2163, "step": 75295 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019615094065280936, "loss": 2.3068, "step": 75300 }, { "epoch": 0.18, "grad_norm": 1.65625, "learning_rate": 0.00019615043277761114, "loss": 2.0254, "step": 75305 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.0001961499248695662, "loss": 2.1098, "step": 75310 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.0001961494169286747, "loss": 2.1308, "step": 75315 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019614890895493684, "loss": 2.306, "step": 75320 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019614840094835277, "loss": 2.2504, "step": 75325 }, { "epoch": 0.18, "grad_norm": 1.5546875, "learning_rate": 0.00019614789290892264, "loss": 2.2443, "step": 75330 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019614738483664666, "loss": 2.0624, "step": 75335 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.000196146876731525, "loss": 2.2731, "step": 75340 }, { "epoch": 0.18, "grad_norm": 1.5546875, "learning_rate": 0.0001961463685935578, "loss": 2.1918, "step": 75345 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019614586042274526, "loss": 2.2611, "step": 75350 }, { "epoch": 0.18, "grad_norm": 1.5859375, "learning_rate": 0.00019614535221908755, "loss": 2.1952, "step": 75355 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019614484398258487, "loss": 2.0015, "step": 75360 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.0001961443357132373, "loss": 2.4163, "step": 75365 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.00019614382741104516, "loss": 2.2145, "step": 75370 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001961433190760085, "loss": 2.1885, "step": 75375 }, { "epoch": 0.18, "grad_norm": 2.40625, "learning_rate": 0.00019614281070812756, "loss": 2.0607, "step": 75380 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019614230230740248, "loss": 2.3396, "step": 75385 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019614179387383347, "loss": 2.0958, "step": 75390 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019614128540742064, "loss": 2.1518, "step": 75395 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019614077690816424, "loss": 2.1355, "step": 75400 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019614026837606438, "loss": 2.2225, "step": 75405 }, { "epoch": 0.18, "grad_norm": 1.765625, "learning_rate": 0.00019613975981112126, "loss": 2.0951, "step": 75410 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019613925121333506, "loss": 2.4319, "step": 75415 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.00019613874258270596, "loss": 2.1644, "step": 75420 }, { "epoch": 0.18, "grad_norm": 2.625, "learning_rate": 0.00019613823391923413, "loss": 2.15, "step": 75425 }, { "epoch": 0.18, "grad_norm": 1.6171875, "learning_rate": 0.0001961377252229197, "loss": 1.9633, "step": 75430 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019613721649376292, "loss": 2.2519, "step": 75435 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019613670773176392, "loss": 2.1844, "step": 75440 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.00019613619893692286, "loss": 2.0208, "step": 75445 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019613569010923991, "loss": 2.0956, "step": 75450 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001961351812487153, "loss": 2.0726, "step": 75455 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019613467235534915, "loss": 2.0995, "step": 75460 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019613416342914163, "loss": 2.2152, "step": 75465 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.000196133654470093, "loss": 2.125, "step": 75470 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019613314547820332, "loss": 2.1841, "step": 75475 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019613263645347284, "loss": 2.3937, "step": 75480 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.00019613212739590167, "loss": 2.1095, "step": 75485 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019613161830549008, "loss": 1.9735, "step": 75490 }, { "epoch": 0.18, "grad_norm": 1.3359375, "learning_rate": 0.00019613110918223814, "loss": 1.9178, "step": 75495 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.00019613060002614607, "loss": 2.0568, "step": 75500 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001961300908372141, "loss": 2.0428, "step": 75505 }, { "epoch": 0.18, "grad_norm": 1.4453125, "learning_rate": 0.00019612958161544227, "loss": 2.0919, "step": 75510 }, { "epoch": 0.18, "grad_norm": 1.6875, "learning_rate": 0.00019612907236083086, "loss": 2.1828, "step": 75515 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019612856307338, "loss": 2.2424, "step": 75520 }, { "epoch": 0.18, "grad_norm": 1.6640625, "learning_rate": 0.00019612805375308993, "loss": 2.1787, "step": 75525 }, { "epoch": 0.18, "grad_norm": 1.703125, "learning_rate": 0.00019612754439996075, "loss": 2.1415, "step": 75530 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.00019612703501399264, "loss": 2.091, "step": 75535 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.0001961265255951858, "loss": 2.1514, "step": 75540 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.00019612601614354042, "loss": 2.1284, "step": 75545 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.0001961255066590566, "loss": 2.0104, "step": 75550 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.0001961249971417346, "loss": 2.0831, "step": 75555 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019612448759157456, "loss": 2.2202, "step": 75560 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019612397800857664, "loss": 2.2715, "step": 75565 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019612346839274102, "loss": 2.1184, "step": 75570 }, { "epoch": 0.18, "grad_norm": 1.671875, "learning_rate": 0.00019612295874406788, "loss": 2.1435, "step": 75575 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019612244906255742, "loss": 2.1214, "step": 75580 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.00019612193934820976, "loss": 2.1935, "step": 75585 }, { "epoch": 0.18, "grad_norm": 2.25, "learning_rate": 0.0001961214296010251, "loss": 2.0339, "step": 75590 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.0001961209198210036, "loss": 2.2813, "step": 75595 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.0001961204100081455, "loss": 2.2306, "step": 75600 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019611990016245092, "loss": 2.0637, "step": 75605 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019611939028392, "loss": 1.9663, "step": 75610 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.000196118880372553, "loss": 2.218, "step": 75615 }, { "epoch": 0.18, "grad_norm": 1.7890625, "learning_rate": 0.00019611837042835003, "loss": 2.1182, "step": 75620 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019611786045131127, "loss": 1.9928, "step": 75625 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001961173504414369, "loss": 2.1395, "step": 75630 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019611684039872712, "loss": 2.2762, "step": 75635 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019611633032318206, "loss": 2.2416, "step": 75640 }, { "epoch": 0.18, "grad_norm": 1.671875, "learning_rate": 0.00019611582021480194, "loss": 1.9801, "step": 75645 }, { "epoch": 0.18, "grad_norm": 2.59375, "learning_rate": 0.0001961153100735869, "loss": 2.0914, "step": 75650 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019611479989953715, "loss": 2.0405, "step": 75655 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.0001961142896926528, "loss": 2.1603, "step": 75660 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.0001961137794529341, "loss": 2.1374, "step": 75665 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019611326918038118, "loss": 2.3508, "step": 75670 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.0001961127588749942, "loss": 2.0899, "step": 75675 }, { "epoch": 0.18, "grad_norm": 2.34375, "learning_rate": 0.00019611224853677338, "loss": 2.006, "step": 75680 }, { "epoch": 0.18, "grad_norm": 1.609375, "learning_rate": 0.00019611173816571888, "loss": 2.075, "step": 75685 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.0001961112277618309, "loss": 2.1872, "step": 75690 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.0001961107173251095, "loss": 2.3092, "step": 75695 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.000196110206855555, "loss": 2.0403, "step": 75700 }, { "epoch": 0.18, "grad_norm": 1.7890625, "learning_rate": 0.0001961096963531675, "loss": 2.0919, "step": 75705 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019610918581794716, "loss": 2.0106, "step": 75710 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019610867524989418, "loss": 2.1317, "step": 75715 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019610816464900874, "loss": 2.136, "step": 75720 }, { "epoch": 0.18, "grad_norm": 2.328125, "learning_rate": 0.00019610765401529103, "loss": 2.2966, "step": 75725 }, { "epoch": 0.18, "grad_norm": 1.46875, "learning_rate": 0.0001961071433487412, "loss": 2.1069, "step": 75730 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019610663264935937, "loss": 2.0163, "step": 75735 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.00019610612191714582, "loss": 2.1531, "step": 75740 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019610561115210068, "loss": 2.441, "step": 75745 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001961051003542241, "loss": 2.1942, "step": 75750 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.0001961045895235163, "loss": 2.374, "step": 75755 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001961040786599774, "loss": 2.1865, "step": 75760 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019610356776360763, "loss": 2.084, "step": 75765 }, { "epoch": 0.18, "grad_norm": 1.7109375, "learning_rate": 0.00019610305683440712, "loss": 2.2305, "step": 75770 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019610254587237606, "loss": 2.2958, "step": 75775 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019610203487751467, "loss": 1.9442, "step": 75780 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019610152384982304, "loss": 1.9675, "step": 75785 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001961010127893014, "loss": 2.2642, "step": 75790 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001961005016959499, "loss": 2.2583, "step": 75795 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019609999056976874, "loss": 2.165, "step": 75800 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019609947941075806, "loss": 2.2219, "step": 75805 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.00019609896821891806, "loss": 2.3035, "step": 75810 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001960984569942489, "loss": 2.1365, "step": 75815 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.0001960979457367508, "loss": 2.3646, "step": 75820 }, { "epoch": 0.18, "grad_norm": 1.671875, "learning_rate": 0.00019609743444642388, "loss": 2.1838, "step": 75825 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019609692312326832, "loss": 2.1409, "step": 75830 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019609641176728432, "loss": 2.2549, "step": 75835 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019609590037847205, "loss": 2.2596, "step": 75840 }, { "epoch": 0.18, "grad_norm": 3.0, "learning_rate": 0.0001960953889568317, "loss": 2.1213, "step": 75845 }, { "epoch": 0.18, "grad_norm": 1.6640625, "learning_rate": 0.0001960948775023634, "loss": 2.1304, "step": 75850 }, { "epoch": 0.18, "grad_norm": 1.75, "learning_rate": 0.00019609436601506733, "loss": 2.14, "step": 75855 }, { "epoch": 0.18, "grad_norm": 1.640625, "learning_rate": 0.00019609385449494368, "loss": 2.0875, "step": 75860 }, { "epoch": 0.18, "grad_norm": 1.640625, "learning_rate": 0.00019609334294199267, "loss": 2.1227, "step": 75865 }, { "epoch": 0.18, "grad_norm": 1.65625, "learning_rate": 0.0001960928313562144, "loss": 2.0517, "step": 75870 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019609231973760903, "loss": 2.0817, "step": 75875 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019609180808617685, "loss": 2.2644, "step": 75880 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019609129640191795, "loss": 2.2656, "step": 75885 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019609078468483252, "loss": 2.0953, "step": 75890 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.00019609027293492073, "loss": 2.0384, "step": 75895 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019608976115218277, "loss": 2.1231, "step": 75900 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.00019608924933661877, "loss": 2.1027, "step": 75905 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019608873748822898, "loss": 2.1475, "step": 75910 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.0001960882256070135, "loss": 2.3441, "step": 75915 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001960877136929726, "loss": 1.9696, "step": 75920 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019608720174610632, "loss": 2.2012, "step": 75925 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.00019608668976641496, "loss": 2.2192, "step": 75930 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019608617775389862, "loss": 2.1925, "step": 75935 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001960856657085575, "loss": 2.1846, "step": 75940 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019608515363039178, "loss": 2.1969, "step": 75945 }, { "epoch": 0.18, "grad_norm": 1.5234375, "learning_rate": 0.00019608464151940164, "loss": 2.3562, "step": 75950 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019608412937558722, "loss": 2.3101, "step": 75955 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019608361719894871, "loss": 2.1775, "step": 75960 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019608310498948632, "loss": 2.0947, "step": 75965 }, { "epoch": 0.18, "grad_norm": 2.703125, "learning_rate": 0.0001960825927472002, "loss": 1.933, "step": 75970 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.0001960820804720905, "loss": 2.2365, "step": 75975 }, { "epoch": 0.18, "grad_norm": 1.7109375, "learning_rate": 0.00019608156816415745, "loss": 2.1579, "step": 75980 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019608105582340118, "loss": 2.0772, "step": 75985 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001960805434498219, "loss": 2.1822, "step": 75990 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019608003104341974, "loss": 2.1262, "step": 75995 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.0001960795186041949, "loss": 2.1437, "step": 76000 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019607900613214754, "loss": 2.0137, "step": 76005 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.0001960784936272779, "loss": 2.1827, "step": 76010 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019607798108958605, "loss": 2.2296, "step": 76015 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019607746851907224, "loss": 2.3597, "step": 76020 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019607695591573662, "loss": 2.3372, "step": 76025 }, { "epoch": 0.18, "grad_norm": 2.34375, "learning_rate": 0.00019607644327957938, "loss": 2.0752, "step": 76030 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001960759306106007, "loss": 2.213, "step": 76035 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019607541790880067, "loss": 2.2068, "step": 76040 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019607490517417958, "loss": 2.0988, "step": 76045 }, { "epoch": 0.18, "grad_norm": 2.65625, "learning_rate": 0.00019607439240673758, "loss": 2.3018, "step": 76050 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019607387960647482, "loss": 2.2059, "step": 76055 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.00019607336677339142, "loss": 2.2269, "step": 76060 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019607285390748768, "loss": 2.1476, "step": 76065 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.0001960723410087637, "loss": 2.198, "step": 76070 }, { "epoch": 0.18, "grad_norm": 2.703125, "learning_rate": 0.00019607182807721967, "loss": 2.141, "step": 76075 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019607131511285576, "loss": 2.208, "step": 76080 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.0001960708021156721, "loss": 2.188, "step": 76085 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.00019607028908566896, "loss": 2.2966, "step": 76090 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019606977602284643, "loss": 2.1948, "step": 76095 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019606926292720477, "loss": 2.2798, "step": 76100 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019606874979874407, "loss": 2.2979, "step": 76105 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019606823663746457, "loss": 2.2043, "step": 76110 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.0001960677234433664, "loss": 2.1794, "step": 76115 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.00019606721021644976, "loss": 2.12, "step": 76120 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019606669695671484, "loss": 2.0006, "step": 76125 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019606618366416174, "loss": 2.1614, "step": 76130 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019606567033879073, "loss": 2.2345, "step": 76135 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019606515698060193, "loss": 1.8667, "step": 76140 }, { "epoch": 0.18, "grad_norm": 2.46875, "learning_rate": 0.00019606464358959554, "loss": 2.2432, "step": 76145 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019606413016577171, "loss": 2.2454, "step": 76150 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.00019606361670913066, "loss": 2.1363, "step": 76155 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001960631032196725, "loss": 2.1847, "step": 76160 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019606258969739746, "loss": 2.2994, "step": 76165 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019606207614230572, "loss": 2.0851, "step": 76170 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019606156255439738, "loss": 2.1967, "step": 76175 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.0001960610489336727, "loss": 2.0907, "step": 76180 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019606053528013184, "loss": 2.1571, "step": 76185 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019606002159377493, "loss": 1.9349, "step": 76190 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.00019605950787460218, "loss": 2.2883, "step": 76195 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019605899412261378, "loss": 2.0367, "step": 76200 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019605848033780988, "loss": 2.1648, "step": 76205 }, { "epoch": 0.18, "grad_norm": 1.640625, "learning_rate": 0.00019605796652019065, "loss": 2.235, "step": 76210 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019605745266975625, "loss": 2.218, "step": 76215 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019605693878650693, "loss": 2.1799, "step": 76220 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.0001960564248704428, "loss": 2.1759, "step": 76225 }, { "epoch": 0.18, "grad_norm": 2.375, "learning_rate": 0.00019605591092156406, "loss": 2.0955, "step": 76230 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019605539693987086, "loss": 2.0847, "step": 76235 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.0001960548829253634, "loss": 2.0762, "step": 76240 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019605436887804186, "loss": 2.1535, "step": 76245 }, { "epoch": 0.18, "grad_norm": 2.765625, "learning_rate": 0.00019605385479790638, "loss": 2.2545, "step": 76250 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019605334068495718, "loss": 2.0418, "step": 76255 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.0001960528265391944, "loss": 2.1174, "step": 76260 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019605231236061827, "loss": 2.1148, "step": 76265 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.0001960517981492289, "loss": 2.2204, "step": 76270 }, { "epoch": 0.18, "grad_norm": 1.703125, "learning_rate": 0.0001960512839050265, "loss": 2.1704, "step": 76275 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019605076962801125, "loss": 2.2734, "step": 76280 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.0001960502553181833, "loss": 2.0813, "step": 76285 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019604974097554284, "loss": 2.1306, "step": 76290 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.0001960492266000901, "loss": 2.2667, "step": 76295 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019604871219182512, "loss": 1.9273, "step": 76300 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001960481977507482, "loss": 2.2318, "step": 76305 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019604768327685946, "loss": 2.2086, "step": 76310 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.0001960471687701591, "loss": 2.1285, "step": 76315 }, { "epoch": 0.18, "grad_norm": 2.375, "learning_rate": 0.0001960466542306473, "loss": 2.2691, "step": 76320 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001960461396583242, "loss": 2.2238, "step": 76325 }, { "epoch": 0.18, "grad_norm": 2.453125, "learning_rate": 0.00019604562505319, "loss": 2.1771, "step": 76330 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019604511041524484, "loss": 2.1479, "step": 76335 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019604459574448896, "loss": 2.0134, "step": 76340 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019604408104092254, "loss": 2.2397, "step": 76345 }, { "epoch": 0.18, "grad_norm": 1.6796875, "learning_rate": 0.00019604356630454568, "loss": 2.1911, "step": 76350 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019604305153535857, "loss": 2.1968, "step": 76355 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019604253673336145, "loss": 2.0692, "step": 76360 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019604202189855448, "loss": 2.3075, "step": 76365 }, { "epoch": 0.18, "grad_norm": 1.7890625, "learning_rate": 0.00019604150703093779, "loss": 2.1389, "step": 76370 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019604099213051156, "loss": 2.2076, "step": 76375 }, { "epoch": 0.18, "grad_norm": 2.703125, "learning_rate": 0.000196040477197276, "loss": 2.319, "step": 76380 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.00019603996223123126, "loss": 2.3102, "step": 76385 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019603944723237753, "loss": 2.2318, "step": 76390 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.000196038932200715, "loss": 2.1706, "step": 76395 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019603841713624384, "loss": 2.1914, "step": 76400 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019603790203896417, "loss": 2.3185, "step": 76405 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019603738690887626, "loss": 2.2155, "step": 76410 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.0001960368717459802, "loss": 2.2364, "step": 76415 }, { "epoch": 0.18, "grad_norm": 2.53125, "learning_rate": 0.00019603635655027622, "loss": 2.2146, "step": 76420 }, { "epoch": 0.18, "grad_norm": 1.59375, "learning_rate": 0.00019603584132176446, "loss": 2.0568, "step": 76425 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.0001960353260604451, "loss": 2.3133, "step": 76430 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.0001960348107663184, "loss": 2.1007, "step": 76435 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019603429543938442, "loss": 2.1514, "step": 76440 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019603378007964338, "loss": 2.1864, "step": 76445 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.00019603326468709548, "loss": 2.0532, "step": 76450 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019603274926174088, "loss": 2.0927, "step": 76455 }, { "epoch": 0.18, "grad_norm": 1.6484375, "learning_rate": 0.00019603223380357972, "loss": 2.04, "step": 76460 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001960317183126122, "loss": 2.3195, "step": 76465 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.00019603120278883856, "loss": 2.1789, "step": 76470 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019603068723225887, "loss": 2.2963, "step": 76475 }, { "epoch": 0.18, "grad_norm": 2.25, "learning_rate": 0.00019603017164287337, "loss": 1.9063, "step": 76480 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001960296560206822, "loss": 2.0838, "step": 76485 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.0001960291403656856, "loss": 2.1129, "step": 76490 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019602862467788367, "loss": 2.1026, "step": 76495 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019602810895727663, "loss": 2.0735, "step": 76500 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019602759320386465, "loss": 2.3163, "step": 76505 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.0001960270774176479, "loss": 2.0822, "step": 76510 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019602656159862657, "loss": 2.2501, "step": 76515 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019602604574680085, "loss": 2.2869, "step": 76520 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019602552986217083, "loss": 2.1584, "step": 76525 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019602501394473678, "loss": 2.0614, "step": 76530 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019602449799449885, "loss": 2.3408, "step": 76535 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001960239820114572, "loss": 2.0142, "step": 76540 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.000196023465995612, "loss": 2.0885, "step": 76545 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.00019602294994696347, "loss": 2.2016, "step": 76550 }, { "epoch": 0.18, "grad_norm": 1.7109375, "learning_rate": 0.00019602243386551176, "loss": 2.1007, "step": 76555 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.00019602191775125702, "loss": 2.087, "step": 76560 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019602140160419947, "loss": 2.1687, "step": 76565 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019602088542433925, "loss": 2.2331, "step": 76570 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019602036921167657, "loss": 2.1551, "step": 76575 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001960198529662116, "loss": 2.111, "step": 76580 }, { "epoch": 0.18, "grad_norm": 1.890625, "learning_rate": 0.00019601933668794447, "loss": 2.1098, "step": 76585 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019601882037687543, "loss": 2.08, "step": 76590 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019601830403300462, "loss": 1.9974, "step": 76595 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019601778765633218, "loss": 2.2211, "step": 76600 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019601727124685833, "loss": 2.0702, "step": 76605 }, { "epoch": 0.18, "grad_norm": 1.7890625, "learning_rate": 0.00019601675480458326, "loss": 2.0477, "step": 76610 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019601623832950712, "loss": 2.1884, "step": 76615 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.0001960157218216301, "loss": 2.0746, "step": 76620 }, { "epoch": 0.18, "grad_norm": 1.6796875, "learning_rate": 0.00019601520528095235, "loss": 2.0964, "step": 76625 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019601468870747408, "loss": 2.2302, "step": 76630 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019601417210119543, "loss": 1.9557, "step": 76635 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.00019601365546211664, "loss": 2.1544, "step": 76640 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.0001960131387902378, "loss": 2.1191, "step": 76645 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019601262208555914, "loss": 2.2778, "step": 76650 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.00019601210534808084, "loss": 2.2163, "step": 76655 }, { "epoch": 0.18, "grad_norm": 4.25, "learning_rate": 0.00019601158857780306, "loss": 2.3014, "step": 76660 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019601107177472598, "loss": 2.2853, "step": 76665 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.00019601055493884977, "loss": 2.2487, "step": 76670 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.0001960100380701746, "loss": 2.0347, "step": 76675 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019600952116870067, "loss": 2.2706, "step": 76680 }, { "epoch": 0.18, "grad_norm": 2.40625, "learning_rate": 0.00019600900423442816, "loss": 1.9611, "step": 76685 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.00019600848726735723, "loss": 2.0493, "step": 76690 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.00019600797026748804, "loss": 2.1058, "step": 76695 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019600745323482084, "loss": 2.2444, "step": 76700 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001960069361693557, "loss": 2.34, "step": 76705 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019600641907109286, "loss": 2.0721, "step": 76710 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019600590194003247, "loss": 2.1412, "step": 76715 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019600538477617475, "loss": 2.1827, "step": 76720 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019600486757951987, "loss": 2.188, "step": 76725 }, { "epoch": 0.18, "grad_norm": 2.3125, "learning_rate": 0.00019600435035006792, "loss": 2.3407, "step": 76730 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.0001960038330878192, "loss": 2.376, "step": 76735 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019600331579277383, "loss": 2.1121, "step": 76740 }, { "epoch": 0.18, "grad_norm": 1.6015625, "learning_rate": 0.00019600279846493195, "loss": 2.0095, "step": 76745 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.00019600228110429378, "loss": 2.1207, "step": 76750 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.0001960017637108595, "loss": 2.0654, "step": 76755 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019600124628462928, "loss": 2.1915, "step": 76760 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.0001960007288256033, "loss": 2.2956, "step": 76765 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.0001960002113337817, "loss": 2.0915, "step": 76770 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001959996938091647, "loss": 2.2274, "step": 76775 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019599917625175248, "loss": 2.3937, "step": 76780 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.0001959986586615452, "loss": 2.1519, "step": 76785 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.000195998141038543, "loss": 2.2442, "step": 76790 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.0001959976233827461, "loss": 2.0702, "step": 76795 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.0001959971056941547, "loss": 2.1462, "step": 76800 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019599658797276895, "loss": 2.3833, "step": 76805 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.000195996070218589, "loss": 2.2453, "step": 76810 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.0001959955524316151, "loss": 2.3344, "step": 76815 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.00019599503461184732, "loss": 2.1952, "step": 76820 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019599451675928592, "loss": 2.2253, "step": 76825 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019599399887393104, "loss": 2.0526, "step": 76830 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001959934809557829, "loss": 2.2651, "step": 76835 }, { "epoch": 0.18, "grad_norm": 2.578125, "learning_rate": 0.00019599296300484162, "loss": 2.128, "step": 76840 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.0001959924450211074, "loss": 2.0558, "step": 76845 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019599192700458043, "loss": 1.9602, "step": 76850 }, { "epoch": 0.18, "grad_norm": 1.765625, "learning_rate": 0.0001959914089552609, "loss": 2.147, "step": 76855 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019599089087314893, "loss": 2.2081, "step": 76860 }, { "epoch": 0.18, "grad_norm": 1.6328125, "learning_rate": 0.00019599037275824473, "loss": 2.2513, "step": 76865 }, { "epoch": 0.18, "grad_norm": 1.6171875, "learning_rate": 0.0001959898546105485, "loss": 2.0984, "step": 76870 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.00019598933643006036, "loss": 2.0307, "step": 76875 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019598881821678054, "loss": 2.129, "step": 76880 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.0001959882999707092, "loss": 2.1443, "step": 76885 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019598778169184654, "loss": 2.1442, "step": 76890 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001959872633801927, "loss": 2.1798, "step": 76895 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019598674503574785, "loss": 2.1993, "step": 76900 }, { "epoch": 0.18, "grad_norm": 1.75, "learning_rate": 0.00019598622665851218, "loss": 2.1124, "step": 76905 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.0001959857082484859, "loss": 1.8797, "step": 76910 }, { "epoch": 0.18, "grad_norm": 1.5859375, "learning_rate": 0.00019598518980566915, "loss": 2.1553, "step": 76915 }, { "epoch": 0.18, "grad_norm": 1.7109375, "learning_rate": 0.00019598467133006214, "loss": 2.1803, "step": 76920 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.000195984152821665, "loss": 2.0615, "step": 76925 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.00019598363428047794, "loss": 2.042, "step": 76930 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.00019598311570650113, "loss": 2.1396, "step": 76935 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019598259709973474, "loss": 2.2048, "step": 76940 }, { "epoch": 0.18, "grad_norm": 2.3125, "learning_rate": 0.00019598207846017895, "loss": 2.2389, "step": 76945 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019598155978783397, "loss": 2.1838, "step": 76950 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019598104108269992, "loss": 2.2385, "step": 76955 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019598052234477703, "loss": 2.2141, "step": 76960 }, { "epoch": 0.18, "grad_norm": 1.6796875, "learning_rate": 0.00019598000357406545, "loss": 2.2518, "step": 76965 }, { "epoch": 0.18, "grad_norm": 1.6328125, "learning_rate": 0.00019597948477056534, "loss": 2.2177, "step": 76970 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019597896593427693, "loss": 2.11, "step": 76975 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019597844706520032, "loss": 2.2456, "step": 76980 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019597792816333572, "loss": 2.287, "step": 76985 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019597740922868337, "loss": 2.1524, "step": 76990 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019597689026124338, "loss": 2.1763, "step": 76995 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019597637126101596, "loss": 2.3556, "step": 77000 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019597585222800126, "loss": 2.1495, "step": 77005 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019597533316219944, "loss": 2.0863, "step": 77010 }, { "epoch": 0.18, "grad_norm": 1.578125, "learning_rate": 0.00019597481406361073, "loss": 2.2929, "step": 77015 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019597429493223528, "loss": 2.106, "step": 77020 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019597377576807326, "loss": 2.3758, "step": 77025 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.0001959732565711249, "loss": 2.0831, "step": 77030 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.0001959727373413903, "loss": 2.0546, "step": 77035 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019597221807886967, "loss": 2.0222, "step": 77040 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019597169878356318, "loss": 2.1436, "step": 77045 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019597117945547104, "loss": 2.0141, "step": 77050 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.0001959706600945934, "loss": 2.2256, "step": 77055 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019597014070093046, "loss": 2.1506, "step": 77060 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.00019596962127448232, "loss": 2.2868, "step": 77065 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019596910181524929, "loss": 2.2271, "step": 77070 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019596858232323144, "loss": 2.0053, "step": 77075 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019596806279842895, "loss": 2.2509, "step": 77080 }, { "epoch": 0.18, "grad_norm": 1.609375, "learning_rate": 0.00019596754324084206, "loss": 2.098, "step": 77085 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.00019596702365047092, "loss": 1.9818, "step": 77090 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001959665040273157, "loss": 2.1765, "step": 77095 }, { "epoch": 0.18, "grad_norm": 1.6640625, "learning_rate": 0.00019596598437137658, "loss": 2.2068, "step": 77100 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019596546468265375, "loss": 2.229, "step": 77105 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.00019596494496114736, "loss": 2.3274, "step": 77110 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019596442520685762, "loss": 2.121, "step": 77115 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.0001959639054197847, "loss": 2.0686, "step": 77120 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019596338559992876, "loss": 2.2922, "step": 77125 }, { "epoch": 0.18, "grad_norm": 2.3125, "learning_rate": 0.00019596286574728996, "loss": 2.4066, "step": 77130 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019596234586186854, "loss": 2.2659, "step": 77135 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019596182594366465, "loss": 2.2853, "step": 77140 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001959613059926784, "loss": 2.0473, "step": 77145 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019596078600891007, "loss": 2.1359, "step": 77150 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001959602659923598, "loss": 2.2759, "step": 77155 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019595974594302775, "loss": 2.3176, "step": 77160 }, { "epoch": 0.18, "grad_norm": 1.5, "learning_rate": 0.00019595922586091412, "loss": 2.0627, "step": 77165 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.0001959587057460191, "loss": 2.2026, "step": 77170 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019595818559834278, "loss": 2.1657, "step": 77175 }, { "epoch": 0.18, "grad_norm": 1.703125, "learning_rate": 0.00019595766541788545, "loss": 2.2085, "step": 77180 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019595714520464724, "loss": 2.2024, "step": 77185 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019595662495862834, "loss": 1.9293, "step": 77190 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.00019595610467982888, "loss": 2.362, "step": 77195 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.0001959555843682491, "loss": 2.1224, "step": 77200 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.00019595506402388915, "loss": 2.2858, "step": 77205 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.0001959545436467492, "loss": 2.1753, "step": 77210 }, { "epoch": 0.18, "grad_norm": 3.453125, "learning_rate": 0.00019595402323682945, "loss": 2.138, "step": 77215 }, { "epoch": 0.18, "grad_norm": 2.484375, "learning_rate": 0.00019595350279413005, "loss": 2.2902, "step": 77220 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.0001959529823186512, "loss": 2.2617, "step": 77225 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001959524618103931, "loss": 2.0039, "step": 77230 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019595194126935584, "loss": 2.1733, "step": 77235 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.0001959514206955397, "loss": 2.1806, "step": 77240 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019595090008894478, "loss": 2.326, "step": 77245 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019595037944957137, "loss": 2.3832, "step": 77250 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.00019594985877741946, "loss": 2.0695, "step": 77255 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.0001959493380724894, "loss": 2.2523, "step": 77260 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019594881733478133, "loss": 2.1912, "step": 77265 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.00019594829656429535, "loss": 2.3393, "step": 77270 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019594777576103175, "loss": 2.122, "step": 77275 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001959472549249906, "loss": 1.9802, "step": 77280 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001959467340561721, "loss": 2.1188, "step": 77285 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019594621315457652, "loss": 2.1936, "step": 77290 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019594569222020395, "loss": 2.0907, "step": 77295 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019594517125305462, "loss": 2.0479, "step": 77300 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019594465025312864, "loss": 2.0884, "step": 77305 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019594412922042625, "loss": 2.0677, "step": 77310 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.0001959436081549476, "loss": 2.2614, "step": 77315 }, { "epoch": 0.18, "grad_norm": 1.59375, "learning_rate": 0.00019594308705669287, "loss": 2.0923, "step": 77320 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019594256592566224, "loss": 2.2996, "step": 77325 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.0001959420447618559, "loss": 2.0619, "step": 77330 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.000195941523565274, "loss": 2.1923, "step": 77335 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.0001959410023359167, "loss": 2.098, "step": 77340 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.0001959404810737843, "loss": 2.198, "step": 77345 }, { "epoch": 0.18, "grad_norm": 2.328125, "learning_rate": 0.00019593995977887683, "loss": 2.125, "step": 77350 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019593943845119455, "loss": 2.1238, "step": 77355 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.00019593891709073763, "loss": 2.3082, "step": 77360 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001959383956975062, "loss": 2.2615, "step": 77365 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019593787427150052, "loss": 2.0544, "step": 77370 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019593735281272067, "loss": 2.0041, "step": 77375 }, { "epoch": 0.18, "grad_norm": 2.5625, "learning_rate": 0.00019593683132116692, "loss": 2.2838, "step": 77380 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.0001959363097968394, "loss": 2.2076, "step": 77385 }, { "epoch": 0.18, "grad_norm": 2.609375, "learning_rate": 0.0001959357882397383, "loss": 2.2645, "step": 77390 }, { "epoch": 0.18, "grad_norm": 2.40625, "learning_rate": 0.00019593526664986376, "loss": 2.3234, "step": 77395 }, { "epoch": 0.18, "grad_norm": 1.4453125, "learning_rate": 0.00019593474502721604, "loss": 2.0888, "step": 77400 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019593422337179525, "loss": 2.2152, "step": 77405 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.0001959337016836016, "loss": 2.068, "step": 77410 }, { "epoch": 0.18, "grad_norm": 1.796875, "learning_rate": 0.00019593317996263526, "loss": 2.3003, "step": 77415 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019593265820889638, "loss": 2.2447, "step": 77420 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.0001959321364223852, "loss": 2.2092, "step": 77425 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.00019593161460310183, "loss": 2.1091, "step": 77430 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019593109275104647, "loss": 2.2642, "step": 77435 }, { "epoch": 0.18, "grad_norm": 1.6640625, "learning_rate": 0.00019593057086621934, "loss": 2.2959, "step": 77440 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.0001959300489486206, "loss": 2.2777, "step": 77445 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.0001959295269982504, "loss": 2.2773, "step": 77450 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019592900501510895, "loss": 2.1226, "step": 77455 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019592848299919636, "loss": 2.141, "step": 77460 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019592796095051288, "loss": 2.0743, "step": 77465 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.0001959274388690587, "loss": 2.2954, "step": 77470 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019592691675483393, "loss": 2.1098, "step": 77475 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001959263946078388, "loss": 2.1883, "step": 77480 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019592587242807347, "loss": 2.073, "step": 77485 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019592535021553816, "loss": 2.1507, "step": 77490 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.00019592482797023295, "loss": 2.2564, "step": 77495 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019592430569215813, "loss": 2.0782, "step": 77500 }, { "epoch": 0.18, "grad_norm": 1.8828125, "learning_rate": 0.0001959237833813138, "loss": 2.1496, "step": 77505 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019592326103770016, "loss": 2.0612, "step": 77510 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019592273866131743, "loss": 2.1994, "step": 77515 }, { "epoch": 0.18, "grad_norm": 2.25, "learning_rate": 0.0001959222162521657, "loss": 2.2696, "step": 77520 }, { "epoch": 0.18, "grad_norm": 2.53125, "learning_rate": 0.00019592169381024523, "loss": 2.1742, "step": 77525 }, { "epoch": 0.18, "grad_norm": 2.359375, "learning_rate": 0.0001959211713355562, "loss": 2.1971, "step": 77530 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019592064882809873, "loss": 2.0897, "step": 77535 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019592012628787302, "loss": 2.0953, "step": 77540 }, { "epoch": 0.18, "grad_norm": 2.25, "learning_rate": 0.00019591960371487924, "loss": 2.1234, "step": 77545 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.0001959190811091176, "loss": 2.1527, "step": 77550 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.0001959185584705883, "loss": 2.1618, "step": 77555 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019591803579929143, "loss": 2.2087, "step": 77560 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.00019591751309522724, "loss": 2.0929, "step": 77565 }, { "epoch": 0.18, "grad_norm": 2.4375, "learning_rate": 0.00019591699035839588, "loss": 2.2872, "step": 77570 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019591646758879754, "loss": 2.1598, "step": 77575 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.0001959159447864324, "loss": 2.0725, "step": 77580 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019591542195130062, "loss": 2.0963, "step": 77585 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019591489908340242, "loss": 2.3697, "step": 77590 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019591437618273792, "loss": 2.1469, "step": 77595 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019591385324930734, "loss": 2.2275, "step": 77600 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019591333028311087, "loss": 2.0757, "step": 77605 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019591280728414865, "loss": 2.0076, "step": 77610 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019591228425242085, "loss": 2.2436, "step": 77615 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019591176118792771, "loss": 2.1718, "step": 77620 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.00019591123809066936, "loss": 2.0749, "step": 77625 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019591071496064598, "loss": 1.9972, "step": 77630 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.0001959101917978578, "loss": 2.0754, "step": 77635 }, { "epoch": 0.18, "grad_norm": 1.609375, "learning_rate": 0.00019590966860230492, "loss": 2.2787, "step": 77640 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019590914537398756, "loss": 2.234, "step": 77645 }, { "epoch": 0.18, "grad_norm": 1.8515625, "learning_rate": 0.00019590862211290593, "loss": 2.1874, "step": 77650 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.00019590809881906015, "loss": 2.1055, "step": 77655 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.00019590757549245043, "loss": 2.0792, "step": 77660 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019590705213307693, "loss": 2.1335, "step": 77665 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019590652874093984, "loss": 2.1696, "step": 77670 }, { "epoch": 0.18, "grad_norm": 1.5078125, "learning_rate": 0.00019590600531603936, "loss": 1.9051, "step": 77675 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019590548185837562, "loss": 2.4351, "step": 77680 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019590495836794886, "loss": 2.3231, "step": 77685 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.0001959044348447592, "loss": 2.1052, "step": 77690 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019590391128880688, "loss": 1.9637, "step": 77695 }, { "epoch": 0.18, "grad_norm": 1.671875, "learning_rate": 0.000195903387700092, "loss": 2.0337, "step": 77700 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019590286407861483, "loss": 2.1538, "step": 77705 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019590234042437548, "loss": 2.1335, "step": 77710 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019590181673737416, "loss": 2.0398, "step": 77715 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.00019590129301761102, "loss": 2.2207, "step": 77720 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.00019590076926508627, "loss": 2.2862, "step": 77725 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019590024547980008, "loss": 2.1724, "step": 77730 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019589972166175263, "loss": 2.1971, "step": 77735 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.0001958991978109441, "loss": 2.1995, "step": 77740 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019589867392737464, "loss": 2.2073, "step": 77745 }, { "epoch": 0.18, "grad_norm": 2.15625, "learning_rate": 0.00019589815001104448, "loss": 2.1206, "step": 77750 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019589762606195374, "loss": 2.0117, "step": 77755 }, { "epoch": 0.18, "grad_norm": 1.65625, "learning_rate": 0.00019589710208010267, "loss": 2.2489, "step": 77760 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.0001958965780654914, "loss": 2.3222, "step": 77765 }, { "epoch": 0.18, "grad_norm": 1.8046875, "learning_rate": 0.0001958960540181201, "loss": 2.1984, "step": 77770 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.000195895529937989, "loss": 2.0863, "step": 77775 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019589500582509824, "loss": 2.0835, "step": 77780 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.000195894481679448, "loss": 1.9604, "step": 77785 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019589395750103846, "loss": 2.1872, "step": 77790 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.0001958934332898698, "loss": 2.1537, "step": 77795 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019589290904594223, "loss": 2.1069, "step": 77800 }, { "epoch": 0.18, "grad_norm": 1.7421875, "learning_rate": 0.0001958923847692559, "loss": 2.2325, "step": 77805 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019589186045981097, "loss": 2.1445, "step": 77810 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.00019589133611760764, "loss": 2.1895, "step": 77815 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.0001958908117426461, "loss": 2.2319, "step": 77820 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019589028733492652, "loss": 2.2064, "step": 77825 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019588976289444906, "loss": 2.1954, "step": 77830 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019588923842121396, "loss": 2.1738, "step": 77835 }, { "epoch": 0.18, "grad_norm": 2.5625, "learning_rate": 0.0001958887139152213, "loss": 1.9754, "step": 77840 }, { "epoch": 0.18, "grad_norm": 1.609375, "learning_rate": 0.00019588818937647137, "loss": 2.1634, "step": 77845 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.0001958876648049643, "loss": 2.168, "step": 77850 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.00019588714020070023, "loss": 2.2421, "step": 77855 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.0001958866155636794, "loss": 2.2627, "step": 77860 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019588609089390193, "loss": 2.27, "step": 77865 }, { "epoch": 0.18, "grad_norm": 2.75, "learning_rate": 0.00019588556619136807, "loss": 2.1004, "step": 77870 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.0001958850414560779, "loss": 2.1504, "step": 77875 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019588451668803173, "loss": 2.1132, "step": 77880 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019588399188722963, "loss": 2.2361, "step": 77885 }, { "epoch": 0.18, "grad_norm": 1.9375, "learning_rate": 0.00019588346705367186, "loss": 2.2793, "step": 77890 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019588294218735852, "loss": 2.0729, "step": 77895 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.0001958824172882898, "loss": 2.2867, "step": 77900 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.000195881892356466, "loss": 2.103, "step": 77905 }, { "epoch": 0.18, "grad_norm": 1.6796875, "learning_rate": 0.0001958813673918871, "loss": 2.1112, "step": 77910 }, { "epoch": 0.18, "grad_norm": 1.9765625, "learning_rate": 0.00019588084239455347, "loss": 2.0135, "step": 77915 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019588031736446517, "loss": 2.0212, "step": 77920 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.0001958797923016224, "loss": 1.9876, "step": 77925 }, { "epoch": 0.18, "grad_norm": 1.6171875, "learning_rate": 0.0001958792672060254, "loss": 2.3605, "step": 77930 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.00019587874207767426, "loss": 2.347, "step": 77935 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019587821691656926, "loss": 2.1563, "step": 77940 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019587769172271045, "loss": 2.0984, "step": 77945 }, { "epoch": 0.18, "grad_norm": 2.640625, "learning_rate": 0.0001958771664960981, "loss": 2.081, "step": 77950 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.0001958766412367324, "loss": 2.1499, "step": 77955 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001958761159446135, "loss": 2.244, "step": 77960 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019587559061974154, "loss": 2.3391, "step": 77965 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.0001958750652621168, "loss": 2.1837, "step": 77970 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019587453987173934, "loss": 2.1228, "step": 77975 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019587401444860943, "loss": 2.1638, "step": 77980 }, { "epoch": 0.18, "grad_norm": 1.6328125, "learning_rate": 0.00019587348899272722, "loss": 2.3346, "step": 77985 }, { "epoch": 0.18, "grad_norm": 1.6796875, "learning_rate": 0.00019587296350409287, "loss": 2.0649, "step": 77990 }, { "epoch": 0.18, "grad_norm": 1.9921875, "learning_rate": 0.0001958724379827066, "loss": 2.2353, "step": 77995 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019587191242856851, "loss": 2.2784, "step": 78000 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019587138684167888, "loss": 2.2713, "step": 78005 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.00019587086122203786, "loss": 2.0337, "step": 78010 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.0001958703355696456, "loss": 2.2149, "step": 78015 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.0001958698098845023, "loss": 2.1523, "step": 78020 }, { "epoch": 0.18, "grad_norm": 1.875, "learning_rate": 0.0001958692841666081, "loss": 2.1852, "step": 78025 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019586875841596325, "loss": 2.1728, "step": 78030 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.0001958682326325679, "loss": 2.2507, "step": 78035 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001958677068164222, "loss": 2.0428, "step": 78040 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019586718096752635, "loss": 2.2501, "step": 78045 }, { "epoch": 0.18, "grad_norm": 1.5859375, "learning_rate": 0.00019586665508588053, "loss": 2.2275, "step": 78050 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019586612917148495, "loss": 2.1326, "step": 78055 }, { "epoch": 0.18, "grad_norm": 2.0, "learning_rate": 0.00019586560322433974, "loss": 2.1469, "step": 78060 }, { "epoch": 0.18, "grad_norm": 2.421875, "learning_rate": 0.00019586507724444512, "loss": 2.1675, "step": 78065 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019586455123180122, "loss": 2.1682, "step": 78070 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019586402518640826, "loss": 2.1253, "step": 78075 }, { "epoch": 0.18, "grad_norm": 1.6171875, "learning_rate": 0.00019586349910826644, "loss": 2.1466, "step": 78080 }, { "epoch": 0.18, "grad_norm": 1.6875, "learning_rate": 0.00019586297299737588, "loss": 2.1145, "step": 78085 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.0001958624468537368, "loss": 2.0047, "step": 78090 }, { "epoch": 0.18, "grad_norm": 1.5078125, "learning_rate": 0.00019586192067734935, "loss": 2.2745, "step": 78095 }, { "epoch": 0.18, "grad_norm": 2.40625, "learning_rate": 0.00019586139446821376, "loss": 2.0711, "step": 78100 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001958608682263302, "loss": 2.2408, "step": 78105 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019586034195169877, "loss": 2.1027, "step": 78110 }, { "epoch": 0.18, "grad_norm": 1.8359375, "learning_rate": 0.00019585981564431974, "loss": 2.1762, "step": 78115 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.00019585928930419323, "loss": 2.0782, "step": 78120 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001958587629313195, "loss": 2.2314, "step": 78125 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019585823652569864, "loss": 2.3238, "step": 78130 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.0001958577100873309, "loss": 2.0966, "step": 78135 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.00019585718361621635, "loss": 2.2451, "step": 78140 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.00019585665711235534, "loss": 2.2219, "step": 78145 }, { "epoch": 0.18, "grad_norm": 2.3125, "learning_rate": 0.0001958561305757479, "loss": 2.2644, "step": 78150 }, { "epoch": 0.18, "grad_norm": 1.8515625, "learning_rate": 0.0001958556040063943, "loss": 2.0669, "step": 78155 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.00019585507740429469, "loss": 2.288, "step": 78160 }, { "epoch": 0.18, "grad_norm": 2.109375, "learning_rate": 0.00019585455076944923, "loss": 2.1255, "step": 78165 }, { "epoch": 0.18, "grad_norm": 1.9140625, "learning_rate": 0.0001958540241018581, "loss": 1.9457, "step": 78170 }, { "epoch": 0.18, "grad_norm": 1.734375, "learning_rate": 0.0001958534974015215, "loss": 2.2341, "step": 78175 }, { "epoch": 0.18, "grad_norm": 2.34375, "learning_rate": 0.00019585297066843964, "loss": 2.0992, "step": 78180 }, { "epoch": 0.18, "grad_norm": 1.8203125, "learning_rate": 0.00019585244390261266, "loss": 2.2088, "step": 78185 }, { "epoch": 0.18, "grad_norm": 2.09375, "learning_rate": 0.00019585191710404072, "loss": 2.1366, "step": 78190 }, { "epoch": 0.18, "grad_norm": 2.21875, "learning_rate": 0.00019585139027272406, "loss": 2.2558, "step": 78195 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.00019585086340866278, "loss": 2.2617, "step": 78200 }, { "epoch": 0.18, "grad_norm": 1.6328125, "learning_rate": 0.00019585033651185716, "loss": 2.2416, "step": 78205 }, { "epoch": 0.18, "grad_norm": 2.515625, "learning_rate": 0.0001958498095823073, "loss": 2.1501, "step": 78210 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.0001958492826200134, "loss": 2.0255, "step": 78215 }, { "epoch": 0.18, "grad_norm": 1.8515625, "learning_rate": 0.00019584875562497566, "loss": 2.2063, "step": 78220 }, { "epoch": 0.18, "grad_norm": 1.6484375, "learning_rate": 0.00019584822859719425, "loss": 2.1951, "step": 78225 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019584770153666936, "loss": 2.1569, "step": 78230 }, { "epoch": 0.18, "grad_norm": 1.7890625, "learning_rate": 0.00019584717444340113, "loss": 2.1917, "step": 78235 }, { "epoch": 0.18, "grad_norm": 2.171875, "learning_rate": 0.00019584664731738975, "loss": 2.0751, "step": 78240 }, { "epoch": 0.18, "grad_norm": 2.28125, "learning_rate": 0.00019584612015863546, "loss": 2.0483, "step": 78245 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019584559296713838, "loss": 2.3004, "step": 78250 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.0001958450657428987, "loss": 2.0969, "step": 78255 }, { "epoch": 0.18, "grad_norm": 2.4375, "learning_rate": 0.00019584453848591661, "loss": 2.1575, "step": 78260 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019584401119619228, "loss": 2.2532, "step": 78265 }, { "epoch": 0.18, "grad_norm": 2.265625, "learning_rate": 0.00019584348387372593, "loss": 2.1367, "step": 78270 }, { "epoch": 0.18, "grad_norm": 1.7265625, "learning_rate": 0.00019584295651851768, "loss": 2.0416, "step": 78275 }, { "epoch": 0.18, "grad_norm": 1.5234375, "learning_rate": 0.00019584242913056775, "loss": 2.1112, "step": 78280 }, { "epoch": 0.18, "grad_norm": 2.015625, "learning_rate": 0.0001958419017098763, "loss": 2.1236, "step": 78285 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.0001958413742564435, "loss": 2.2714, "step": 78290 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019584084677026957, "loss": 2.2139, "step": 78295 }, { "epoch": 0.18, "grad_norm": 1.6953125, "learning_rate": 0.00019584031925135468, "loss": 2.3401, "step": 78300 }, { "epoch": 0.18, "grad_norm": 1.7578125, "learning_rate": 0.00019583979169969897, "loss": 2.0436, "step": 78305 }, { "epoch": 0.18, "grad_norm": 1.9453125, "learning_rate": 0.00019583926411530266, "loss": 2.1169, "step": 78310 }, { "epoch": 0.18, "grad_norm": 2.234375, "learning_rate": 0.00019583873649816594, "loss": 2.3033, "step": 78315 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019583820884828894, "loss": 2.1147, "step": 78320 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.00019583768116567188, "loss": 2.1723, "step": 78325 }, { "epoch": 0.18, "grad_norm": 1.578125, "learning_rate": 0.00019583715345031496, "loss": 2.0674, "step": 78330 }, { "epoch": 0.18, "grad_norm": 2.046875, "learning_rate": 0.00019583662570221827, "loss": 2.208, "step": 78335 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.00019583609792138212, "loss": 2.3759, "step": 78340 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019583557010780656, "loss": 2.077, "step": 78345 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019583504226149186, "loss": 2.0839, "step": 78350 }, { "epoch": 0.18, "grad_norm": 1.5703125, "learning_rate": 0.00019583451438243816, "loss": 2.3517, "step": 78355 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.00019583398647064567, "loss": 2.1735, "step": 78360 }, { "epoch": 0.18, "grad_norm": 1.7734375, "learning_rate": 0.00019583345852611453, "loss": 2.1487, "step": 78365 }, { "epoch": 0.18, "grad_norm": 1.96875, "learning_rate": 0.00019583293054884496, "loss": 2.1833, "step": 78370 }, { "epoch": 0.18, "grad_norm": 2.53125, "learning_rate": 0.00019583240253883712, "loss": 2.0035, "step": 78375 }, { "epoch": 0.18, "grad_norm": 2.515625, "learning_rate": 0.0001958318744960912, "loss": 2.226, "step": 78380 }, { "epoch": 0.18, "grad_norm": 2.203125, "learning_rate": 0.00019583134642060733, "loss": 2.1321, "step": 78385 }, { "epoch": 0.18, "grad_norm": 1.78125, "learning_rate": 0.00019583081831238576, "loss": 2.1187, "step": 78390 }, { "epoch": 0.18, "grad_norm": 1.71875, "learning_rate": 0.00019583029017142665, "loss": 2.069, "step": 78395 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.0001958297619977302, "loss": 2.1922, "step": 78400 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.00019582923379129653, "loss": 2.0555, "step": 78405 }, { "epoch": 0.18, "grad_norm": 1.90625, "learning_rate": 0.00019582870555212586, "loss": 2.1939, "step": 78410 }, { "epoch": 0.18, "grad_norm": 3.765625, "learning_rate": 0.00019582817728021841, "loss": 2.0993, "step": 78415 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019582764897557427, "loss": 2.3741, "step": 78420 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.0001958271206381937, "loss": 2.2335, "step": 78425 }, { "epoch": 0.18, "grad_norm": 1.84375, "learning_rate": 0.0001958265922680768, "loss": 2.4226, "step": 78430 }, { "epoch": 0.18, "grad_norm": 1.5546875, "learning_rate": 0.0001958260638652238, "loss": 2.1257, "step": 78435 }, { "epoch": 0.18, "grad_norm": 2.140625, "learning_rate": 0.0001958255354296349, "loss": 2.2852, "step": 78440 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.0001958250069613103, "loss": 2.0514, "step": 78445 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.00019582447846025009, "loss": 2.1331, "step": 78450 }, { "epoch": 0.18, "grad_norm": 1.859375, "learning_rate": 0.00019582394992645453, "loss": 2.2045, "step": 78455 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019582342135992373, "loss": 2.1302, "step": 78460 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.00019582289276065792, "loss": 2.2325, "step": 78465 }, { "epoch": 0.18, "grad_norm": 2.453125, "learning_rate": 0.0001958223641286573, "loss": 2.1509, "step": 78470 }, { "epoch": 0.18, "grad_norm": 2.0625, "learning_rate": 0.000195821835463922, "loss": 2.1454, "step": 78475 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019582130676645224, "loss": 2.167, "step": 78480 }, { "epoch": 0.18, "grad_norm": 1.8125, "learning_rate": 0.00019582077803624817, "loss": 1.9839, "step": 78485 }, { "epoch": 0.18, "grad_norm": 2.03125, "learning_rate": 0.00019582024927331, "loss": 2.2557, "step": 78490 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019581972047763788, "loss": 2.4179, "step": 78495 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.000195819191649232, "loss": 2.101, "step": 78500 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019581866278809256, "loss": 2.2072, "step": 78505 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019581813389421971, "loss": 2.2372, "step": 78510 }, { "epoch": 0.18, "grad_norm": 1.8671875, "learning_rate": 0.0001958176049676137, "loss": 2.1256, "step": 78515 }, { "epoch": 0.18, "grad_norm": 2.078125, "learning_rate": 0.0001958170760082746, "loss": 1.8865, "step": 78520 }, { "epoch": 0.18, "grad_norm": 1.7109375, "learning_rate": 0.00019581654701620268, "loss": 2.199, "step": 78525 }, { "epoch": 0.18, "grad_norm": 1.984375, "learning_rate": 0.00019581601799139805, "loss": 2.0369, "step": 78530 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.000195815488933861, "loss": 2.0255, "step": 78535 }, { "epoch": 0.18, "grad_norm": 1.5390625, "learning_rate": 0.00019581495984359156, "loss": 2.1149, "step": 78540 }, { "epoch": 0.18, "grad_norm": 1.9609375, "learning_rate": 0.00019581443072059007, "loss": 2.0749, "step": 78545 }, { "epoch": 0.18, "grad_norm": 1.953125, "learning_rate": 0.00019581390156485657, "loss": 2.2438, "step": 78550 }, { "epoch": 0.18, "grad_norm": 2.578125, "learning_rate": 0.00019581337237639134, "loss": 2.1936, "step": 78555 }, { "epoch": 0.18, "grad_norm": 1.921875, "learning_rate": 0.00019581284315519452, "loss": 2.1234, "step": 78560 }, { "epoch": 0.18, "grad_norm": 1.9296875, "learning_rate": 0.0001958123139012663, "loss": 2.0711, "step": 78565 }, { "epoch": 0.18, "grad_norm": 1.828125, "learning_rate": 0.00019581178461460682, "loss": 2.3573, "step": 78570 }, { "epoch": 0.18, "grad_norm": 1.6640625, "learning_rate": 0.00019581125529521634, "loss": 2.0896, "step": 78575 }, { "epoch": 0.18, "grad_norm": 1.71875, "learning_rate": 0.00019581072594309498, "loss": 2.109, "step": 78580 }, { "epoch": 0.18, "grad_norm": 2.53125, "learning_rate": 0.00019581019655824292, "loss": 2.3319, "step": 78585 }, { "epoch": 0.18, "grad_norm": 2.296875, "learning_rate": 0.0001958096671406604, "loss": 2.182, "step": 78590 }, { "epoch": 0.18, "grad_norm": 2.125, "learning_rate": 0.0001958091376903475, "loss": 2.0438, "step": 78595 }, { "epoch": 0.18, "grad_norm": 2.390625, "learning_rate": 0.0001958086082073045, "loss": 2.2468, "step": 78600 }, { "epoch": 0.18, "grad_norm": 2.1875, "learning_rate": 0.00019580807869153154, "loss": 2.0733, "step": 78605 }, { "epoch": 0.18, "grad_norm": 1.8984375, "learning_rate": 0.0001958075491430288, "loss": 2.1742, "step": 78610 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019580701956179648, "loss": 2.2614, "step": 78615 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.00019580648994783472, "loss": 2.1838, "step": 78620 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.00019580596030114377, "loss": 1.9684, "step": 78625 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019580543062172373, "loss": 2.1269, "step": 78630 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019580490090957483, "loss": 2.2704, "step": 78635 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.0001958043711646972, "loss": 2.1696, "step": 78640 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019580384138709108, "loss": 2.2816, "step": 78645 }, { "epoch": 0.19, "grad_norm": 1.640625, "learning_rate": 0.00019580331157675665, "loss": 2.1372, "step": 78650 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019580278173369405, "loss": 2.03, "step": 78655 }, { "epoch": 0.19, "grad_norm": 1.703125, "learning_rate": 0.0001958022518579035, "loss": 2.2191, "step": 78660 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019580172194938515, "loss": 2.0288, "step": 78665 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.0001958011920081392, "loss": 2.2253, "step": 78670 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019580066203416584, "loss": 2.2624, "step": 78675 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.0001958001320274652, "loss": 2.1732, "step": 78680 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019579960198803752, "loss": 2.1975, "step": 78685 }, { "epoch": 0.19, "grad_norm": 2.65625, "learning_rate": 0.00019579907191588297, "loss": 2.0307, "step": 78690 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.0001957985418110017, "loss": 2.1797, "step": 78695 }, { "epoch": 0.19, "grad_norm": 1.703125, "learning_rate": 0.0001957980116733939, "loss": 2.2267, "step": 78700 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019579748150305978, "loss": 2.0602, "step": 78705 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019579695129999953, "loss": 2.2526, "step": 78710 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.0001957964210642133, "loss": 2.1114, "step": 78715 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.0001957958907957012, "loss": 2.3874, "step": 78720 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.00019579536049446356, "loss": 2.1817, "step": 78725 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019579483016050046, "loss": 2.3759, "step": 78730 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019579429979381212, "loss": 2.1192, "step": 78735 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.0001957937693943987, "loss": 2.1953, "step": 78740 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.0001957932389622604, "loss": 2.1185, "step": 78745 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019579270849739736, "loss": 1.9863, "step": 78750 }, { "epoch": 0.19, "grad_norm": 1.7109375, "learning_rate": 0.00019579217799980985, "loss": 1.9624, "step": 78755 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019579164746949795, "loss": 1.9773, "step": 78760 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.0001957911169064619, "loss": 2.1189, "step": 78765 }, { "epoch": 0.19, "grad_norm": 1.59375, "learning_rate": 0.00019579058631070186, "loss": 2.2901, "step": 78770 }, { "epoch": 0.19, "grad_norm": 1.6015625, "learning_rate": 0.00019579005568221805, "loss": 2.3381, "step": 78775 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019578952502101059, "loss": 2.021, "step": 78780 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.0001957889943270797, "loss": 1.9966, "step": 78785 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.00019578846360042556, "loss": 2.2233, "step": 78790 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.0001957879328410483, "loss": 2.1608, "step": 78795 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.0001957874020489482, "loss": 1.9576, "step": 78800 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019578687122412535, "loss": 2.2618, "step": 78805 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019578634036657999, "loss": 2.0738, "step": 78810 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.00019578580947631226, "loss": 2.1466, "step": 78815 }, { "epoch": 0.19, "grad_norm": 1.703125, "learning_rate": 0.00019578527855332239, "loss": 2.3952, "step": 78820 }, { "epoch": 0.19, "grad_norm": 2.46875, "learning_rate": 0.0001957847475976105, "loss": 2.158, "step": 78825 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019578421660917684, "loss": 2.1279, "step": 78830 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001957836855880215, "loss": 2.2054, "step": 78835 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019578315453414475, "loss": 2.1761, "step": 78840 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019578262344754674, "loss": 2.1711, "step": 78845 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019578209232822764, "loss": 2.0541, "step": 78850 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019578156117618765, "loss": 2.1805, "step": 78855 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019578102999142695, "loss": 2.185, "step": 78860 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019578049877394567, "loss": 2.2325, "step": 78865 }, { "epoch": 0.19, "grad_norm": 1.640625, "learning_rate": 0.00019577996752374408, "loss": 2.3126, "step": 78870 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019577943624082228, "loss": 2.1266, "step": 78875 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.0001957789049251805, "loss": 2.3235, "step": 78880 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019577837357681892, "loss": 2.0433, "step": 78885 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.00019577784219573772, "loss": 2.436, "step": 78890 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.00019577731078193703, "loss": 2.0643, "step": 78895 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019577677933541713, "loss": 2.0896, "step": 78900 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019577624785617812, "loss": 2.0775, "step": 78905 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.0001957757163442202, "loss": 1.9816, "step": 78910 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019577518479954354, "loss": 2.0754, "step": 78915 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019577465322214837, "loss": 2.1204, "step": 78920 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019577412161203483, "loss": 2.0585, "step": 78925 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019577358996920313, "loss": 2.2122, "step": 78930 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.0001957730582936534, "loss": 2.2015, "step": 78935 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.0001957725265853859, "loss": 2.062, "step": 78940 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.00019577199484440075, "loss": 1.9384, "step": 78945 }, { "epoch": 0.19, "grad_norm": 2.3125, "learning_rate": 0.00019577146307069813, "loss": 2.3478, "step": 78950 }, { "epoch": 0.19, "grad_norm": 2.359375, "learning_rate": 0.00019577093126427823, "loss": 2.1633, "step": 78955 }, { "epoch": 0.19, "grad_norm": 2.53125, "learning_rate": 0.0001957703994251413, "loss": 2.0414, "step": 78960 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019576986755328744, "loss": 2.1124, "step": 78965 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019576933564871682, "loss": 1.9742, "step": 78970 }, { "epoch": 0.19, "grad_norm": 2.40625, "learning_rate": 0.00019576880371142968, "loss": 2.1339, "step": 78975 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.0001957682717414262, "loss": 2.2545, "step": 78980 }, { "epoch": 0.19, "grad_norm": 1.4140625, "learning_rate": 0.00019576773973870655, "loss": 1.8839, "step": 78985 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019576720770327085, "loss": 2.0761, "step": 78990 }, { "epoch": 0.19, "grad_norm": 1.4765625, "learning_rate": 0.00019576667563511938, "loss": 2.1037, "step": 78995 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019576614353425227, "loss": 2.034, "step": 79000 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.0001957656114006697, "loss": 2.2402, "step": 79005 }, { "epoch": 0.19, "grad_norm": 1.5, "learning_rate": 0.00019576507923437186, "loss": 2.2169, "step": 79010 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019576454703535892, "loss": 2.2298, "step": 79015 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.0001957640148036311, "loss": 2.3161, "step": 79020 }, { "epoch": 0.19, "grad_norm": 1.6796875, "learning_rate": 0.00019576348253918854, "loss": 2.2546, "step": 79025 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.00019576295024203145, "loss": 2.3047, "step": 79030 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019576241791216, "loss": 2.146, "step": 79035 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019576188554957434, "loss": 2.294, "step": 79040 }, { "epoch": 0.19, "grad_norm": 2.390625, "learning_rate": 0.0001957613531542747, "loss": 2.071, "step": 79045 }, { "epoch": 0.19, "grad_norm": 1.6171875, "learning_rate": 0.00019576082072626127, "loss": 2.0235, "step": 79050 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019576028826553417, "loss": 2.0872, "step": 79055 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.00019575975577209364, "loss": 2.1644, "step": 79060 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019575922324593985, "loss": 2.0916, "step": 79065 }, { "epoch": 0.19, "grad_norm": 1.859375, "learning_rate": 0.00019575869068707297, "loss": 2.0724, "step": 79070 }, { "epoch": 0.19, "grad_norm": 2.46875, "learning_rate": 0.00019575815809549316, "loss": 2.2614, "step": 79075 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019575762547120067, "loss": 2.0811, "step": 79080 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019575709281419558, "loss": 2.1858, "step": 79085 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019575656012447819, "loss": 2.1449, "step": 79090 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.0001957560274020486, "loss": 2.3655, "step": 79095 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.000195755494646907, "loss": 2.0717, "step": 79100 }, { "epoch": 0.19, "grad_norm": 2.546875, "learning_rate": 0.00019575496185905358, "loss": 2.0114, "step": 79105 }, { "epoch": 0.19, "grad_norm": 2.484375, "learning_rate": 0.00019575442903848854, "loss": 2.0923, "step": 79110 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019575389618521204, "loss": 2.4009, "step": 79115 }, { "epoch": 0.19, "grad_norm": 1.6875, "learning_rate": 0.0001957533632992243, "loss": 2.1344, "step": 79120 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019575283038052545, "loss": 2.2125, "step": 79125 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.00019575229742911573, "loss": 2.3218, "step": 79130 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019575176444499524, "loss": 2.0753, "step": 79135 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019575123142816426, "loss": 1.9605, "step": 79140 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.00019575069837862288, "loss": 2.1458, "step": 79145 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019575016529637136, "loss": 2.0473, "step": 79150 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.0001957496321814098, "loss": 2.2649, "step": 79155 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.00019574909903373846, "loss": 2.1259, "step": 79160 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001957485658533575, "loss": 2.1478, "step": 79165 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.0001957480326402671, "loss": 2.1977, "step": 79170 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019574749939446742, "loss": 2.1087, "step": 79175 }, { "epoch": 0.19, "grad_norm": 2.453125, "learning_rate": 0.00019574696611595864, "loss": 2.1066, "step": 79180 }, { "epoch": 0.19, "grad_norm": 1.7578125, "learning_rate": 0.000195746432804741, "loss": 2.1011, "step": 79185 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.0001957458994608146, "loss": 2.1331, "step": 79190 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019574536608417967, "loss": 2.0592, "step": 79195 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019574483267483642, "loss": 2.094, "step": 79200 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019574429923278496, "loss": 2.0582, "step": 79205 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019574376575802552, "loss": 2.2051, "step": 79210 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019574323225055832, "loss": 2.2525, "step": 79215 }, { "epoch": 0.19, "grad_norm": 1.6015625, "learning_rate": 0.00019574269871038344, "loss": 1.908, "step": 79220 }, { "epoch": 0.19, "grad_norm": 1.7578125, "learning_rate": 0.00019574216513750114, "loss": 2.1087, "step": 79225 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019574163153191157, "loss": 2.2747, "step": 79230 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019574109789361494, "loss": 2.1509, "step": 79235 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.0001957405642226114, "loss": 2.1707, "step": 79240 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019574003051890115, "loss": 2.2464, "step": 79245 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.00019573949678248438, "loss": 2.11, "step": 79250 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019573896301336126, "loss": 2.2274, "step": 79255 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019573842921153195, "loss": 2.1946, "step": 79260 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019573789537699664, "loss": 2.2769, "step": 79265 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.0001957373615097556, "loss": 2.2197, "step": 79270 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019573682760980887, "loss": 2.2254, "step": 79275 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019573629367715674, "loss": 2.2967, "step": 79280 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.00019573575971179935, "loss": 2.1708, "step": 79285 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.0001957352257137369, "loss": 2.2593, "step": 79290 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019573469168296953, "loss": 2.0634, "step": 79295 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019573415761949748, "loss": 2.0604, "step": 79300 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.0001957336235233209, "loss": 2.0551, "step": 79305 }, { "epoch": 0.19, "grad_norm": 2.53125, "learning_rate": 0.00019573308939443998, "loss": 2.1491, "step": 79310 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.0001957325552328549, "loss": 2.1027, "step": 79315 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019573202103856586, "loss": 2.1525, "step": 79320 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019573148681157297, "loss": 2.1734, "step": 79325 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.0001957309525518765, "loss": 2.1442, "step": 79330 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.0001957304182594766, "loss": 2.0654, "step": 79335 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.0001957298839343735, "loss": 2.0685, "step": 79340 }, { "epoch": 0.19, "grad_norm": 1.859375, "learning_rate": 0.00019572934957656725, "loss": 2.1773, "step": 79345 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019572881518605816, "loss": 2.2511, "step": 79350 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.0001957282807628464, "loss": 2.1468, "step": 79355 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019572774630693206, "loss": 2.3592, "step": 79360 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019572721181831542, "loss": 2.132, "step": 79365 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019572667729699666, "loss": 2.1491, "step": 79370 }, { "epoch": 0.19, "grad_norm": 2.359375, "learning_rate": 0.00019572614274297588, "loss": 2.2095, "step": 79375 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019572560815625333, "loss": 2.3125, "step": 79380 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.0001957250735368292, "loss": 2.1741, "step": 79385 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019572453888470358, "loss": 1.9943, "step": 79390 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019572400419987679, "loss": 2.0951, "step": 79395 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.0001957234694823489, "loss": 2.2877, "step": 79400 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.0001957229347321202, "loss": 2.2786, "step": 79405 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019572239994919074, "loss": 2.271, "step": 79410 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.0001957218651335608, "loss": 2.0654, "step": 79415 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019572133028523055, "loss": 2.2559, "step": 79420 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019572079540420014, "loss": 2.2315, "step": 79425 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019572026049046977, "loss": 2.1809, "step": 79430 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019571972554403963, "loss": 2.3085, "step": 79435 }, { "epoch": 0.19, "grad_norm": 1.6328125, "learning_rate": 0.0001957191905649099, "loss": 2.017, "step": 79440 }, { "epoch": 0.19, "grad_norm": 1.4765625, "learning_rate": 0.00019571865555308073, "loss": 1.896, "step": 79445 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019571812050855235, "loss": 2.1973, "step": 79450 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019571758543132496, "loss": 2.1316, "step": 79455 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019571705032139864, "loss": 2.123, "step": 79460 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.00019571651517877369, "loss": 2.1345, "step": 79465 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019571598000345022, "loss": 2.0192, "step": 79470 }, { "epoch": 0.19, "grad_norm": 2.53125, "learning_rate": 0.00019571544479542844, "loss": 2.246, "step": 79475 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019571490955470853, "loss": 2.1439, "step": 79480 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019571437428129066, "loss": 2.2293, "step": 79485 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019571383897517503, "loss": 2.3286, "step": 79490 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019571330363636182, "loss": 2.2726, "step": 79495 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.00019571276826485124, "loss": 2.1503, "step": 79500 }, { "epoch": 0.19, "grad_norm": 1.7734375, "learning_rate": 0.00019571223286064338, "loss": 2.1001, "step": 79505 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.00019571169742373852, "loss": 2.3134, "step": 79510 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.0001957111619541368, "loss": 2.1074, "step": 79515 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.0001957106264518384, "loss": 2.1006, "step": 79520 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019571009091684356, "loss": 2.1303, "step": 79525 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019570955534915237, "loss": 2.1672, "step": 79530 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.0001957090197487651, "loss": 2.1952, "step": 79535 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019570848411568185, "loss": 2.1894, "step": 79540 }, { "epoch": 0.19, "grad_norm": 2.578125, "learning_rate": 0.0001957079484499029, "loss": 2.0538, "step": 79545 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019570741275142832, "loss": 2.1447, "step": 79550 }, { "epoch": 0.19, "grad_norm": 1.7578125, "learning_rate": 0.00019570687702025842, "loss": 2.2573, "step": 79555 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019570634125639323, "loss": 2.1553, "step": 79560 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.0001957058054598331, "loss": 2.1507, "step": 79565 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019570526963057808, "loss": 2.1952, "step": 79570 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001957047337686284, "loss": 2.0514, "step": 79575 }, { "epoch": 0.19, "grad_norm": 1.5703125, "learning_rate": 0.00019570419787398428, "loss": 2.3698, "step": 79580 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019570366194664583, "loss": 2.081, "step": 79585 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019570312598661332, "loss": 2.1734, "step": 79590 }, { "epoch": 0.19, "grad_norm": 1.6875, "learning_rate": 0.00019570258999388685, "loss": 2.1975, "step": 79595 }, { "epoch": 0.19, "grad_norm": 2.484375, "learning_rate": 0.00019570205396846668, "loss": 2.2444, "step": 79600 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019570151791035293, "loss": 2.0623, "step": 79605 }, { "epoch": 0.19, "grad_norm": 1.7578125, "learning_rate": 0.0001957009818195458, "loss": 2.2329, "step": 79610 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019570044569604546, "loss": 2.1679, "step": 79615 }, { "epoch": 0.19, "grad_norm": 1.7421875, "learning_rate": 0.00019569990953985212, "loss": 2.0113, "step": 79620 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.000195699373350966, "loss": 2.0996, "step": 79625 }, { "epoch": 0.19, "grad_norm": 2.4375, "learning_rate": 0.00019569883712938722, "loss": 2.1066, "step": 79630 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019569830087511595, "loss": 2.1775, "step": 79635 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.00019569776458815241, "loss": 1.9515, "step": 79640 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.0001956972282684968, "loss": 2.0648, "step": 79645 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019569669191614927, "loss": 1.962, "step": 79650 }, { "epoch": 0.19, "grad_norm": 1.859375, "learning_rate": 0.00019569615553111002, "loss": 2.0053, "step": 79655 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019569561911337923, "loss": 2.0457, "step": 79660 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.0001956950826629571, "loss": 2.2315, "step": 79665 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019569454617984376, "loss": 2.0338, "step": 79670 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019569400966403943, "loss": 2.1139, "step": 79675 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019569347311554428, "loss": 2.2282, "step": 79680 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019569293653435855, "loss": 2.1744, "step": 79685 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019569239992048236, "loss": 2.1252, "step": 79690 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.0001956918632739159, "loss": 2.046, "step": 79695 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019569132659465934, "loss": 1.9281, "step": 79700 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.0001956907898827129, "loss": 2.1071, "step": 79705 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.0001956902531380768, "loss": 2.1952, "step": 79710 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019568971636075113, "loss": 2.2133, "step": 79715 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019568917955073613, "loss": 2.2085, "step": 79720 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019568864270803196, "loss": 2.122, "step": 79725 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019568810583263884, "loss": 2.3007, "step": 79730 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019568756892455688, "loss": 2.2421, "step": 79735 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019568703198378636, "loss": 2.2404, "step": 79740 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.0001956864950103274, "loss": 2.2122, "step": 79745 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019568595800418016, "loss": 2.2589, "step": 79750 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.0001956854209653449, "loss": 2.1576, "step": 79755 }, { "epoch": 0.19, "grad_norm": 2.515625, "learning_rate": 0.00019568488389382175, "loss": 2.2149, "step": 79760 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019568434678961092, "loss": 2.0608, "step": 79765 }, { "epoch": 0.19, "grad_norm": 2.484375, "learning_rate": 0.00019568380965271257, "loss": 2.1492, "step": 79770 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019568327248312693, "loss": 2.2998, "step": 79775 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.00019568273528085413, "loss": 2.307, "step": 79780 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.00019568219804589436, "loss": 2.2217, "step": 79785 }, { "epoch": 0.19, "grad_norm": 2.5, "learning_rate": 0.0001956816607782478, "loss": 2.0463, "step": 79790 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.00019568112347791467, "loss": 2.122, "step": 79795 }, { "epoch": 0.19, "grad_norm": 1.5390625, "learning_rate": 0.00019568058614489515, "loss": 1.9417, "step": 79800 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019568004877918937, "loss": 2.1366, "step": 79805 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019567951138079755, "loss": 2.2296, "step": 79810 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.0001956789739497199, "loss": 2.142, "step": 79815 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.0001956784364859566, "loss": 2.0465, "step": 79820 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019567789898950776, "loss": 2.1317, "step": 79825 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019567736146037365, "loss": 2.1286, "step": 79830 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.0001956768238985544, "loss": 2.0828, "step": 79835 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019567628630405023, "loss": 2.0971, "step": 79840 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.00019567574867686128, "loss": 1.9923, "step": 79845 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019567521101698776, "loss": 2.1454, "step": 79850 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019567467332442986, "loss": 2.276, "step": 79855 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019567413559918776, "loss": 2.2094, "step": 79860 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019567359784126165, "loss": 2.1504, "step": 79865 }, { "epoch": 0.19, "grad_norm": 7.21875, "learning_rate": 0.0001956730600506517, "loss": 2.2284, "step": 79870 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019567252222735807, "loss": 2.1158, "step": 79875 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.000195671984371381, "loss": 2.1641, "step": 79880 }, { "epoch": 0.19, "grad_norm": 1.671875, "learning_rate": 0.00019567144648272063, "loss": 2.1693, "step": 79885 }, { "epoch": 0.19, "grad_norm": 2.453125, "learning_rate": 0.00019567090856137718, "loss": 2.0507, "step": 79890 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019567037060735077, "loss": 2.1733, "step": 79895 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.0001956698326206417, "loss": 2.0329, "step": 79900 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019566929460125004, "loss": 2.1819, "step": 79905 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019566875654917598, "loss": 2.1424, "step": 79910 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.00019566821846441976, "loss": 2.1155, "step": 79915 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019566768034698156, "loss": 2.1806, "step": 79920 }, { "epoch": 0.19, "grad_norm": 2.421875, "learning_rate": 0.00019566714219686153, "loss": 2.1911, "step": 79925 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.00019566660401405987, "loss": 2.2183, "step": 79930 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019566606579857676, "loss": 2.2296, "step": 79935 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.0001956655275504124, "loss": 2.2034, "step": 79940 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.00019566498926956697, "loss": 2.2095, "step": 79945 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019566445095604062, "loss": 2.2253, "step": 79950 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.00019566391260983354, "loss": 2.0894, "step": 79955 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019566337423094598, "loss": 2.0775, "step": 79960 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019566283581937805, "loss": 2.1697, "step": 79965 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019566229737512996, "loss": 2.129, "step": 79970 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.0001956617588982019, "loss": 2.0835, "step": 79975 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019566122038859403, "loss": 2.134, "step": 79980 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.0001956606818463066, "loss": 2.1372, "step": 79985 }, { "epoch": 0.19, "grad_norm": 1.5859375, "learning_rate": 0.00019566014327133973, "loss": 1.8631, "step": 79990 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.0001956596046636936, "loss": 2.2154, "step": 79995 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.0001956590660233684, "loss": 1.9693, "step": 80000 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.00019565852735036436, "loss": 2.0932, "step": 80005 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019565798864468162, "loss": 2.2359, "step": 80010 }, { "epoch": 0.19, "grad_norm": 1.6640625, "learning_rate": 0.00019565744990632038, "loss": 2.0898, "step": 80015 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.00019565691113528083, "loss": 2.1297, "step": 80020 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019565637233156312, "loss": 2.1959, "step": 80025 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019565583349516748, "loss": 2.2343, "step": 80030 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019565529462609407, "loss": 2.1634, "step": 80035 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019565475572434307, "loss": 2.2566, "step": 80040 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019565421678991465, "loss": 2.0417, "step": 80045 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019565367782280904, "loss": 1.9957, "step": 80050 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.0001956531388230264, "loss": 2.0867, "step": 80055 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.0001956525997905669, "loss": 2.1111, "step": 80060 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019565206072543075, "loss": 2.3803, "step": 80065 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.0001956515216276181, "loss": 2.0188, "step": 80070 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019565098249712918, "loss": 2.0833, "step": 80075 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019565044333396414, "loss": 2.025, "step": 80080 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.0001956499041381232, "loss": 2.1609, "step": 80085 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019564936490960646, "loss": 2.1688, "step": 80090 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.0001956488256484142, "loss": 2.2776, "step": 80095 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019564828635454658, "loss": 2.1496, "step": 80100 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019564774702800374, "loss": 2.0207, "step": 80105 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019564720766878591, "loss": 2.0773, "step": 80110 }, { "epoch": 0.19, "grad_norm": 1.5859375, "learning_rate": 0.00019564666827689325, "loss": 2.2488, "step": 80115 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019564612885232595, "loss": 2.1721, "step": 80120 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.0001956455893950842, "loss": 2.1355, "step": 80125 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.0001956450499051682, "loss": 2.2382, "step": 80130 }, { "epoch": 0.19, "grad_norm": 2.8125, "learning_rate": 0.0001956445103825781, "loss": 2.2072, "step": 80135 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.0001956439708273141, "loss": 2.2169, "step": 80140 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.0001956434312393764, "loss": 2.1425, "step": 80145 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019564289161876517, "loss": 1.9884, "step": 80150 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.0001956423519654806, "loss": 2.0778, "step": 80155 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019564181227952284, "loss": 2.1546, "step": 80160 }, { "epoch": 0.19, "grad_norm": 2.5, "learning_rate": 0.0001956412725608921, "loss": 2.2184, "step": 80165 }, { "epoch": 0.19, "grad_norm": 1.6875, "learning_rate": 0.00019564073280958858, "loss": 2.2392, "step": 80170 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019564019302561247, "loss": 2.1886, "step": 80175 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.00019563965320896392, "loss": 2.0495, "step": 80180 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019563911335964313, "loss": 2.2466, "step": 80185 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019563857347765028, "loss": 2.0952, "step": 80190 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019563803356298555, "loss": 2.0964, "step": 80195 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019563749361564914, "loss": 2.1235, "step": 80200 }, { "epoch": 0.19, "grad_norm": 1.5078125, "learning_rate": 0.00019563695363564122, "loss": 2.0334, "step": 80205 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019563641362296203, "loss": 2.1096, "step": 80210 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019563587357761166, "loss": 2.1936, "step": 80215 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019563533349959035, "loss": 2.1146, "step": 80220 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019563479338889828, "loss": 2.0957, "step": 80225 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.0001956342532455356, "loss": 2.2567, "step": 80230 }, { "epoch": 0.19, "grad_norm": 1.46875, "learning_rate": 0.00019563371306950258, "loss": 2.0684, "step": 80235 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.0001956331728607993, "loss": 2.26, "step": 80240 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019563263261942603, "loss": 2.18, "step": 80245 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.00019563209234538291, "loss": 2.2738, "step": 80250 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019563155203867013, "loss": 2.0634, "step": 80255 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.00019563101169928787, "loss": 2.0656, "step": 80260 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019563047132723632, "loss": 2.184, "step": 80265 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019562993092251568, "loss": 2.109, "step": 80270 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019562939048512612, "loss": 2.2131, "step": 80275 }, { "epoch": 0.19, "grad_norm": 2.3125, "learning_rate": 0.00019562885001506782, "loss": 2.2987, "step": 80280 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019562830951234096, "loss": 2.2031, "step": 80285 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019562776897694573, "loss": 2.0737, "step": 80290 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019562722840888236, "loss": 2.2393, "step": 80295 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019562668780815097, "loss": 2.1545, "step": 80300 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019562614717475176, "loss": 2.2757, "step": 80305 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.00019562560650868493, "loss": 2.049, "step": 80310 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019562506580995067, "loss": 2.1754, "step": 80315 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019562452507854917, "loss": 2.3582, "step": 80320 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019562398431448056, "loss": 2.1859, "step": 80325 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019562344351774507, "loss": 2.1785, "step": 80330 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.0001956229026883429, "loss": 2.0722, "step": 80335 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019562236182627416, "loss": 2.2544, "step": 80340 }, { "epoch": 0.19, "grad_norm": 1.6875, "learning_rate": 0.00019562182093153913, "loss": 2.2642, "step": 80345 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019562128000413792, "loss": 2.2471, "step": 80350 }, { "epoch": 0.19, "grad_norm": 2.359375, "learning_rate": 0.0001956207390440708, "loss": 2.2369, "step": 80355 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019562019805133786, "loss": 2.3432, "step": 80360 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.00019561965702593934, "loss": 1.9245, "step": 80365 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019561911596787542, "loss": 2.2449, "step": 80370 }, { "epoch": 0.19, "grad_norm": 2.6875, "learning_rate": 0.0001956185748771463, "loss": 2.2313, "step": 80375 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.00019561803375375204, "loss": 2.1831, "step": 80380 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019561749259769303, "loss": 2.0053, "step": 80385 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019561695140896929, "loss": 2.1122, "step": 80390 }, { "epoch": 0.19, "grad_norm": 1.6796875, "learning_rate": 0.00019561641018758108, "loss": 2.2595, "step": 80395 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.0001956158689335286, "loss": 2.1917, "step": 80400 }, { "epoch": 0.19, "grad_norm": 2.421875, "learning_rate": 0.00019561532764681197, "loss": 2.1407, "step": 80405 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019561478632743143, "loss": 2.1267, "step": 80410 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019561424497538713, "loss": 1.9962, "step": 80415 }, { "epoch": 0.19, "grad_norm": 2.359375, "learning_rate": 0.0001956137035906793, "loss": 2.3897, "step": 80420 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019561316217330804, "loss": 1.9492, "step": 80425 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.00019561262072327363, "loss": 2.0183, "step": 80430 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019561207924057621, "loss": 2.2886, "step": 80435 }, { "epoch": 0.19, "grad_norm": 2.609375, "learning_rate": 0.00019561153772521597, "loss": 2.0577, "step": 80440 }, { "epoch": 0.19, "grad_norm": 2.515625, "learning_rate": 0.00019561099617719307, "loss": 2.034, "step": 80445 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019561045459650774, "loss": 2.1632, "step": 80450 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019560991298316017, "loss": 2.1191, "step": 80455 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019560937133715047, "loss": 2.0589, "step": 80460 }, { "epoch": 0.19, "grad_norm": 1.7734375, "learning_rate": 0.0001956088296584789, "loss": 2.1554, "step": 80465 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.00019560828794714565, "loss": 2.0893, "step": 80470 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.00019560774620315082, "loss": 2.2099, "step": 80475 }, { "epoch": 0.19, "grad_norm": 1.6640625, "learning_rate": 0.00019560720442649468, "loss": 2.1156, "step": 80480 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.0001956066626171774, "loss": 2.293, "step": 80485 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.0001956061207751991, "loss": 2.0859, "step": 80490 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019560557890056007, "loss": 2.2979, "step": 80495 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.0001956050369932604, "loss": 2.1204, "step": 80500 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019560449505330035, "loss": 2.2897, "step": 80505 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.00019560395308068006, "loss": 1.9787, "step": 80510 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.0001956034110753997, "loss": 2.1556, "step": 80515 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019560286903745947, "loss": 2.1205, "step": 80520 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001956023269668596, "loss": 2.1177, "step": 80525 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019560178486360025, "loss": 2.1049, "step": 80530 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019560124272768157, "loss": 2.2093, "step": 80535 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001956007005591038, "loss": 2.2566, "step": 80540 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.00019560015835786706, "loss": 2.071, "step": 80545 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.0001955996161239716, "loss": 2.1087, "step": 80550 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019559907385741755, "loss": 2.1489, "step": 80555 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019559853155820517, "loss": 2.3043, "step": 80560 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019559798922633457, "loss": 1.9262, "step": 80565 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019559744686180593, "loss": 2.1656, "step": 80570 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019559690446461952, "loss": 2.1949, "step": 80575 }, { "epoch": 0.19, "grad_norm": 1.7421875, "learning_rate": 0.00019559636203477548, "loss": 2.055, "step": 80580 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019559581957227397, "loss": 2.2452, "step": 80585 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019559527707711517, "loss": 2.1413, "step": 80590 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019559473454929931, "loss": 2.0057, "step": 80595 }, { "epoch": 0.19, "grad_norm": 2.3125, "learning_rate": 0.00019559419198882653, "loss": 2.132, "step": 80600 }, { "epoch": 0.19, "grad_norm": 2.515625, "learning_rate": 0.00019559364939569708, "loss": 2.2321, "step": 80605 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.0001955931067699111, "loss": 2.0939, "step": 80610 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.00019559256411146877, "loss": 2.2103, "step": 80615 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019559202142037027, "loss": 2.0797, "step": 80620 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019559147869661583, "loss": 2.1709, "step": 80625 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.0001955909359402056, "loss": 2.2036, "step": 80630 }, { "epoch": 0.19, "grad_norm": 2.609375, "learning_rate": 0.00019559039315113975, "loss": 2.1426, "step": 80635 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019558985032941853, "loss": 2.1136, "step": 80640 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019558930747504204, "loss": 2.0783, "step": 80645 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019558876458801054, "loss": 2.1678, "step": 80650 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019558822166832414, "loss": 2.2502, "step": 80655 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.00019558767871598312, "loss": 2.0794, "step": 80660 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019558713573098762, "loss": 2.2874, "step": 80665 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.00019558659271333778, "loss": 2.3333, "step": 80670 }, { "epoch": 0.19, "grad_norm": 1.703125, "learning_rate": 0.00019558604966303387, "loss": 1.9808, "step": 80675 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.000195585506580076, "loss": 2.3035, "step": 80680 }, { "epoch": 0.19, "grad_norm": 1.703125, "learning_rate": 0.00019558496346446438, "loss": 2.1694, "step": 80685 }, { "epoch": 0.19, "grad_norm": 2.421875, "learning_rate": 0.00019558442031619922, "loss": 2.1264, "step": 80690 }, { "epoch": 0.19, "grad_norm": 1.6875, "learning_rate": 0.0001955838771352807, "loss": 2.1021, "step": 80695 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019558333392170898, "loss": 2.2302, "step": 80700 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019558279067548426, "loss": 2.0124, "step": 80705 }, { "epoch": 0.19, "grad_norm": 1.6484375, "learning_rate": 0.00019558224739660673, "loss": 2.044, "step": 80710 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019558170408507657, "loss": 2.069, "step": 80715 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.00019558116074089394, "loss": 2.0887, "step": 80720 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.0001955806173640591, "loss": 2.1495, "step": 80725 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019558007395457217, "loss": 2.0544, "step": 80730 }, { "epoch": 0.19, "grad_norm": 2.421875, "learning_rate": 0.0001955795305124333, "loss": 2.1683, "step": 80735 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.00019557898703764283, "loss": 2.1874, "step": 80740 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019557844353020076, "loss": 2.2062, "step": 80745 }, { "epoch": 0.19, "grad_norm": 1.53125, "learning_rate": 0.0001955778999901074, "loss": 2.1409, "step": 80750 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019557735641736288, "loss": 2.1723, "step": 80755 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.0001955768128119674, "loss": 2.3622, "step": 80760 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019557626917392115, "loss": 2.1795, "step": 80765 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.0001955757255032243, "loss": 2.22, "step": 80770 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.00019557518179987707, "loss": 2.1117, "step": 80775 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.00019557463806387965, "loss": 2.081, "step": 80780 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019557409429523217, "loss": 2.0726, "step": 80785 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019557355049393485, "loss": 2.2047, "step": 80790 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019557300665998785, "loss": 2.1441, "step": 80795 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.00019557246279339142, "loss": 2.0664, "step": 80800 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019557191889414566, "loss": 1.9606, "step": 80805 }, { "epoch": 0.19, "grad_norm": 2.4375, "learning_rate": 0.00019557137496225083, "loss": 2.0562, "step": 80810 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.0001955708309977071, "loss": 2.153, "step": 80815 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019557028700051458, "loss": 1.8962, "step": 80820 }, { "epoch": 0.19, "grad_norm": 1.515625, "learning_rate": 0.00019556974297067355, "loss": 2.1539, "step": 80825 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019556919890818418, "loss": 2.3457, "step": 80830 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.0001955686548130466, "loss": 2.1504, "step": 80835 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019556811068526105, "loss": 2.1109, "step": 80840 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019556756652482773, "loss": 2.3456, "step": 80845 }, { "epoch": 0.19, "grad_norm": 2.4375, "learning_rate": 0.00019556702233174678, "loss": 2.2633, "step": 80850 }, { "epoch": 0.19, "grad_norm": 1.6484375, "learning_rate": 0.0001955664781060184, "loss": 2.1403, "step": 80855 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019556593384764277, "loss": 2.2153, "step": 80860 }, { "epoch": 0.19, "grad_norm": 3.015625, "learning_rate": 0.00019556538955662006, "loss": 2.3241, "step": 80865 }, { "epoch": 0.19, "grad_norm": 2.59375, "learning_rate": 0.0001955648452329505, "loss": 2.1031, "step": 80870 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019556430087663428, "loss": 2.1377, "step": 80875 }, { "epoch": 0.19, "grad_norm": 2.40625, "learning_rate": 0.00019556375648767155, "loss": 2.1797, "step": 80880 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019556321206606252, "loss": 2.2725, "step": 80885 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.00019556266761180732, "loss": 2.2129, "step": 80890 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.0001955621231249062, "loss": 2.1864, "step": 80895 }, { "epoch": 0.19, "grad_norm": 2.359375, "learning_rate": 0.00019556157860535936, "loss": 2.1939, "step": 80900 }, { "epoch": 0.19, "grad_norm": 1.5234375, "learning_rate": 0.00019556103405316694, "loss": 2.1626, "step": 80905 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019556048946832907, "loss": 2.1818, "step": 80910 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019555994485084607, "loss": 2.0911, "step": 80915 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019555940020071804, "loss": 2.1721, "step": 80920 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.0001955588555179452, "loss": 1.974, "step": 80925 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.0001955583108025277, "loss": 2.3147, "step": 80930 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019555776605446575, "loss": 2.5121, "step": 80935 }, { "epoch": 0.19, "grad_norm": 1.6796875, "learning_rate": 0.00019555722127375958, "loss": 2.1607, "step": 80940 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019555667646040928, "loss": 2.4717, "step": 80945 }, { "epoch": 0.19, "grad_norm": 2.3125, "learning_rate": 0.00019555613161441508, "loss": 2.1609, "step": 80950 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.0001955555867357772, "loss": 2.0189, "step": 80955 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.0001955550418244958, "loss": 2.0092, "step": 80960 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019555449688057105, "loss": 2.111, "step": 80965 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019555395190400316, "loss": 2.0486, "step": 80970 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.0001955534068947923, "loss": 2.1913, "step": 80975 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019555286185293867, "loss": 2.2007, "step": 80980 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.00019555231677844247, "loss": 2.1356, "step": 80985 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019555177167130384, "loss": 2.1858, "step": 80990 }, { "epoch": 0.19, "grad_norm": 1.59375, "learning_rate": 0.00019555122653152295, "loss": 2.1917, "step": 80995 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019555068135910012, "loss": 2.3159, "step": 81000 }, { "epoch": 0.19, "grad_norm": 1.6640625, "learning_rate": 0.00019555013615403538, "loss": 2.1652, "step": 81005 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019554959091632902, "loss": 2.3314, "step": 81010 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019554904564598113, "loss": 2.1526, "step": 81015 }, { "epoch": 0.19, "grad_norm": 1.7734375, "learning_rate": 0.00019554850034299198, "loss": 2.1022, "step": 81020 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019554795500736173, "loss": 2.1513, "step": 81025 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.00019554740963909057, "loss": 2.1897, "step": 81030 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019554686423817872, "loss": 1.9721, "step": 81035 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019554631880462625, "loss": 2.2845, "step": 81040 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019554577333843348, "loss": 2.2008, "step": 81045 }, { "epoch": 0.19, "grad_norm": 1.859375, "learning_rate": 0.00019554522783960055, "loss": 2.1544, "step": 81050 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019554468230812758, "loss": 2.2915, "step": 81055 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.00019554413674401487, "loss": 2.002, "step": 81060 }, { "epoch": 0.19, "grad_norm": 2.34375, "learning_rate": 0.00019554359114726255, "loss": 1.8643, "step": 81065 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019554304551787076, "loss": 2.1319, "step": 81070 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019554249985583976, "loss": 2.3433, "step": 81075 }, { "epoch": 0.19, "grad_norm": 1.421875, "learning_rate": 0.00019554195416116973, "loss": 2.094, "step": 81080 }, { "epoch": 0.19, "grad_norm": 1.7734375, "learning_rate": 0.0001955414084338608, "loss": 2.1977, "step": 81085 }, { "epoch": 0.19, "grad_norm": 2.3125, "learning_rate": 0.0001955408626739132, "loss": 2.1815, "step": 81090 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019554031688132713, "loss": 2.1664, "step": 81095 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019553977105610273, "loss": 2.0279, "step": 81100 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.0001955392251982402, "loss": 2.2104, "step": 81105 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.0001955386793077398, "loss": 2.2994, "step": 81110 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019553813338460158, "loss": 2.2018, "step": 81115 }, { "epoch": 0.19, "grad_norm": 2.953125, "learning_rate": 0.00019553758742882585, "loss": 2.209, "step": 81120 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019553704144041273, "loss": 2.1208, "step": 81125 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019553649541936244, "loss": 2.3006, "step": 81130 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019553594936567512, "loss": 2.2882, "step": 81135 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.000195535403279351, "loss": 2.1086, "step": 81140 }, { "epoch": 0.19, "grad_norm": 2.671875, "learning_rate": 0.00019553485716039025, "loss": 2.0791, "step": 81145 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.0001955343110087931, "loss": 2.3026, "step": 81150 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019553376482455966, "loss": 2.1573, "step": 81155 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019553321860769015, "loss": 2.0061, "step": 81160 }, { "epoch": 0.19, "grad_norm": 1.5234375, "learning_rate": 0.00019553267235818475, "loss": 1.9996, "step": 81165 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.0001955321260760437, "loss": 2.0674, "step": 81170 }, { "epoch": 0.19, "grad_norm": 2.734375, "learning_rate": 0.0001955315797612671, "loss": 2.2446, "step": 81175 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.0001955310334138552, "loss": 2.1813, "step": 81180 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.00019553048703380816, "loss": 2.1565, "step": 81185 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019552994062112619, "loss": 2.1896, "step": 81190 }, { "epoch": 0.19, "grad_norm": 1.7421875, "learning_rate": 0.00019552939417580946, "loss": 2.1093, "step": 81195 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.0001955288476978581, "loss": 2.0506, "step": 81200 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.0001955283011872724, "loss": 2.3036, "step": 81205 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019552775464405252, "loss": 2.1989, "step": 81210 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.0001955272080681986, "loss": 2.1357, "step": 81215 }, { "epoch": 0.19, "grad_norm": 1.5625, "learning_rate": 0.00019552666145971083, "loss": 2.0993, "step": 81220 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.00019552611481858943, "loss": 2.1507, "step": 81225 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019552556814483461, "loss": 2.1175, "step": 81230 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.0001955250214384465, "loss": 2.32, "step": 81235 }, { "epoch": 0.19, "grad_norm": 1.7578125, "learning_rate": 0.0001955244746994253, "loss": 2.2213, "step": 81240 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019552392792777126, "loss": 2.3972, "step": 81245 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.00019552338112348446, "loss": 2.3065, "step": 81250 }, { "epoch": 0.19, "grad_norm": 3.0, "learning_rate": 0.00019552283428656518, "loss": 2.2406, "step": 81255 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019552228741701353, "loss": 2.1503, "step": 81260 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019552174051482975, "loss": 2.3346, "step": 81265 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019552119358001398, "loss": 1.9686, "step": 81270 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.0001955206466125665, "loss": 2.0566, "step": 81275 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.0001955200996124874, "loss": 2.2833, "step": 81280 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.0001955195525797769, "loss": 2.2025, "step": 81285 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.0001955190055144352, "loss": 2.2356, "step": 81290 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019551845841646247, "loss": 2.0286, "step": 81295 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.0001955179112858589, "loss": 2.1444, "step": 81300 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.00019551736412262466, "loss": 2.2905, "step": 81305 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019551681692675998, "loss": 2.2211, "step": 81310 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.00019551626969826504, "loss": 2.0908, "step": 81315 }, { "epoch": 0.19, "grad_norm": 1.859375, "learning_rate": 0.00019551572243713995, "loss": 2.1067, "step": 81320 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.000195515175143385, "loss": 2.2119, "step": 81325 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019551462781700036, "loss": 2.1388, "step": 81330 }, { "epoch": 0.19, "grad_norm": 2.625, "learning_rate": 0.00019551408045798615, "loss": 2.3742, "step": 81335 }, { "epoch": 0.19, "grad_norm": 2.390625, "learning_rate": 0.0001955135330663426, "loss": 2.0594, "step": 81340 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019551298564206992, "loss": 2.2789, "step": 81345 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019551243818516825, "loss": 2.1221, "step": 81350 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.0001955118906956378, "loss": 2.096, "step": 81355 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019551134317347875, "loss": 2.2461, "step": 81360 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019551079561869132, "loss": 2.2567, "step": 81365 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019551024803127564, "loss": 2.29, "step": 81370 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019550970041123193, "loss": 1.8099, "step": 81375 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.0001955091527585604, "loss": 2.2565, "step": 81380 }, { "epoch": 0.19, "grad_norm": 2.34375, "learning_rate": 0.00019550860507326122, "loss": 2.1468, "step": 81385 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001955080573553345, "loss": 2.2784, "step": 81390 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019550750960478056, "loss": 2.2037, "step": 81395 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.0001955069618215995, "loss": 2.1636, "step": 81400 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019550641400579153, "loss": 2.1898, "step": 81405 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019550586615735684, "loss": 2.1382, "step": 81410 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019550531827629562, "loss": 2.2258, "step": 81415 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019550477036260803, "loss": 2.0352, "step": 81420 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.0001955042224162943, "loss": 2.1132, "step": 81425 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019550367443735457, "loss": 2.352, "step": 81430 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.00019550312642578908, "loss": 2.1656, "step": 81435 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019550257838159797, "loss": 2.1989, "step": 81440 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.00019550203030478146, "loss": 2.01, "step": 81445 }, { "epoch": 0.19, "grad_norm": 1.7578125, "learning_rate": 0.00019550148219533973, "loss": 2.2046, "step": 81450 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019550093405327297, "loss": 2.2062, "step": 81455 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.00019550038587858132, "loss": 2.1766, "step": 81460 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019549983767126503, "loss": 2.1361, "step": 81465 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.00019549928943132428, "loss": 1.8928, "step": 81470 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.0001954987411587592, "loss": 2.1697, "step": 81475 }, { "epoch": 0.19, "grad_norm": 1.5859375, "learning_rate": 0.00019549819285357006, "loss": 2.1233, "step": 81480 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.000195497644515757, "loss": 2.152, "step": 81485 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019549709614532018, "loss": 2.273, "step": 81490 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.00019549654774225986, "loss": 2.3375, "step": 81495 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019549599930657614, "loss": 2.2488, "step": 81500 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.0001954954508382693, "loss": 2.1976, "step": 81505 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019549490233733945, "loss": 2.0811, "step": 81510 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.00019549435380378683, "loss": 2.259, "step": 81515 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.0001954938052376116, "loss": 2.1376, "step": 81520 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019549325663881394, "loss": 1.999, "step": 81525 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019549270800739405, "loss": 2.1728, "step": 81530 }, { "epoch": 0.19, "grad_norm": 2.40625, "learning_rate": 0.00019549215934335217, "loss": 2.0316, "step": 81535 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019549161064668838, "loss": 2.2083, "step": 81540 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019549106191740293, "loss": 2.0822, "step": 81545 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019549051315549606, "loss": 2.1242, "step": 81550 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019548996436096782, "loss": 2.2136, "step": 81555 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019548941553381853, "loss": 2.1271, "step": 81560 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.0001954888666740483, "loss": 2.102, "step": 81565 }, { "epoch": 0.19, "grad_norm": 1.7265625, "learning_rate": 0.00019548831778165736, "loss": 2.3174, "step": 81570 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019548776885664585, "loss": 2.1241, "step": 81575 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.000195487219899014, "loss": 2.1323, "step": 81580 }, { "epoch": 0.19, "grad_norm": 1.6328125, "learning_rate": 0.00019548667090876198, "loss": 2.1895, "step": 81585 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019548612188589, "loss": 2.1421, "step": 81590 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019548557283039823, "loss": 2.253, "step": 81595 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.00019548502374228683, "loss": 2.0287, "step": 81600 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.000195484474621556, "loss": 2.2183, "step": 81605 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.000195483925468206, "loss": 2.2847, "step": 81610 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.0001954833762822369, "loss": 2.2125, "step": 81615 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.000195482827063649, "loss": 2.1298, "step": 81620 }, { "epoch": 0.19, "grad_norm": 1.5, "learning_rate": 0.00019548227781244243, "loss": 2.0812, "step": 81625 }, { "epoch": 0.19, "grad_norm": 1.7734375, "learning_rate": 0.00019548172852861735, "loss": 2.1592, "step": 81630 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019548117921217397, "loss": 2.148, "step": 81635 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019548062986311253, "loss": 2.187, "step": 81640 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019548008048143314, "loss": 2.1768, "step": 81645 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.00019547953106713604, "loss": 2.105, "step": 81650 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.0001954789816202214, "loss": 2.2265, "step": 81655 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.0001954784321406894, "loss": 2.0381, "step": 81660 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019547788262854024, "loss": 2.2192, "step": 81665 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019547733308377413, "loss": 2.3015, "step": 81670 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.0001954767835063912, "loss": 2.0502, "step": 81675 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019547623389639166, "loss": 2.344, "step": 81680 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019547568425377573, "loss": 2.0705, "step": 81685 }, { "epoch": 0.19, "grad_norm": 1.6796875, "learning_rate": 0.0001954751345785436, "loss": 2.0908, "step": 81690 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019547458487069536, "loss": 2.1844, "step": 81695 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019547403513023133, "loss": 2.148, "step": 81700 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019547348535715162, "loss": 2.1595, "step": 81705 }, { "epoch": 0.19, "grad_norm": 1.640625, "learning_rate": 0.00019547293555145646, "loss": 2.0659, "step": 81710 }, { "epoch": 0.19, "grad_norm": 2.21875, "learning_rate": 0.00019547238571314597, "loss": 2.3759, "step": 81715 }, { "epoch": 0.19, "grad_norm": 1.59375, "learning_rate": 0.0001954718358422204, "loss": 2.2224, "step": 81720 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019547128593867992, "loss": 2.123, "step": 81725 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.0001954707360025247, "loss": 2.0581, "step": 81730 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.00019547018603375497, "loss": 2.123, "step": 81735 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.0001954696360323709, "loss": 2.1368, "step": 81740 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019546908599837263, "loss": 2.1656, "step": 81745 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019546853593176042, "loss": 2.0398, "step": 81750 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019546798583253442, "loss": 2.0976, "step": 81755 }, { "epoch": 0.19, "grad_norm": 1.8984375, "learning_rate": 0.00019546743570069485, "loss": 2.3332, "step": 81760 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019546688553624184, "loss": 2.2168, "step": 81765 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019546633533917563, "loss": 2.1995, "step": 81770 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019546578510949635, "loss": 2.2214, "step": 81775 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.00019546523484720426, "loss": 2.2565, "step": 81780 }, { "epoch": 0.19, "grad_norm": 2.703125, "learning_rate": 0.00019546468455229951, "loss": 2.1202, "step": 81785 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019546413422478231, "loss": 2.1788, "step": 81790 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.0001954635838646528, "loss": 1.9948, "step": 81795 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019546303347191122, "loss": 2.2919, "step": 81800 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019546248304655773, "loss": 1.9974, "step": 81805 }, { "epoch": 0.19, "grad_norm": 1.6328125, "learning_rate": 0.00019546193258859252, "loss": 2.2072, "step": 81810 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.0001954613820980158, "loss": 2.18, "step": 81815 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019546083157482772, "loss": 2.0513, "step": 81820 }, { "epoch": 0.19, "grad_norm": 1.828125, "learning_rate": 0.00019546028101902847, "loss": 1.9976, "step": 81825 }, { "epoch": 0.19, "grad_norm": 2.796875, "learning_rate": 0.0001954597304306183, "loss": 2.1706, "step": 81830 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019545917980959732, "loss": 2.2204, "step": 81835 }, { "epoch": 0.19, "grad_norm": 1.96875, "learning_rate": 0.00019545862915596577, "loss": 2.1087, "step": 81840 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019545807846972383, "loss": 2.2262, "step": 81845 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.00019545752775087166, "loss": 2.1735, "step": 81850 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019545697699940945, "loss": 2.206, "step": 81855 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.00019545642621533746, "loss": 2.1619, "step": 81860 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019545587539865578, "loss": 2.0999, "step": 81865 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019545532454936466, "loss": 2.2045, "step": 81870 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019545477366746426, "loss": 2.1204, "step": 81875 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.0001954542227529548, "loss": 2.1148, "step": 81880 }, { "epoch": 0.19, "grad_norm": 1.7421875, "learning_rate": 0.0001954536718058364, "loss": 2.0944, "step": 81885 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019545312082610937, "loss": 2.0487, "step": 81890 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019545256981377376, "loss": 2.2714, "step": 81895 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019545201876882985, "loss": 2.0646, "step": 81900 }, { "epoch": 0.19, "grad_norm": 2.828125, "learning_rate": 0.00019545146769127776, "loss": 2.1799, "step": 81905 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.00019545091658111776, "loss": 2.3096, "step": 81910 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.00019545036543834997, "loss": 1.9943, "step": 81915 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.0001954498142629746, "loss": 2.1102, "step": 81920 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019544926305499187, "loss": 2.1169, "step": 81925 }, { "epoch": 0.19, "grad_norm": 1.609375, "learning_rate": 0.00019544871181440194, "loss": 1.9228, "step": 81930 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019544816054120498, "loss": 2.1972, "step": 81935 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.0001954476092354012, "loss": 2.1685, "step": 81940 }, { "epoch": 0.19, "grad_norm": 2.140625, "learning_rate": 0.0001954470578969908, "loss": 2.1661, "step": 81945 }, { "epoch": 0.19, "grad_norm": 2.390625, "learning_rate": 0.00019544650652597394, "loss": 2.0156, "step": 81950 }, { "epoch": 0.19, "grad_norm": 2.71875, "learning_rate": 0.00019544595512235082, "loss": 2.0475, "step": 81955 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019544540368612165, "loss": 2.1081, "step": 81960 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.0001954448522172866, "loss": 2.1578, "step": 81965 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019544430071584583, "loss": 2.1454, "step": 81970 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019544374918179958, "loss": 1.9673, "step": 81975 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.000195443197615148, "loss": 2.2943, "step": 81980 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.0001954426460158913, "loss": 2.2013, "step": 81985 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019544209438402966, "loss": 2.1736, "step": 81990 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.00019544154271956327, "loss": 2.2833, "step": 81995 }, { "epoch": 0.19, "grad_norm": 2.515625, "learning_rate": 0.00019544099102249233, "loss": 2.1088, "step": 82000 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.00019544043929281698, "loss": 2.255, "step": 82005 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019543988753053748, "loss": 2.2611, "step": 82010 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019543933573565396, "loss": 2.2249, "step": 82015 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019543878390816666, "loss": 2.1494, "step": 82020 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.0001954382320480757, "loss": 2.2816, "step": 82025 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019543768015538134, "loss": 2.2167, "step": 82030 }, { "epoch": 0.19, "grad_norm": 1.4921875, "learning_rate": 0.00019543712823008372, "loss": 2.0917, "step": 82035 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019543657627218305, "loss": 2.2461, "step": 82040 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019543602428167954, "loss": 2.2149, "step": 82045 }, { "epoch": 0.19, "grad_norm": 1.5859375, "learning_rate": 0.00019543547225857333, "loss": 2.2161, "step": 82050 }, { "epoch": 0.19, "grad_norm": 2.25, "learning_rate": 0.00019543492020286464, "loss": 2.0724, "step": 82055 }, { "epoch": 0.19, "grad_norm": 1.8671875, "learning_rate": 0.00019543436811455365, "loss": 2.1983, "step": 82060 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019543381599364053, "loss": 2.132, "step": 82065 }, { "epoch": 0.19, "grad_norm": 2.203125, "learning_rate": 0.0001954332638401255, "loss": 2.0271, "step": 82070 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.00019543271165400875, "loss": 2.2222, "step": 82075 }, { "epoch": 0.19, "grad_norm": 1.921875, "learning_rate": 0.00019543215943529046, "loss": 2.2689, "step": 82080 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.0001954316071839708, "loss": 2.3375, "step": 82085 }, { "epoch": 0.19, "grad_norm": 1.9296875, "learning_rate": 0.00019543105490004995, "loss": 2.2511, "step": 82090 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.00019543050258352812, "loss": 2.1801, "step": 82095 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019542995023440557, "loss": 2.1297, "step": 82100 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019542939785268236, "loss": 2.0969, "step": 82105 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.00019542884543835874, "loss": 2.0433, "step": 82110 }, { "epoch": 0.19, "grad_norm": 1.546875, "learning_rate": 0.0001954282929914349, "loss": 2.1863, "step": 82115 }, { "epoch": 0.19, "grad_norm": 1.640625, "learning_rate": 0.000195427740511911, "loss": 2.1925, "step": 82120 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019542718799978728, "loss": 2.1036, "step": 82125 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019542663545506393, "loss": 2.1993, "step": 82130 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019542608287774108, "loss": 2.0444, "step": 82135 }, { "epoch": 0.19, "grad_norm": 2.921875, "learning_rate": 0.00019542553026781896, "loss": 2.1286, "step": 82140 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019542497762529772, "loss": 2.0259, "step": 82145 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.0001954244249501776, "loss": 2.1687, "step": 82150 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019542387224245875, "loss": 2.0234, "step": 82155 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019542331950214142, "loss": 2.176, "step": 82160 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.0001954227667292257, "loss": 2.1474, "step": 82165 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019542221392371186, "loss": 2.1696, "step": 82170 }, { "epoch": 0.19, "grad_norm": 2.59375, "learning_rate": 0.00019542166108560004, "loss": 1.8556, "step": 82175 }, { "epoch": 0.19, "grad_norm": 1.640625, "learning_rate": 0.00019542110821489048, "loss": 2.1314, "step": 82180 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.0001954205553115833, "loss": 2.2204, "step": 82185 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019542000237567878, "loss": 2.2279, "step": 82190 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.000195419449407177, "loss": 1.9896, "step": 82195 }, { "epoch": 0.19, "grad_norm": 1.65625, "learning_rate": 0.00019541889640607824, "loss": 2.1615, "step": 82200 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019541834337238268, "loss": 2.2151, "step": 82205 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019541779030609043, "loss": 2.1706, "step": 82210 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019541723720720175, "loss": 2.0496, "step": 82215 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019541668407571682, "loss": 2.3931, "step": 82220 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.0001954161309116358, "loss": 2.2775, "step": 82225 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019541557771495893, "loss": 2.1843, "step": 82230 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019541502448568637, "loss": 2.1031, "step": 82235 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019541447122381828, "loss": 2.0724, "step": 82240 }, { "epoch": 0.19, "grad_norm": 1.515625, "learning_rate": 0.00019541391792935486, "loss": 2.1941, "step": 82245 }, { "epoch": 0.19, "grad_norm": 1.84375, "learning_rate": 0.00019541336460229635, "loss": 2.2394, "step": 82250 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.0001954128112426429, "loss": 2.2487, "step": 82255 }, { "epoch": 0.19, "grad_norm": 1.625, "learning_rate": 0.0001954122578503947, "loss": 2.3782, "step": 82260 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019541170442555195, "loss": 2.1149, "step": 82265 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.00019541115096811482, "loss": 2.1306, "step": 82270 }, { "epoch": 0.19, "grad_norm": 2.5625, "learning_rate": 0.0001954105974780835, "loss": 2.0326, "step": 82275 }, { "epoch": 0.19, "grad_norm": 1.9375, "learning_rate": 0.0001954100439554582, "loss": 2.1637, "step": 82280 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.0001954094904002391, "loss": 2.3855, "step": 82285 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019540893681242638, "loss": 2.1162, "step": 82290 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019540838319202027, "loss": 2.1117, "step": 82295 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019540782953902087, "loss": 2.0958, "step": 82300 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.0001954072758534285, "loss": 2.1395, "step": 82305 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.00019540672213524322, "loss": 2.2572, "step": 82310 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019540616838446527, "loss": 2.0495, "step": 82315 }, { "epoch": 0.19, "grad_norm": 2.0, "learning_rate": 0.00019540561460109485, "loss": 2.1449, "step": 82320 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019540506078513214, "loss": 1.9927, "step": 82325 }, { "epoch": 0.19, "grad_norm": 1.9921875, "learning_rate": 0.00019540450693657734, "loss": 2.2692, "step": 82330 }, { "epoch": 0.19, "grad_norm": 1.859375, "learning_rate": 0.00019540395305543064, "loss": 2.32, "step": 82335 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019540339914169224, "loss": 2.1544, "step": 82340 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019540284519536228, "loss": 2.1544, "step": 82345 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019540229121644095, "loss": 2.1489, "step": 82350 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.0001954017372049285, "loss": 2.2341, "step": 82355 }, { "epoch": 0.19, "grad_norm": 1.7421875, "learning_rate": 0.00019540118316082507, "loss": 2.0993, "step": 82360 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019540062908413088, "loss": 2.1283, "step": 82365 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.00019540007497484608, "loss": 2.3279, "step": 82370 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.0001953995208329709, "loss": 2.2356, "step": 82375 }, { "epoch": 0.19, "grad_norm": 1.984375, "learning_rate": 0.00019539896665850552, "loss": 2.4165, "step": 82380 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019539841245145014, "loss": 2.2881, "step": 82385 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.0001953978582118049, "loss": 2.2494, "step": 82390 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019539730393957006, "loss": 2.2694, "step": 82395 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019539674963474571, "loss": 2.2368, "step": 82400 }, { "epoch": 0.19, "grad_norm": 2.46875, "learning_rate": 0.00019539619529733215, "loss": 2.1114, "step": 82405 }, { "epoch": 0.19, "grad_norm": 2.1875, "learning_rate": 0.00019539564092732953, "loss": 2.0464, "step": 82410 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019539508652473799, "loss": 2.2989, "step": 82415 }, { "epoch": 0.19, "grad_norm": 1.671875, "learning_rate": 0.0001953945320895578, "loss": 2.2812, "step": 82420 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019539397762178906, "loss": 2.1707, "step": 82425 }, { "epoch": 0.19, "grad_norm": 2.453125, "learning_rate": 0.000195393423121432, "loss": 2.253, "step": 82430 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019539286858848685, "loss": 2.2799, "step": 82435 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019539231402295376, "loss": 2.0175, "step": 82440 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.00019539175942483294, "loss": 2.0606, "step": 82445 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.00019539120479412453, "loss": 2.1412, "step": 82450 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019539065013082877, "loss": 2.2397, "step": 82455 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019539009543494587, "loss": 2.1004, "step": 82460 }, { "epoch": 0.19, "grad_norm": 1.640625, "learning_rate": 0.00019538954070647593, "loss": 2.0923, "step": 82465 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.0001953889859454192, "loss": 2.244, "step": 82470 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.0001953884311517759, "loss": 2.1997, "step": 82475 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.00019538787632554616, "loss": 2.1721, "step": 82480 }, { "epoch": 0.19, "grad_norm": 2.53125, "learning_rate": 0.00019538732146673018, "loss": 2.1818, "step": 82485 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019538676657532817, "loss": 2.2286, "step": 82490 }, { "epoch": 0.19, "grad_norm": 1.7734375, "learning_rate": 0.0001953862116513403, "loss": 2.1653, "step": 82495 }, { "epoch": 0.19, "grad_norm": 2.109375, "learning_rate": 0.0001953856566947668, "loss": 2.1733, "step": 82500 }, { "epoch": 0.19, "grad_norm": 1.8125, "learning_rate": 0.00019538510170560782, "loss": 2.1362, "step": 82505 }, { "epoch": 0.19, "grad_norm": 2.34375, "learning_rate": 0.00019538454668386354, "loss": 2.2613, "step": 82510 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.0001953839916295342, "loss": 2.0434, "step": 82515 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019538343654261993, "loss": 2.3849, "step": 82520 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019538288142312093, "loss": 2.1159, "step": 82525 }, { "epoch": 0.19, "grad_norm": 1.75, "learning_rate": 0.00019538232627103744, "loss": 2.1926, "step": 82530 }, { "epoch": 0.19, "grad_norm": 1.6953125, "learning_rate": 0.00019538177108636965, "loss": 2.081, "step": 82535 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.00019538121586911764, "loss": 2.104, "step": 82540 }, { "epoch": 0.19, "grad_norm": 2.484375, "learning_rate": 0.00019538066061928173, "loss": 2.1064, "step": 82545 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.00019538010533686205, "loss": 2.0831, "step": 82550 }, { "epoch": 0.19, "grad_norm": 2.359375, "learning_rate": 0.0001953795500218588, "loss": 2.2199, "step": 82555 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.00019537899467427213, "loss": 2.015, "step": 82560 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.0001953784392941023, "loss": 2.2242, "step": 82565 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019537788388134946, "loss": 2.2229, "step": 82570 }, { "epoch": 0.19, "grad_norm": 1.6796875, "learning_rate": 0.0001953773284360138, "loss": 2.1551, "step": 82575 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019537677295809551, "loss": 2.1801, "step": 82580 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.00019537621744759477, "loss": 2.2449, "step": 82585 }, { "epoch": 0.19, "grad_norm": 1.8203125, "learning_rate": 0.00019537566190451182, "loss": 2.0586, "step": 82590 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.0001953751063288468, "loss": 2.0796, "step": 82595 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.0001953745507205999, "loss": 2.1785, "step": 82600 }, { "epoch": 0.19, "grad_norm": 2.015625, "learning_rate": 0.00019537399507977136, "loss": 2.2637, "step": 82605 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.0001953734394063613, "loss": 2.2304, "step": 82610 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019537288370036996, "loss": 2.1421, "step": 82615 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.00019537232796179753, "loss": 2.1522, "step": 82620 }, { "epoch": 0.19, "grad_norm": 2.375, "learning_rate": 0.00019537177219064415, "loss": 2.1337, "step": 82625 }, { "epoch": 0.19, "grad_norm": 2.484375, "learning_rate": 0.00019537121638691006, "loss": 2.3308, "step": 82630 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.00019537066055059542, "loss": 2.2519, "step": 82635 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.00019537010468170042, "loss": 2.1611, "step": 82640 }, { "epoch": 0.19, "grad_norm": 2.0625, "learning_rate": 0.0001953695487802253, "loss": 2.3133, "step": 82645 }, { "epoch": 0.19, "grad_norm": 2.4375, "learning_rate": 0.00019536899284617018, "loss": 2.1951, "step": 82650 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.0001953684368795353, "loss": 2.1287, "step": 82655 }, { "epoch": 0.19, "grad_norm": 1.9140625, "learning_rate": 0.00019536788088032084, "loss": 2.2595, "step": 82660 }, { "epoch": 0.19, "grad_norm": 1.765625, "learning_rate": 0.00019536732484852696, "loss": 2.2573, "step": 82665 }, { "epoch": 0.19, "grad_norm": 1.90625, "learning_rate": 0.00019536676878415386, "loss": 2.2617, "step": 82670 }, { "epoch": 0.19, "grad_norm": 2.171875, "learning_rate": 0.0001953662126872018, "loss": 2.1603, "step": 82675 }, { "epoch": 0.19, "grad_norm": 1.9765625, "learning_rate": 0.00019536565655767089, "loss": 2.2923, "step": 82680 }, { "epoch": 0.19, "grad_norm": 2.046875, "learning_rate": 0.0001953651003955613, "loss": 2.1224, "step": 82685 }, { "epoch": 0.19, "grad_norm": 2.03125, "learning_rate": 0.0001953645442008733, "loss": 2.2035, "step": 82690 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019536398797360702, "loss": 2.2083, "step": 82695 }, { "epoch": 0.19, "grad_norm": 1.7421875, "learning_rate": 0.0001953634317137627, "loss": 2.2073, "step": 82700 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019536287542134048, "loss": 2.1322, "step": 82705 }, { "epoch": 0.19, "grad_norm": 2.125, "learning_rate": 0.0001953623190963406, "loss": 2.2162, "step": 82710 }, { "epoch": 0.19, "grad_norm": 2.15625, "learning_rate": 0.0001953617627387632, "loss": 2.0562, "step": 82715 }, { "epoch": 0.19, "grad_norm": 1.8046875, "learning_rate": 0.0001953612063486085, "loss": 2.1233, "step": 82720 }, { "epoch": 0.19, "grad_norm": 2.40625, "learning_rate": 0.00019536064992587665, "loss": 2.3052, "step": 82725 }, { "epoch": 0.19, "grad_norm": 1.9453125, "learning_rate": 0.00019536009347056793, "loss": 2.2985, "step": 82730 }, { "epoch": 0.19, "grad_norm": 2.515625, "learning_rate": 0.00019535953698268242, "loss": 1.9712, "step": 82735 }, { "epoch": 0.19, "grad_norm": 1.7890625, "learning_rate": 0.0001953589804622204, "loss": 2.1265, "step": 82740 }, { "epoch": 0.19, "grad_norm": 1.796875, "learning_rate": 0.000195358423909182, "loss": 2.0524, "step": 82745 }, { "epoch": 0.19, "grad_norm": 2.328125, "learning_rate": 0.00019535786732356744, "loss": 2.2261, "step": 82750 }, { "epoch": 0.19, "grad_norm": 2.34375, "learning_rate": 0.0001953573107053769, "loss": 2.0183, "step": 82755 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019535675405461057, "loss": 2.1031, "step": 82760 }, { "epoch": 0.19, "grad_norm": 2.265625, "learning_rate": 0.00019535619737126868, "loss": 2.1841, "step": 82765 }, { "epoch": 0.19, "grad_norm": 1.734375, "learning_rate": 0.00019535564065535134, "loss": 2.0349, "step": 82770 }, { "epoch": 0.19, "grad_norm": 2.296875, "learning_rate": 0.00019535508390685879, "loss": 2.0768, "step": 82775 }, { "epoch": 0.19, "grad_norm": 1.703125, "learning_rate": 0.00019535452712579122, "loss": 2.3093, "step": 82780 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019535397031214885, "loss": 2.0493, "step": 82785 }, { "epoch": 0.19, "grad_norm": 2.28125, "learning_rate": 0.0001953534134659318, "loss": 2.3395, "step": 82790 }, { "epoch": 0.19, "grad_norm": 1.8359375, "learning_rate": 0.00019535285658714026, "loss": 2.1172, "step": 82795 }, { "epoch": 0.19, "grad_norm": 1.71875, "learning_rate": 0.00019535229967577452, "loss": 2.0324, "step": 82800 }, { "epoch": 0.19, "grad_norm": 2.3125, "learning_rate": 0.0001953517427318347, "loss": 2.2247, "step": 82805 }, { "epoch": 0.19, "grad_norm": 1.8515625, "learning_rate": 0.00019535118575532092, "loss": 2.0087, "step": 82810 }, { "epoch": 0.19, "grad_norm": 2.234375, "learning_rate": 0.00019535062874623352, "loss": 2.0394, "step": 82815 }, { "epoch": 0.19, "grad_norm": 1.953125, "learning_rate": 0.0001953500717045726, "loss": 2.084, "step": 82820 }, { "epoch": 0.19, "grad_norm": 1.78125, "learning_rate": 0.00019534951463033836, "loss": 2.1387, "step": 82825 }, { "epoch": 0.19, "grad_norm": 1.9609375, "learning_rate": 0.000195348957523531, "loss": 2.2704, "step": 82830 }, { "epoch": 0.19, "grad_norm": 1.453125, "learning_rate": 0.00019534840038415067, "loss": 2.0789, "step": 82835 }, { "epoch": 0.19, "grad_norm": 1.875, "learning_rate": 0.00019534784321219767, "loss": 2.0566, "step": 82840 }, { "epoch": 0.19, "grad_norm": 2.078125, "learning_rate": 0.00019534728600767204, "loss": 2.3852, "step": 82845 }, { "epoch": 0.19, "grad_norm": 1.8828125, "learning_rate": 0.0001953467287705741, "loss": 2.1809, "step": 82850 }, { "epoch": 0.19, "grad_norm": 1.890625, "learning_rate": 0.00019534617150090397, "loss": 2.2236, "step": 82855 }, { "epoch": 0.19, "grad_norm": 2.09375, "learning_rate": 0.00019534561419866185, "loss": 2.09, "step": 82860 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019534505686384797, "loss": 2.2124, "step": 82865 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.00019534449949646247, "loss": 2.1969, "step": 82870 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019534394209650555, "loss": 2.2088, "step": 82875 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019534338466397743, "loss": 2.1097, "step": 82880 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019534282719887826, "loss": 2.1771, "step": 82885 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019534226970120827, "loss": 2.2131, "step": 82890 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.00019534171217096762, "loss": 2.192, "step": 82895 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019534115460815652, "loss": 2.1331, "step": 82900 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019534059701277516, "loss": 2.2135, "step": 82905 }, { "epoch": 0.2, "grad_norm": 2.734375, "learning_rate": 0.0001953400393848237, "loss": 2.1361, "step": 82910 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019533948172430237, "loss": 2.1625, "step": 82915 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019533892403121133, "loss": 2.2273, "step": 82920 }, { "epoch": 0.2, "grad_norm": 1.515625, "learning_rate": 0.0001953383663055508, "loss": 2.1372, "step": 82925 }, { "epoch": 0.2, "grad_norm": 2.234375, "learning_rate": 0.00019533780854732094, "loss": 2.2687, "step": 82930 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019533725075652195, "loss": 2.0809, "step": 82935 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019533669293315405, "loss": 2.363, "step": 82940 }, { "epoch": 0.2, "grad_norm": 2.59375, "learning_rate": 0.00019533613507721742, "loss": 2.1103, "step": 82945 }, { "epoch": 0.2, "grad_norm": 1.6015625, "learning_rate": 0.0001953355771887122, "loss": 2.0203, "step": 82950 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019533501926763863, "loss": 2.2086, "step": 82955 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019533446131399687, "loss": 2.0276, "step": 82960 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019533390332778717, "loss": 2.0839, "step": 82965 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019533334530900964, "loss": 2.2002, "step": 82970 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019533278725766454, "loss": 2.1354, "step": 82975 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.00019533222917375202, "loss": 2.0811, "step": 82980 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.0001953316710572723, "loss": 1.897, "step": 82985 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019533111290822552, "loss": 2.0764, "step": 82990 }, { "epoch": 0.2, "grad_norm": 1.625, "learning_rate": 0.0001953305547266119, "loss": 2.1917, "step": 82995 }, { "epoch": 0.2, "grad_norm": 1.640625, "learning_rate": 0.00019532999651243168, "loss": 2.0813, "step": 83000 }, { "epoch": 0.2, "grad_norm": 3.078125, "learning_rate": 0.00019532943826568495, "loss": 1.9068, "step": 83005 }, { "epoch": 0.2, "grad_norm": 1.6328125, "learning_rate": 0.000195328879986372, "loss": 2.3393, "step": 83010 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.00019532832167449296, "loss": 2.2119, "step": 83015 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.000195327763330048, "loss": 2.2561, "step": 83020 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.0001953272049530374, "loss": 2.1882, "step": 83025 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.0001953266465434613, "loss": 1.9112, "step": 83030 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019532608810131984, "loss": 2.1113, "step": 83035 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.00019532552962661328, "loss": 2.254, "step": 83040 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019532497111934182, "loss": 2.3207, "step": 83045 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019532441257950558, "loss": 2.0999, "step": 83050 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.0001953238540071048, "loss": 2.0387, "step": 83055 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019532329540213967, "loss": 2.1515, "step": 83060 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.0001953227367646104, "loss": 2.0717, "step": 83065 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001953221780945171, "loss": 2.2248, "step": 83070 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019532161939186008, "loss": 2.0629, "step": 83075 }, { "epoch": 0.2, "grad_norm": 2.53125, "learning_rate": 0.0001953210606566394, "loss": 1.9781, "step": 83080 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019532050188885537, "loss": 2.219, "step": 83085 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019531994308850806, "loss": 2.0859, "step": 83090 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001953193842555978, "loss": 2.0981, "step": 83095 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019531882539012467, "loss": 2.2044, "step": 83100 }, { "epoch": 0.2, "grad_norm": 2.671875, "learning_rate": 0.00019531826649208894, "loss": 2.286, "step": 83105 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019531770756149074, "loss": 2.1451, "step": 83110 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019531714859833026, "loss": 2.154, "step": 83115 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.00019531658960260772, "loss": 2.253, "step": 83120 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019531603057432333, "loss": 2.2718, "step": 83125 }, { "epoch": 0.2, "grad_norm": 1.5859375, "learning_rate": 0.00019531547151347724, "loss": 1.8744, "step": 83130 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019531491242006965, "loss": 2.0917, "step": 83135 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019531435329410077, "loss": 2.2246, "step": 83140 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019531379413557077, "loss": 2.0958, "step": 83145 }, { "epoch": 0.2, "grad_norm": 1.7109375, "learning_rate": 0.00019531323494447985, "loss": 2.2292, "step": 83150 }, { "epoch": 0.2, "grad_norm": 1.609375, "learning_rate": 0.0001953126757208282, "loss": 2.2608, "step": 83155 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019531211646461603, "loss": 2.305, "step": 83160 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.00019531155717584348, "loss": 2.3159, "step": 83165 }, { "epoch": 0.2, "grad_norm": 1.7109375, "learning_rate": 0.0001953109978545108, "loss": 2.3302, "step": 83170 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019531043850061814, "loss": 2.1913, "step": 83175 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.0001953098791141657, "loss": 2.1182, "step": 83180 }, { "epoch": 0.2, "grad_norm": 1.6640625, "learning_rate": 0.0001953093196951537, "loss": 1.9245, "step": 83185 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001953087602435823, "loss": 2.21, "step": 83190 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.0001953082007594517, "loss": 2.1263, "step": 83195 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019530764124276207, "loss": 2.1767, "step": 83200 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.00019530708169351364, "loss": 2.2358, "step": 83205 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019530652211170656, "loss": 2.1306, "step": 83210 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.00019530596249734105, "loss": 2.2395, "step": 83215 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.0001953054028504173, "loss": 2.1513, "step": 83220 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019530484317093554, "loss": 2.201, "step": 83225 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019530428345889585, "loss": 2.0134, "step": 83230 }, { "epoch": 0.2, "grad_norm": 1.7421875, "learning_rate": 0.00019530372371429853, "loss": 2.2412, "step": 83235 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019530316393714372, "loss": 2.1172, "step": 83240 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.0001953026041274316, "loss": 2.0636, "step": 83245 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001953020442851624, "loss": 2.4308, "step": 83250 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.0001953014844103363, "loss": 2.1992, "step": 83255 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019530092450295347, "loss": 2.2564, "step": 83260 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019530036456301414, "loss": 2.1151, "step": 83265 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019529980459051847, "loss": 1.9803, "step": 83270 }, { "epoch": 0.2, "grad_norm": 2.34375, "learning_rate": 0.00019529924458546661, "loss": 2.0727, "step": 83275 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019529868454785888, "loss": 2.0517, "step": 83280 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019529812447769532, "loss": 2.2261, "step": 83285 }, { "epoch": 0.2, "grad_norm": 1.6796875, "learning_rate": 0.00019529756437497623, "loss": 2.136, "step": 83290 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.00019529700423970175, "loss": 2.0121, "step": 83295 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.0001952964440718721, "loss": 2.1411, "step": 83300 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019529588387148743, "loss": 2.2613, "step": 83305 }, { "epoch": 0.2, "grad_norm": 1.6953125, "learning_rate": 0.000195295323638548, "loss": 2.309, "step": 83310 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019529476337305391, "loss": 2.1692, "step": 83315 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.00019529420307500542, "loss": 2.1074, "step": 83320 }, { "epoch": 0.2, "grad_norm": 3.109375, "learning_rate": 0.0001952936427444027, "loss": 2.013, "step": 83325 }, { "epoch": 0.2, "grad_norm": 2.609375, "learning_rate": 0.00019529308238124598, "loss": 2.0663, "step": 83330 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019529252198553535, "loss": 2.0077, "step": 83335 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.00019529196155727111, "loss": 2.2598, "step": 83340 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001952914010964534, "loss": 2.0634, "step": 83345 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019529084060308243, "loss": 2.1404, "step": 83350 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019529028007715836, "loss": 2.1285, "step": 83355 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.0001952897195186814, "loss": 2.1349, "step": 83360 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019528915892765177, "loss": 2.2291, "step": 83365 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.00019528859830406962, "loss": 2.2194, "step": 83370 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.00019528803764793515, "loss": 2.1552, "step": 83375 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001952874769592486, "loss": 2.0648, "step": 83380 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019528691623801005, "loss": 2.1457, "step": 83385 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.00019528635548421978, "loss": 2.1734, "step": 83390 }, { "epoch": 0.2, "grad_norm": 1.609375, "learning_rate": 0.000195285794697878, "loss": 2.1056, "step": 83395 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019528523387898482, "loss": 2.2613, "step": 83400 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.00019528467302754051, "loss": 2.1676, "step": 83405 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019528411214354523, "loss": 2.102, "step": 83410 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019528355122699914, "loss": 2.1494, "step": 83415 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019528299027790248, "loss": 2.1874, "step": 83420 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.0001952824292962554, "loss": 2.4536, "step": 83425 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019528186828205815, "loss": 1.9634, "step": 83430 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019528130723531087, "loss": 1.9919, "step": 83435 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019528074615601378, "loss": 2.1204, "step": 83440 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019528018504416704, "loss": 2.0704, "step": 83445 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.00019527962389977085, "loss": 2.2303, "step": 83450 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019527906272282544, "loss": 2.2741, "step": 83455 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019527850151333095, "loss": 2.1487, "step": 83460 }, { "epoch": 0.2, "grad_norm": 2.53125, "learning_rate": 0.0001952779402712876, "loss": 2.0647, "step": 83465 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.0001952773789966956, "loss": 2.2035, "step": 83470 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.0001952768176895551, "loss": 2.1303, "step": 83475 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019527625634986633, "loss": 2.0244, "step": 83480 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019527569497762944, "loss": 1.969, "step": 83485 }, { "epoch": 0.2, "grad_norm": 1.7890625, "learning_rate": 0.00019527513357284467, "loss": 2.2337, "step": 83490 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019527457213551217, "loss": 2.3013, "step": 83495 }, { "epoch": 0.2, "grad_norm": 1.640625, "learning_rate": 0.00019527401066563214, "loss": 2.0134, "step": 83500 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.0001952734491632048, "loss": 2.1342, "step": 83505 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019527288762823027, "loss": 2.173, "step": 83510 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019527232606070887, "loss": 2.2173, "step": 83515 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019527176446064069, "loss": 2.3107, "step": 83520 }, { "epoch": 0.2, "grad_norm": 1.7890625, "learning_rate": 0.00019527120282802592, "loss": 2.2723, "step": 83525 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.0001952706411628648, "loss": 2.1627, "step": 83530 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.0001952700794651575, "loss": 2.1974, "step": 83535 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019526951773490421, "loss": 2.0235, "step": 83540 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019526895597210512, "loss": 2.0792, "step": 83545 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019526839417676045, "loss": 2.0713, "step": 83550 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019526783234887033, "loss": 2.1343, "step": 83555 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019526727048843503, "loss": 2.166, "step": 83560 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019526670859545465, "loss": 2.2464, "step": 83565 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.0001952661466699295, "loss": 2.1442, "step": 83570 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019526558471185968, "loss": 2.3103, "step": 83575 }, { "epoch": 0.2, "grad_norm": 2.53125, "learning_rate": 0.0001952650227212454, "loss": 2.1167, "step": 83580 }, { "epoch": 0.2, "grad_norm": 1.8359375, "learning_rate": 0.00019526446069808686, "loss": 2.1462, "step": 83585 }, { "epoch": 0.2, "grad_norm": 1.5546875, "learning_rate": 0.00019526389864238426, "loss": 2.2832, "step": 83590 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019526333655413777, "loss": 2.1196, "step": 83595 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.0001952627744333476, "loss": 2.1033, "step": 83600 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019526221228001396, "loss": 2.1036, "step": 83605 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.000195261650094137, "loss": 2.3169, "step": 83610 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019526108787571694, "loss": 2.2713, "step": 83615 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.00019526052562475398, "loss": 2.0783, "step": 83620 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019525996334124828, "loss": 2.1734, "step": 83625 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019525940102520006, "loss": 2.3295, "step": 83630 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.0001952588386766095, "loss": 2.199, "step": 83635 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.00019525827629547675, "loss": 1.9276, "step": 83640 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.0001952577138818021, "loss": 2.1792, "step": 83645 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019525715143558565, "loss": 2.2898, "step": 83650 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.00019525658895682766, "loss": 2.072, "step": 83655 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.00019525602644552826, "loss": 2.2151, "step": 83660 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.0001952554639016877, "loss": 2.0923, "step": 83665 }, { "epoch": 0.2, "grad_norm": 1.6015625, "learning_rate": 0.00019525490132530615, "loss": 2.1703, "step": 83670 }, { "epoch": 0.2, "grad_norm": 2.53125, "learning_rate": 0.00019525433871638378, "loss": 2.1502, "step": 83675 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019525377607492082, "loss": 2.0563, "step": 83680 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019525321340091743, "loss": 2.4057, "step": 83685 }, { "epoch": 0.2, "grad_norm": 1.5703125, "learning_rate": 0.0001952526506943738, "loss": 1.9592, "step": 83690 }, { "epoch": 0.2, "grad_norm": 1.640625, "learning_rate": 0.00019525208795529014, "loss": 2.0994, "step": 83695 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019525152518366667, "loss": 2.0296, "step": 83700 }, { "epoch": 0.2, "grad_norm": 1.4140625, "learning_rate": 0.00019525096237950354, "loss": 2.1322, "step": 83705 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019525039954280092, "loss": 1.9911, "step": 83710 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019524983667355906, "loss": 2.0922, "step": 83715 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019524927377177816, "loss": 2.0934, "step": 83720 }, { "epoch": 0.2, "grad_norm": 2.40625, "learning_rate": 0.00019524871083745834, "loss": 2.0328, "step": 83725 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019524814787059985, "loss": 2.0503, "step": 83730 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019524758487120286, "loss": 2.0735, "step": 83735 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019524702183926755, "loss": 2.1643, "step": 83740 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019524645877479416, "loss": 2.1507, "step": 83745 }, { "epoch": 0.2, "grad_norm": 1.5625, "learning_rate": 0.00019524589567778281, "loss": 2.1637, "step": 83750 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019524533254823377, "loss": 2.4398, "step": 83755 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019524476938614723, "loss": 2.1368, "step": 83760 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001952442061915233, "loss": 2.2588, "step": 83765 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019524364296436223, "loss": 2.2532, "step": 83770 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019524307970466417, "loss": 2.2687, "step": 83775 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.0001952425164124294, "loss": 2.2611, "step": 83780 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019524195308765805, "loss": 2.1964, "step": 83785 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001952413897303503, "loss": 2.1325, "step": 83790 }, { "epoch": 0.2, "grad_norm": 2.609375, "learning_rate": 0.00019524082634050636, "loss": 2.2858, "step": 83795 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019524026291812646, "loss": 2.3554, "step": 83800 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019523969946321076, "loss": 2.0532, "step": 83805 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019523913597575943, "loss": 2.1052, "step": 83810 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019523857245577268, "loss": 2.0646, "step": 83815 }, { "epoch": 0.2, "grad_norm": 1.6796875, "learning_rate": 0.00019523800890325073, "loss": 2.0812, "step": 83820 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.0001952374453181937, "loss": 2.1145, "step": 83825 }, { "epoch": 0.2, "grad_norm": 2.703125, "learning_rate": 0.00019523688170060188, "loss": 2.0567, "step": 83830 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.0001952363180504754, "loss": 2.0822, "step": 83835 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019523575436781448, "loss": 2.1107, "step": 83840 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019523519065261927, "loss": 2.2146, "step": 83845 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019523462690489, "loss": 2.1978, "step": 83850 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.00019523406312462686, "loss": 2.3077, "step": 83855 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019523349931183006, "loss": 2.2158, "step": 83860 }, { "epoch": 0.2, "grad_norm": 2.546875, "learning_rate": 0.00019523293546649972, "loss": 2.0885, "step": 83865 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019523237158863613, "loss": 2.3496, "step": 83870 }, { "epoch": 0.2, "grad_norm": 1.7421875, "learning_rate": 0.00019523180767823942, "loss": 2.1579, "step": 83875 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.0001952312437353098, "loss": 2.097, "step": 83880 }, { "epoch": 0.2, "grad_norm": 1.640625, "learning_rate": 0.00019523067975984744, "loss": 2.2965, "step": 83885 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.00019523011575185256, "loss": 2.0128, "step": 83890 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019522955171132536, "loss": 2.1731, "step": 83895 }, { "epoch": 0.2, "grad_norm": 2.34375, "learning_rate": 0.000195228987638266, "loss": 2.4005, "step": 83900 }, { "epoch": 0.2, "grad_norm": 2.390625, "learning_rate": 0.00019522842353267468, "loss": 2.1443, "step": 83905 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.0001952278593945516, "loss": 2.1356, "step": 83910 }, { "epoch": 0.2, "grad_norm": 1.6015625, "learning_rate": 0.00019522729522389703, "loss": 2.2132, "step": 83915 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019522673102071104, "loss": 2.1332, "step": 83920 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019522616678499386, "loss": 2.0376, "step": 83925 }, { "epoch": 0.2, "grad_norm": 2.234375, "learning_rate": 0.00019522560251674569, "loss": 2.1437, "step": 83930 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019522503821596673, "loss": 2.2442, "step": 83935 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019522447388265718, "loss": 2.0555, "step": 83940 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.0001952239095168172, "loss": 2.3695, "step": 83945 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.00019522334511844704, "loss": 2.2129, "step": 83950 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019522278068754686, "loss": 2.0231, "step": 83955 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019522221622411684, "loss": 2.2954, "step": 83960 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.00019522165172815718, "loss": 2.1585, "step": 83965 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019522108719966807, "loss": 2.1278, "step": 83970 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019522052263864974, "loss": 2.1964, "step": 83975 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019521995804510232, "loss": 2.2512, "step": 83980 }, { "epoch": 0.2, "grad_norm": 1.328125, "learning_rate": 0.00019521939341902604, "loss": 2.0413, "step": 83985 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.0001952188287604211, "loss": 2.0882, "step": 83990 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019521826406928766, "loss": 2.0615, "step": 83995 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019521769934562598, "loss": 2.2454, "step": 84000 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019521713458943617, "loss": 2.0797, "step": 84005 }, { "epoch": 0.2, "grad_norm": 1.6015625, "learning_rate": 0.00019521656980071845, "loss": 2.2138, "step": 84010 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.00019521600497947303, "loss": 1.9854, "step": 84015 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019521544012570012, "loss": 2.2993, "step": 84020 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019521487523939987, "loss": 2.1174, "step": 84025 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.0001952143103205725, "loss": 2.274, "step": 84030 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019521374536921819, "loss": 2.0566, "step": 84035 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019521318038533714, "loss": 2.2425, "step": 84040 }, { "epoch": 0.2, "grad_norm": 2.53125, "learning_rate": 0.00019521261536892953, "loss": 2.4281, "step": 84045 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.0001952120503199956, "loss": 2.2014, "step": 84050 }, { "epoch": 0.2, "grad_norm": 1.6875, "learning_rate": 0.00019521148523853548, "loss": 2.0642, "step": 84055 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019521092012454938, "loss": 2.4122, "step": 84060 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.0001952103549780375, "loss": 2.1019, "step": 84065 }, { "epoch": 0.2, "grad_norm": 2.703125, "learning_rate": 0.00019520978979900005, "loss": 2.0895, "step": 84070 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.0001952092245874372, "loss": 2.2736, "step": 84075 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.00019520865934334917, "loss": 2.2348, "step": 84080 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019520809406673614, "loss": 2.2416, "step": 84085 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019520752875759826, "loss": 2.1645, "step": 84090 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019520696341593577, "loss": 2.0941, "step": 84095 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019520639804174888, "loss": 2.2816, "step": 84100 }, { "epoch": 0.2, "grad_norm": 2.515625, "learning_rate": 0.00019520583263503777, "loss": 2.0131, "step": 84105 }, { "epoch": 0.2, "grad_norm": 2.375, "learning_rate": 0.0001952052671958026, "loss": 2.1608, "step": 84110 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.00019520470172404357, "loss": 2.1293, "step": 84115 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001952041362197609, "loss": 2.2395, "step": 84120 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019520357068295477, "loss": 2.0726, "step": 84125 }, { "epoch": 0.2, "grad_norm": 2.578125, "learning_rate": 0.00019520300511362536, "loss": 2.1399, "step": 84130 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.0001952024395117729, "loss": 2.1408, "step": 84135 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019520187387739757, "loss": 2.2652, "step": 84140 }, { "epoch": 0.2, "grad_norm": 3.1875, "learning_rate": 0.00019520130821049954, "loss": 2.0837, "step": 84145 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.000195200742511079, "loss": 2.1975, "step": 84150 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.00019520017677913617, "loss": 2.1523, "step": 84155 }, { "epoch": 0.2, "grad_norm": 2.375, "learning_rate": 0.00019519961101467122, "loss": 2.1299, "step": 84160 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019519904521768437, "loss": 2.2001, "step": 84165 }, { "epoch": 0.2, "grad_norm": 2.609375, "learning_rate": 0.00019519847938817583, "loss": 2.1673, "step": 84170 }, { "epoch": 0.2, "grad_norm": 2.609375, "learning_rate": 0.0001951979135261457, "loss": 1.9297, "step": 84175 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.00019519734763159428, "loss": 2.2138, "step": 84180 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019519678170452172, "loss": 2.1421, "step": 84185 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019519621574492818, "loss": 2.4142, "step": 84190 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019519564975281392, "loss": 2.3753, "step": 84195 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.00019519508372817907, "loss": 2.1036, "step": 84200 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019519451767102387, "loss": 2.1293, "step": 84205 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019519395158134852, "loss": 2.2421, "step": 84210 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019519338545915315, "loss": 2.249, "step": 84215 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019519281930443802, "loss": 2.0891, "step": 84220 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.0001951922531172033, "loss": 2.3446, "step": 84225 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019519168689744913, "loss": 2.1424, "step": 84230 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.00019519112064517578, "loss": 2.1823, "step": 84235 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.00019519055436038345, "loss": 2.1721, "step": 84240 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019518998804307226, "loss": 2.015, "step": 84245 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001951894216932425, "loss": 2.1015, "step": 84250 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.00019518885531089424, "loss": 2.2202, "step": 84255 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.0001951882888960278, "loss": 2.0309, "step": 84260 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019518772244864324, "loss": 2.1012, "step": 84265 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019518715596874087, "loss": 1.9827, "step": 84270 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019518658945632086, "loss": 2.0195, "step": 84275 }, { "epoch": 0.2, "grad_norm": 1.8359375, "learning_rate": 0.00019518602291138337, "loss": 1.8901, "step": 84280 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.0001951854563339286, "loss": 2.1781, "step": 84285 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019518488972395675, "loss": 2.2148, "step": 84290 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.000195184323081468, "loss": 1.9691, "step": 84295 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019518375640646262, "loss": 1.9638, "step": 84300 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.00019518318969894067, "loss": 2.1974, "step": 84305 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019518262295890244, "loss": 2.1719, "step": 84310 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019518205618634812, "loss": 2.3375, "step": 84315 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019518148938127788, "loss": 2.0045, "step": 84320 }, { "epoch": 0.2, "grad_norm": 2.59375, "learning_rate": 0.0001951809225436919, "loss": 2.2804, "step": 84325 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019518035567359042, "loss": 2.2484, "step": 84330 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019517978877097355, "loss": 2.215, "step": 84335 }, { "epoch": 0.2, "grad_norm": 1.6484375, "learning_rate": 0.00019517922183584158, "loss": 2.1994, "step": 84340 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019517865486819467, "loss": 2.2981, "step": 84345 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019517808786803298, "loss": 2.2292, "step": 84350 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019517752083535674, "loss": 2.2215, "step": 84355 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019517695377016612, "loss": 2.1247, "step": 84360 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019517638667246133, "loss": 2.1496, "step": 84365 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.00019517581954224257, "loss": 2.2123, "step": 84370 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.00019517525237951002, "loss": 2.3047, "step": 84375 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.0001951746851842639, "loss": 2.113, "step": 84380 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019517411795650434, "loss": 2.1588, "step": 84385 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.0001951735506962316, "loss": 2.1517, "step": 84390 }, { "epoch": 0.2, "grad_norm": 2.390625, "learning_rate": 0.00019517298340344586, "loss": 2.1831, "step": 84395 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019517241607814727, "loss": 2.2684, "step": 84400 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.0001951718487203361, "loss": 2.3283, "step": 84405 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019517128133001246, "loss": 1.9477, "step": 84410 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.0001951707139071766, "loss": 2.2377, "step": 84415 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019517014645182868, "loss": 2.1855, "step": 84420 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019516957896396894, "loss": 2.1488, "step": 84425 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019516901144359754, "loss": 2.128, "step": 84430 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019516844389071467, "loss": 2.214, "step": 84435 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019516787630532054, "loss": 2.1602, "step": 84440 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019516730868741533, "loss": 2.0063, "step": 84445 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019516674103699924, "loss": 2.1378, "step": 84450 }, { "epoch": 0.2, "grad_norm": 1.8359375, "learning_rate": 0.00019516617335407248, "loss": 2.2312, "step": 84455 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019516560563863524, "loss": 2.054, "step": 84460 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019516503789068768, "loss": 2.1668, "step": 84465 }, { "epoch": 0.2, "grad_norm": 1.6484375, "learning_rate": 0.00019516447011023002, "loss": 2.1991, "step": 84470 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019516390229726245, "loss": 2.2142, "step": 84475 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.00019516333445178516, "loss": 2.2686, "step": 84480 }, { "epoch": 0.2, "grad_norm": 1.5390625, "learning_rate": 0.00019516276657379836, "loss": 2.2895, "step": 84485 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.00019516219866330223, "loss": 1.9969, "step": 84490 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019516163072029696, "loss": 2.2279, "step": 84495 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.00019516106274478277, "loss": 2.1187, "step": 84500 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.0001951604947367598, "loss": 2.2607, "step": 84505 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.00019515992669622833, "loss": 2.136, "step": 84510 }, { "epoch": 0.2, "grad_norm": 2.625, "learning_rate": 0.00019515935862318847, "loss": 2.1466, "step": 84515 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019515879051764046, "loss": 2.0722, "step": 84520 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019515822237958446, "loss": 2.1824, "step": 84525 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.0001951576542090207, "loss": 2.1496, "step": 84530 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019515708600594938, "loss": 2.1815, "step": 84535 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.00019515651777037065, "loss": 2.3579, "step": 84540 }, { "epoch": 0.2, "grad_norm": 1.4375, "learning_rate": 0.00019515594950228474, "loss": 2.275, "step": 84545 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.0001951553812016918, "loss": 2.2297, "step": 84550 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019515481286859208, "loss": 2.2009, "step": 84555 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.00019515424450298574, "loss": 2.163, "step": 84560 }, { "epoch": 0.2, "grad_norm": 1.71875, "learning_rate": 0.00019515367610487298, "loss": 2.2393, "step": 84565 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019515310767425403, "loss": 2.1342, "step": 84570 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019515253921112904, "loss": 2.1503, "step": 84575 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001951519707154982, "loss": 2.156, "step": 84580 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019515140218736175, "loss": 2.3198, "step": 84585 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.00019515083362671982, "loss": 1.9834, "step": 84590 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.00019515026503357267, "loss": 2.2127, "step": 84595 }, { "epoch": 0.2, "grad_norm": 1.7890625, "learning_rate": 0.00019514969640792044, "loss": 2.2937, "step": 84600 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.00019514912774976336, "loss": 2.2365, "step": 84605 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.0001951485590591016, "loss": 2.1871, "step": 84610 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019514799033593542, "loss": 2.0395, "step": 84615 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.0001951474215802649, "loss": 2.3104, "step": 84620 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.0001951468527920903, "loss": 2.3438, "step": 84625 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019514628397141183, "loss": 2.1098, "step": 84630 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019514571511822966, "loss": 2.0475, "step": 84635 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019514514623254398, "loss": 2.2014, "step": 84640 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.000195144577314355, "loss": 2.0266, "step": 84645 }, { "epoch": 0.2, "grad_norm": 2.453125, "learning_rate": 0.0001951440083636629, "loss": 2.338, "step": 84650 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.0001951434393804679, "loss": 2.152, "step": 84655 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019514287036477015, "loss": 2.2654, "step": 84660 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.0001951423013165699, "loss": 2.1813, "step": 84665 }, { "epoch": 0.2, "grad_norm": 2.734375, "learning_rate": 0.00019514173223586728, "loss": 2.3289, "step": 84670 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019514116312266256, "loss": 2.1499, "step": 84675 }, { "epoch": 0.2, "grad_norm": 1.5625, "learning_rate": 0.00019514059397695584, "loss": 2.1592, "step": 84680 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.0001951400247987474, "loss": 2.2679, "step": 84685 }, { "epoch": 0.2, "grad_norm": 1.71875, "learning_rate": 0.0001951394555880374, "loss": 2.1876, "step": 84690 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019513888634482604, "loss": 2.0075, "step": 84695 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.0001951383170691135, "loss": 2.1771, "step": 84700 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.00019513774776089998, "loss": 2.259, "step": 84705 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.0001951371784201857, "loss": 2.2719, "step": 84710 }, { "epoch": 0.2, "grad_norm": 1.7109375, "learning_rate": 0.00019513660904697084, "loss": 1.9896, "step": 84715 }, { "epoch": 0.2, "grad_norm": 2.875, "learning_rate": 0.00019513603964125555, "loss": 2.2853, "step": 84720 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.0001951354702030401, "loss": 2.2499, "step": 84725 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019513490073232466, "loss": 2.1252, "step": 84730 }, { "epoch": 0.2, "grad_norm": 1.7421875, "learning_rate": 0.00019513433122910935, "loss": 2.1901, "step": 84735 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.0001951337616933945, "loss": 2.1636, "step": 84740 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019513319212518016, "loss": 2.171, "step": 84745 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019513262252446664, "loss": 1.9905, "step": 84750 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.00019513205289125408, "loss": 2.1128, "step": 84755 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019513148322554272, "loss": 2.1002, "step": 84760 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.00019513091352733267, "loss": 2.2426, "step": 84765 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.0001951303437966242, "loss": 2.2746, "step": 84770 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.0001951297740334175, "loss": 2.0463, "step": 84775 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.00019512920423771267, "loss": 2.0992, "step": 84780 }, { "epoch": 0.2, "grad_norm": 1.515625, "learning_rate": 0.00019512863440951007, "loss": 2.0613, "step": 84785 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019512806454880974, "loss": 2.2087, "step": 84790 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019512749465561195, "loss": 2.1431, "step": 84795 }, { "epoch": 0.2, "grad_norm": 1.65625, "learning_rate": 0.0001951269247299169, "loss": 2.2467, "step": 84800 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019512635477172475, "loss": 2.0704, "step": 84805 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019512578478103572, "loss": 2.1996, "step": 84810 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019512521475785, "loss": 2.0958, "step": 84815 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001951246447021678, "loss": 2.0401, "step": 84820 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019512407461398925, "loss": 2.2261, "step": 84825 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019512350449331462, "loss": 2.2831, "step": 84830 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.0001951229343401441, "loss": 2.1177, "step": 84835 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.0001951223641544778, "loss": 2.1789, "step": 84840 }, { "epoch": 0.2, "grad_norm": 2.84375, "learning_rate": 0.000195121793936316, "loss": 2.2432, "step": 84845 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.0001951212236856589, "loss": 2.1251, "step": 84850 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019512065340250662, "loss": 2.1366, "step": 84855 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001951200830868594, "loss": 2.1803, "step": 84860 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019511951273871748, "loss": 2.075, "step": 84865 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019511894235808098, "loss": 2.3071, "step": 84870 }, { "epoch": 0.2, "grad_norm": 1.8359375, "learning_rate": 0.00019511837194495014, "loss": 2.2717, "step": 84875 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019511780149932513, "loss": 2.1732, "step": 84880 }, { "epoch": 0.2, "grad_norm": 1.7890625, "learning_rate": 0.00019511723102120615, "loss": 2.0597, "step": 84885 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019511666051059338, "loss": 2.2653, "step": 84890 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019511608996748705, "loss": 2.166, "step": 84895 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019511551939188735, "loss": 2.1193, "step": 84900 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.00019511494878379445, "loss": 2.1135, "step": 84905 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001951143781432086, "loss": 2.1906, "step": 84910 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001951138074701299, "loss": 2.1984, "step": 84915 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019511323676455863, "loss": 2.1548, "step": 84920 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019511266602649494, "loss": 2.2879, "step": 84925 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.000195112095255939, "loss": 2.1649, "step": 84930 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019511152445289112, "loss": 1.9665, "step": 84935 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019511095361735137, "loss": 2.2694, "step": 84940 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.00019511038274932, "loss": 2.1999, "step": 84945 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.0001951098118487972, "loss": 2.1481, "step": 84950 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.00019510924091578318, "loss": 1.9854, "step": 84955 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019510866995027808, "loss": 2.0071, "step": 84960 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019510809895228218, "loss": 2.0183, "step": 84965 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.00019510752792179558, "loss": 2.315, "step": 84970 }, { "epoch": 0.2, "grad_norm": 1.7734375, "learning_rate": 0.00019510695685881854, "loss": 2.0508, "step": 84975 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019510638576335126, "loss": 2.2407, "step": 84980 }, { "epoch": 0.2, "grad_norm": 2.5625, "learning_rate": 0.00019510581463539391, "loss": 1.9685, "step": 84985 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.0001951052434749467, "loss": 1.967, "step": 84990 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019510467228200977, "loss": 2.1977, "step": 84995 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019510410105658338, "loss": 2.3278, "step": 85000 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019510352979866774, "loss": 2.0639, "step": 85005 }, { "epoch": 0.2, "grad_norm": 2.546875, "learning_rate": 0.00019510295850826297, "loss": 2.155, "step": 85010 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.0001951023871853693, "loss": 2.3555, "step": 85015 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019510181582998691, "loss": 2.2585, "step": 85020 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019510124444211609, "loss": 2.1505, "step": 85025 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.0001951006730217569, "loss": 2.0576, "step": 85030 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.0001951001015689096, "loss": 2.1791, "step": 85035 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019509953008357443, "loss": 2.014, "step": 85040 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019509895856575148, "loss": 2.1504, "step": 85045 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.00019509838701544104, "loss": 2.0342, "step": 85050 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019509781543264324, "loss": 2.1762, "step": 85055 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.0001950972438173583, "loss": 2.1806, "step": 85060 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019509667216958642, "loss": 2.2504, "step": 85065 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.0001950961004893278, "loss": 2.3262, "step": 85070 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.0001950955287765826, "loss": 2.2581, "step": 85075 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019509495703135107, "loss": 2.1174, "step": 85080 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019509438525363337, "loss": 2.3289, "step": 85085 }, { "epoch": 0.2, "grad_norm": 2.703125, "learning_rate": 0.00019509381344342972, "loss": 2.3244, "step": 85090 }, { "epoch": 0.2, "grad_norm": 1.6484375, "learning_rate": 0.00019509324160074028, "loss": 1.9763, "step": 85095 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019509266972556527, "loss": 2.1094, "step": 85100 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.0001950920978179049, "loss": 2.0804, "step": 85105 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019509152587775933, "loss": 2.097, "step": 85110 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019509095390512875, "loss": 2.086, "step": 85115 }, { "epoch": 0.2, "grad_norm": 2.375, "learning_rate": 0.00019509038190001338, "loss": 2.0465, "step": 85120 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019508980986241342, "loss": 2.0902, "step": 85125 }, { "epoch": 0.2, "grad_norm": 1.578125, "learning_rate": 0.00019508923779232906, "loss": 2.1808, "step": 85130 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.0001950886656897605, "loss": 2.2534, "step": 85135 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019508809355470793, "loss": 2.0818, "step": 85140 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001950875213871715, "loss": 2.0857, "step": 85145 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019508694918715148, "loss": 2.1075, "step": 85150 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019508637695464805, "loss": 2.1616, "step": 85155 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019508580468966135, "loss": 2.1046, "step": 85160 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019508523239219165, "loss": 1.9358, "step": 85165 }, { "epoch": 0.2, "grad_norm": 1.6640625, "learning_rate": 0.0001950846600622391, "loss": 2.1172, "step": 85170 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.0001950840876998039, "loss": 2.0201, "step": 85175 }, { "epoch": 0.2, "grad_norm": 1.59375, "learning_rate": 0.00019508351530488627, "loss": 1.962, "step": 85180 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019508294287748633, "loss": 1.9679, "step": 85185 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.0001950823704176044, "loss": 2.3605, "step": 85190 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019508179792524058, "loss": 2.2858, "step": 85195 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.0001950812254003951, "loss": 2.1965, "step": 85200 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.0001950806528430681, "loss": 2.1915, "step": 85205 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019508008025325988, "loss": 2.2335, "step": 85210 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.0001950795076309706, "loss": 2.1313, "step": 85215 }, { "epoch": 0.2, "grad_norm": 2.9375, "learning_rate": 0.0001950789349762004, "loss": 2.1067, "step": 85220 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019507836228894952, "loss": 2.2283, "step": 85225 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019507778956921813, "loss": 2.2158, "step": 85230 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019507721681700645, "loss": 2.1646, "step": 85235 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019507664403231468, "loss": 2.0945, "step": 85240 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.00019507607121514297, "loss": 2.1582, "step": 85245 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019507549836549157, "loss": 2.1624, "step": 85250 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019507492548336068, "loss": 2.1779, "step": 85255 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019507435256875045, "loss": 2.2017, "step": 85260 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.0001950737796216611, "loss": 2.1207, "step": 85265 }, { "epoch": 0.2, "grad_norm": 1.6953125, "learning_rate": 0.00019507320664209282, "loss": 2.1864, "step": 85270 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.00019507263363004583, "loss": 2.1063, "step": 85275 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019507206058552026, "loss": 2.2459, "step": 85280 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.0001950714875085164, "loss": 2.2251, "step": 85285 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019507091439903434, "loss": 2.3237, "step": 85290 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019507034125707437, "loss": 2.1662, "step": 85295 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019506976808263664, "loss": 2.0999, "step": 85300 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019506919487572138, "loss": 2.0112, "step": 85305 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.0001950686216363287, "loss": 2.1407, "step": 85310 }, { "epoch": 0.2, "grad_norm": 1.609375, "learning_rate": 0.00019506804836445888, "loss": 2.0538, "step": 85315 }, { "epoch": 0.2, "grad_norm": 1.7734375, "learning_rate": 0.00019506747506011212, "loss": 2.3743, "step": 85320 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019506690172328855, "loss": 2.2279, "step": 85325 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019506632835398843, "loss": 2.0998, "step": 85330 }, { "epoch": 0.2, "grad_norm": 1.6640625, "learning_rate": 0.0001950657549522119, "loss": 2.1056, "step": 85335 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.0001950651815179592, "loss": 2.1056, "step": 85340 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.0001950646080512305, "loss": 1.9713, "step": 85345 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.000195064034552026, "loss": 2.0356, "step": 85350 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.00019506346102034593, "loss": 2.2529, "step": 85355 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019506288745619043, "loss": 2.253, "step": 85360 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.00019506231385955976, "loss": 2.1601, "step": 85365 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019506174023045404, "loss": 2.194, "step": 85370 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019506116656887353, "loss": 2.2399, "step": 85375 }, { "epoch": 0.2, "grad_norm": 1.71875, "learning_rate": 0.00019506059287481838, "loss": 2.1092, "step": 85380 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019506001914828885, "loss": 2.0777, "step": 85385 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019505944538928503, "loss": 2.2779, "step": 85390 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019505887159780724, "loss": 2.1713, "step": 85395 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019505829777385558, "loss": 1.864, "step": 85400 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019505772391743026, "loss": 2.2071, "step": 85405 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019505715002853156, "loss": 2.2042, "step": 85410 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019505657610715957, "loss": 2.0646, "step": 85415 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019505600215331454, "loss": 2.0246, "step": 85420 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.00019505542816699664, "loss": 2.3104, "step": 85425 }, { "epoch": 0.2, "grad_norm": 2.4375, "learning_rate": 0.0001950548541482061, "loss": 2.2062, "step": 85430 }, { "epoch": 0.2, "grad_norm": 1.640625, "learning_rate": 0.0001950542800969431, "loss": 2.173, "step": 85435 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019505370601320782, "loss": 1.9256, "step": 85440 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.0001950531318970005, "loss": 2.0821, "step": 85445 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019505255774832127, "loss": 2.0477, "step": 85450 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019505198356717038, "loss": 2.0907, "step": 85455 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.000195051409353548, "loss": 2.0944, "step": 85460 }, { "epoch": 0.2, "grad_norm": 1.71875, "learning_rate": 0.00019505083510745435, "loss": 2.1255, "step": 85465 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.0001950502608288896, "loss": 2.1083, "step": 85470 }, { "epoch": 0.2, "grad_norm": 1.6796875, "learning_rate": 0.00019504968651785397, "loss": 2.261, "step": 85475 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019504911217434762, "loss": 2.1663, "step": 85480 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.0001950485377983708, "loss": 1.9269, "step": 85485 }, { "epoch": 0.2, "grad_norm": 1.71875, "learning_rate": 0.00019504796338992364, "loss": 1.9839, "step": 85490 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019504738894900642, "loss": 2.1582, "step": 85495 }, { "epoch": 0.2, "grad_norm": 1.5703125, "learning_rate": 0.00019504681447561923, "loss": 2.1722, "step": 85500 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.00019504623996976237, "loss": 2.3223, "step": 85505 }, { "epoch": 0.2, "grad_norm": 1.6640625, "learning_rate": 0.00019504566543143597, "loss": 2.0402, "step": 85510 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019504509086064025, "loss": 2.1941, "step": 85515 }, { "epoch": 0.2, "grad_norm": 2.375, "learning_rate": 0.00019504451625737538, "loss": 2.1599, "step": 85520 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019504394162164163, "loss": 2.1462, "step": 85525 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.0001950433669534391, "loss": 2.2251, "step": 85530 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019504279225276806, "loss": 2.2356, "step": 85535 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.00019504221751962867, "loss": 2.2581, "step": 85540 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019504164275402114, "loss": 1.9783, "step": 85545 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.00019504106795594566, "loss": 2.1361, "step": 85550 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.0001950404931254024, "loss": 2.1086, "step": 85555 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.0001950399182623916, "loss": 2.2368, "step": 85560 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019503934336691348, "loss": 2.2746, "step": 85565 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019503876843896816, "loss": 2.1042, "step": 85570 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019503819347855587, "loss": 2.2176, "step": 85575 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001950376184856768, "loss": 2.0918, "step": 85580 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.0001950370434603312, "loss": 2.2087, "step": 85585 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019503646840251919, "loss": 2.2257, "step": 85590 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019503589331224103, "loss": 2.1635, "step": 85595 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019503531818949684, "loss": 2.0775, "step": 85600 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.0001950347430342869, "loss": 2.2188, "step": 85605 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.0001950341678466113, "loss": 2.2264, "step": 85610 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019503359262647036, "loss": 2.1856, "step": 85615 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019503301737386423, "loss": 2.2011, "step": 85620 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019503244208879308, "loss": 2.192, "step": 85625 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019503186677125714, "loss": 2.2487, "step": 85630 }, { "epoch": 0.2, "grad_norm": 2.234375, "learning_rate": 0.00019503129142125656, "loss": 2.2554, "step": 85635 }, { "epoch": 0.2, "grad_norm": 2.71875, "learning_rate": 0.00019503071603879158, "loss": 2.1139, "step": 85640 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.0001950301406238624, "loss": 2.1778, "step": 85645 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019502956517646918, "loss": 2.1895, "step": 85650 }, { "epoch": 0.2, "grad_norm": 2.34375, "learning_rate": 0.00019502898969661216, "loss": 2.2171, "step": 85655 }, { "epoch": 0.2, "grad_norm": 2.234375, "learning_rate": 0.0001950284141842915, "loss": 2.1521, "step": 85660 }, { "epoch": 0.2, "grad_norm": 3.09375, "learning_rate": 0.0001950278386395074, "loss": 2.2456, "step": 85665 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.0001950272630622601, "loss": 2.0901, "step": 85670 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019502668745254972, "loss": 2.3621, "step": 85675 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019502611181037652, "loss": 2.1258, "step": 85680 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019502553613574069, "loss": 2.1713, "step": 85685 }, { "epoch": 0.2, "grad_norm": 3.484375, "learning_rate": 0.0001950249604286424, "loss": 2.0293, "step": 85690 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.00019502438468908185, "loss": 2.0825, "step": 85695 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019502380891705924, "loss": 2.182, "step": 85700 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001950232331125748, "loss": 2.208, "step": 85705 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019502265727562872, "loss": 2.1594, "step": 85710 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.00019502208140622114, "loss": 2.2981, "step": 85715 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.0001950215055043523, "loss": 2.2368, "step": 85720 }, { "epoch": 0.2, "grad_norm": 1.65625, "learning_rate": 0.0001950209295700224, "loss": 2.2015, "step": 85725 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019502035360323162, "loss": 2.1556, "step": 85730 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.00019501977760398015, "loss": 2.2523, "step": 85735 }, { "epoch": 0.2, "grad_norm": 3.0, "learning_rate": 0.00019501920157226823, "loss": 2.2435, "step": 85740 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019501862550809598, "loss": 2.1583, "step": 85745 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.0001950180494114637, "loss": 1.9021, "step": 85750 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.0001950174732823715, "loss": 2.2338, "step": 85755 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019501689712081964, "loss": 2.1665, "step": 85760 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019501632092680824, "loss": 2.1277, "step": 85765 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019501574470033757, "loss": 2.1246, "step": 85770 }, { "epoch": 0.2, "grad_norm": 2.34375, "learning_rate": 0.00019501516844140778, "loss": 2.1754, "step": 85775 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.0001950145921500191, "loss": 2.0155, "step": 85780 }, { "epoch": 0.2, "grad_norm": 1.5859375, "learning_rate": 0.0001950140158261717, "loss": 2.2073, "step": 85785 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.0001950134394698658, "loss": 2.2228, "step": 85790 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.0001950128630811016, "loss": 2.0906, "step": 85795 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.00019501228665987926, "loss": 2.2263, "step": 85800 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.000195011710206199, "loss": 2.1605, "step": 85805 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019501113372006102, "loss": 2.0999, "step": 85810 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019501055720146551, "loss": 2.1998, "step": 85815 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019500998065041268, "loss": 2.2367, "step": 85820 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019500940406690272, "loss": 2.2315, "step": 85825 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.0001950088274509358, "loss": 2.2861, "step": 85830 }, { "epoch": 0.2, "grad_norm": 2.46875, "learning_rate": 0.00019500825080251218, "loss": 2.0879, "step": 85835 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.000195007674121632, "loss": 2.1304, "step": 85840 }, { "epoch": 0.2, "grad_norm": 2.234375, "learning_rate": 0.00019500709740829546, "loss": 2.1451, "step": 85845 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019500652066250279, "loss": 2.1527, "step": 85850 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.00019500594388425414, "loss": 2.1978, "step": 85855 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.0001950053670735498, "loss": 2.2256, "step": 85860 }, { "epoch": 0.2, "grad_norm": 1.6953125, "learning_rate": 0.00019500479023038984, "loss": 2.3744, "step": 85865 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019500421335477458, "loss": 2.0807, "step": 85870 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019500363644670408, "loss": 1.9219, "step": 85875 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019500305950617867, "loss": 2.2365, "step": 85880 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.0001950024825331985, "loss": 2.2555, "step": 85885 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019500190552776373, "loss": 2.3574, "step": 85890 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.00019500132848987458, "loss": 2.1753, "step": 85895 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.0001950007514195313, "loss": 2.2862, "step": 85900 }, { "epoch": 0.2, "grad_norm": 2.421875, "learning_rate": 0.00019500017431673398, "loss": 2.095, "step": 85905 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001949995971814829, "loss": 2.2334, "step": 85910 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019499902001377827, "loss": 2.0087, "step": 85915 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019499844281362024, "loss": 2.1159, "step": 85920 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019499786558100897, "loss": 2.338, "step": 85925 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019499728831594474, "loss": 2.2023, "step": 85930 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019499671101842772, "loss": 2.0938, "step": 85935 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.00019499613368845808, "loss": 2.2112, "step": 85940 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019499555632603605, "loss": 2.1169, "step": 85945 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.0001949949789311618, "loss": 2.0659, "step": 85950 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001949944015038356, "loss": 2.197, "step": 85955 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019499382404405754, "loss": 2.1543, "step": 85960 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019499324655182786, "loss": 2.2861, "step": 85965 }, { "epoch": 0.2, "grad_norm": 1.625, "learning_rate": 0.00019499266902714679, "loss": 2.2286, "step": 85970 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019499209147001448, "loss": 2.3303, "step": 85975 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019499151388043115, "loss": 2.1639, "step": 85980 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.000194990936258397, "loss": 2.1949, "step": 85985 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019499035860391227, "loss": 2.2858, "step": 85990 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.00019498978091697704, "loss": 2.1576, "step": 85995 }, { "epoch": 0.2, "grad_norm": 2.390625, "learning_rate": 0.0001949892031975916, "loss": 2.2897, "step": 86000 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019498862544575616, "loss": 2.2053, "step": 86005 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019498804766147085, "loss": 2.1565, "step": 86010 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.0001949874698447359, "loss": 2.1436, "step": 86015 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019498689199555152, "loss": 2.1956, "step": 86020 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019498631411391787, "loss": 2.246, "step": 86025 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001949857361998352, "loss": 2.0084, "step": 86030 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019498515825330367, "loss": 2.0217, "step": 86035 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.00019498458027432348, "loss": 2.1867, "step": 86040 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019498400226289486, "loss": 2.2317, "step": 86045 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019498342421901796, "loss": 2.261, "step": 86050 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.00019498284614269303, "loss": 2.3524, "step": 86055 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019498226803392018, "loss": 2.0506, "step": 86060 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001949816898926997, "loss": 2.213, "step": 86065 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.00019498111171903175, "loss": 2.181, "step": 86070 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019498053351291655, "loss": 2.1233, "step": 86075 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019497995527435424, "loss": 2.0034, "step": 86080 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019497937700334506, "loss": 2.1614, "step": 86085 }, { "epoch": 0.2, "grad_norm": 1.8359375, "learning_rate": 0.00019497879869988921, "loss": 1.9989, "step": 86090 }, { "epoch": 0.2, "grad_norm": 2.78125, "learning_rate": 0.0001949782203639869, "loss": 2.1255, "step": 86095 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.00019497764199563828, "loss": 2.1732, "step": 86100 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019497706359484357, "loss": 2.0253, "step": 86105 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.000194976485161603, "loss": 2.028, "step": 86110 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019497590669591673, "loss": 2.1932, "step": 86115 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019497532819778496, "loss": 2.1442, "step": 86120 }, { "epoch": 0.2, "grad_norm": 1.7421875, "learning_rate": 0.0001949747496672079, "loss": 2.208, "step": 86125 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019497417110418575, "loss": 2.0638, "step": 86130 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.0001949735925087187, "loss": 2.2886, "step": 86135 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019497301388080694, "loss": 2.1689, "step": 86140 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019497243522045068, "loss": 2.0272, "step": 86145 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.0001949718565276501, "loss": 2.148, "step": 86150 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.00019497127780240546, "loss": 2.3058, "step": 86155 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.00019497069904471685, "loss": 2.2665, "step": 86160 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019497012025458457, "loss": 2.1765, "step": 86165 }, { "epoch": 0.2, "grad_norm": 1.8515625, "learning_rate": 0.00019496954143200873, "loss": 2.149, "step": 86170 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.0001949689625769896, "loss": 2.2131, "step": 86175 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.00019496838368952734, "loss": 2.0187, "step": 86180 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019496780476962216, "loss": 2.0989, "step": 86185 }, { "epoch": 0.2, "grad_norm": 1.6640625, "learning_rate": 0.0001949672258172743, "loss": 2.2194, "step": 86190 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019496664683248385, "loss": 2.1336, "step": 86195 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.00019496606781525109, "loss": 2.1951, "step": 86200 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.0001949654887655762, "loss": 2.034, "step": 86205 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019496490968345936, "loss": 2.1505, "step": 86210 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019496433056890084, "loss": 2.024, "step": 86215 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019496375142190076, "loss": 2.2506, "step": 86220 }, { "epoch": 0.2, "grad_norm": 2.796875, "learning_rate": 0.0001949631722424593, "loss": 2.1803, "step": 86225 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.00019496259303057673, "loss": 2.0603, "step": 86230 }, { "epoch": 0.2, "grad_norm": 1.9765625, "learning_rate": 0.0001949620137862532, "loss": 2.2306, "step": 86235 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.00019496143450948897, "loss": 2.1089, "step": 86240 }, { "epoch": 0.2, "grad_norm": 2.390625, "learning_rate": 0.00019496085520028414, "loss": 2.1375, "step": 86245 }, { "epoch": 0.2, "grad_norm": 1.5703125, "learning_rate": 0.00019496027585863898, "loss": 2.0235, "step": 86250 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019495969648455365, "loss": 2.3941, "step": 86255 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.0001949591170780284, "loss": 2.1133, "step": 86260 }, { "epoch": 0.2, "grad_norm": 1.5546875, "learning_rate": 0.00019495853763906338, "loss": 2.1295, "step": 86265 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.0001949579581676588, "loss": 2.1748, "step": 86270 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019495737866381485, "loss": 2.1142, "step": 86275 }, { "epoch": 0.2, "grad_norm": 1.7109375, "learning_rate": 0.0001949567991275317, "loss": 2.1492, "step": 86280 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019495621955880965, "loss": 2.1064, "step": 86285 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.0001949556399576488, "loss": 2.2722, "step": 86290 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.0001949550603240494, "loss": 2.1964, "step": 86295 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.0001949544806580116, "loss": 2.0898, "step": 86300 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019495390095953567, "loss": 2.1397, "step": 86305 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.00019495332122862173, "loss": 2.119, "step": 86310 }, { "epoch": 0.2, "grad_norm": 1.8359375, "learning_rate": 0.00019495274146527004, "loss": 1.9927, "step": 86315 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.00019495216166948075, "loss": 2.2559, "step": 86320 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.0001949515818412541, "loss": 2.234, "step": 86325 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019495100198059024, "loss": 2.0378, "step": 86330 }, { "epoch": 0.2, "grad_norm": 2.109375, "learning_rate": 0.0001949504220874894, "loss": 2.3672, "step": 86335 }, { "epoch": 0.2, "grad_norm": 2.375, "learning_rate": 0.00019494984216195184, "loss": 1.9388, "step": 86340 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.0001949492622039776, "loss": 2.2175, "step": 86345 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019494868221356701, "loss": 2.091, "step": 86350 }, { "epoch": 0.2, "grad_norm": 1.90625, "learning_rate": 0.00019494810219072023, "loss": 2.1215, "step": 86355 }, { "epoch": 0.2, "grad_norm": 2.1875, "learning_rate": 0.00019494752213543745, "loss": 2.1891, "step": 86360 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019494694204771887, "loss": 2.2063, "step": 86365 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001949463619275647, "loss": 2.1817, "step": 86370 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.00019494578177497513, "loss": 1.9951, "step": 86375 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019494520158995035, "loss": 2.3289, "step": 86380 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019494462137249058, "loss": 2.1679, "step": 86385 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.000194944041122596, "loss": 2.0145, "step": 86390 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019494346084026682, "loss": 2.2092, "step": 86395 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019494288052550324, "loss": 2.1436, "step": 86400 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.00019494230017830542, "loss": 2.3022, "step": 86405 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.0001949417197986736, "loss": 2.0844, "step": 86410 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019494113938660798, "loss": 2.016, "step": 86415 }, { "epoch": 0.2, "grad_norm": 2.609375, "learning_rate": 0.00019494055894210872, "loss": 2.1777, "step": 86420 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.00019493997846517605, "loss": 2.2639, "step": 86425 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.0001949393979558102, "loss": 2.1549, "step": 86430 }, { "epoch": 0.2, "grad_norm": 2.421875, "learning_rate": 0.0001949388174140113, "loss": 2.1707, "step": 86435 }, { "epoch": 0.2, "grad_norm": 2.46875, "learning_rate": 0.00019493823683977958, "loss": 2.0764, "step": 86440 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019493765623311525, "loss": 2.1259, "step": 86445 }, { "epoch": 0.2, "grad_norm": 1.7734375, "learning_rate": 0.00019493707559401847, "loss": 2.1627, "step": 86450 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019493649492248946, "loss": 2.2892, "step": 86455 }, { "epoch": 0.2, "grad_norm": 1.7734375, "learning_rate": 0.00019493591421852844, "loss": 2.0099, "step": 86460 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.0001949353334821356, "loss": 1.9916, "step": 86465 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.0001949347527133111, "loss": 2.0717, "step": 86470 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.0001949341719120552, "loss": 2.3434, "step": 86475 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019493359107836803, "loss": 2.0952, "step": 86480 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019493301021224983, "loss": 2.2829, "step": 86485 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019493242931370078, "loss": 2.2062, "step": 86490 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.00019493184838272114, "loss": 2.01, "step": 86495 }, { "epoch": 0.2, "grad_norm": 1.6171875, "learning_rate": 0.00019493126741931103, "loss": 2.172, "step": 86500 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019493068642347067, "loss": 2.1894, "step": 86505 }, { "epoch": 0.2, "grad_norm": 1.609375, "learning_rate": 0.00019493010539520028, "loss": 2.1578, "step": 86510 }, { "epoch": 0.2, "grad_norm": 2.125, "learning_rate": 0.00019492952433450004, "loss": 2.2987, "step": 86515 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019492894324137016, "loss": 2.1707, "step": 86520 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.0001949283621158108, "loss": 2.3202, "step": 86525 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.0001949277809578222, "loss": 2.1724, "step": 86530 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001949271997674046, "loss": 2.1001, "step": 86535 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.0001949266185445581, "loss": 2.0063, "step": 86540 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019492603728928293, "loss": 2.1972, "step": 86545 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019492545600157933, "loss": 2.2535, "step": 86550 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019492487468144746, "loss": 2.3242, "step": 86555 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019492429332888757, "loss": 2.079, "step": 86560 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.0001949237119438998, "loss": 2.3305, "step": 86565 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.00019492313052648435, "loss": 2.0796, "step": 86570 }, { "epoch": 0.2, "grad_norm": 1.7734375, "learning_rate": 0.00019492254907664146, "loss": 2.3672, "step": 86575 }, { "epoch": 0.2, "grad_norm": 1.8046875, "learning_rate": 0.00019492196759437127, "loss": 2.2482, "step": 86580 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.00019492138607967406, "loss": 2.2978, "step": 86585 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019492080453254994, "loss": 2.2811, "step": 86590 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019492022295299917, "loss": 2.3018, "step": 86595 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019491964134102192, "loss": 2.2813, "step": 86600 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019491905969661844, "loss": 2.3647, "step": 86605 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019491847801978885, "loss": 2.1407, "step": 86610 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019491789631053338, "loss": 2.2445, "step": 86615 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019491731456885224, "loss": 2.0649, "step": 86620 }, { "epoch": 0.2, "grad_norm": 1.8125, "learning_rate": 0.00019491673279474565, "loss": 2.224, "step": 86625 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019491615098821377, "loss": 1.9713, "step": 86630 }, { "epoch": 0.2, "grad_norm": 1.6328125, "learning_rate": 0.0001949155691492568, "loss": 2.1905, "step": 86635 }, { "epoch": 0.2, "grad_norm": 1.75, "learning_rate": 0.00019491498727787496, "loss": 2.3519, "step": 86640 }, { "epoch": 0.2, "grad_norm": 2.296875, "learning_rate": 0.00019491440537406843, "loss": 2.1568, "step": 86645 }, { "epoch": 0.2, "grad_norm": 2.5, "learning_rate": 0.00019491382343783743, "loss": 2.2838, "step": 86650 }, { "epoch": 0.2, "grad_norm": 1.625, "learning_rate": 0.00019491324146918214, "loss": 2.3074, "step": 86655 }, { "epoch": 0.2, "grad_norm": 1.84375, "learning_rate": 0.00019491265946810279, "loss": 2.2003, "step": 86660 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019491207743459952, "loss": 2.1406, "step": 86665 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019491149536867257, "loss": 2.0871, "step": 86670 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019491091327032215, "loss": 2.1575, "step": 86675 }, { "epoch": 0.2, "grad_norm": 1.9609375, "learning_rate": 0.00019491033113954841, "loss": 2.1768, "step": 86680 }, { "epoch": 0.2, "grad_norm": 1.9921875, "learning_rate": 0.00019490974897635162, "loss": 2.1372, "step": 86685 }, { "epoch": 0.2, "grad_norm": 1.6015625, "learning_rate": 0.0001949091667807319, "loss": 2.0908, "step": 86690 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.0001949085845526895, "loss": 2.4089, "step": 86695 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.00019490800229222458, "loss": 2.0587, "step": 86700 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019490741999933742, "loss": 2.0388, "step": 86705 }, { "epoch": 0.2, "grad_norm": 1.59375, "learning_rate": 0.00019490683767402813, "loss": 2.0379, "step": 86710 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019490625531629695, "loss": 2.1174, "step": 86715 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019490567292614408, "loss": 2.2599, "step": 86720 }, { "epoch": 0.2, "grad_norm": 1.953125, "learning_rate": 0.00019490509050356972, "loss": 2.0836, "step": 86725 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019490450804857402, "loss": 2.1128, "step": 86730 }, { "epoch": 0.2, "grad_norm": 1.890625, "learning_rate": 0.00019490392556115724, "loss": 2.2867, "step": 86735 }, { "epoch": 0.2, "grad_norm": 1.5859375, "learning_rate": 0.0001949033430413196, "loss": 2.2339, "step": 86740 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.0001949027604890612, "loss": 2.1902, "step": 86745 }, { "epoch": 0.2, "grad_norm": 1.875, "learning_rate": 0.0001949021779043823, "loss": 2.0662, "step": 86750 }, { "epoch": 0.2, "grad_norm": 2.3125, "learning_rate": 0.0001949015952872831, "loss": 2.1242, "step": 86755 }, { "epoch": 0.2, "grad_norm": 1.6015625, "learning_rate": 0.0001949010126377638, "loss": 2.0042, "step": 86760 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.0001949004299558246, "loss": 2.3269, "step": 86765 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019489984724146568, "loss": 2.0447, "step": 86770 }, { "epoch": 0.2, "grad_norm": 1.828125, "learning_rate": 0.00019489926449468728, "loss": 2.323, "step": 86775 }, { "epoch": 0.2, "grad_norm": 2.484375, "learning_rate": 0.00019489868171548955, "loss": 2.3408, "step": 86780 }, { "epoch": 0.2, "grad_norm": 2.140625, "learning_rate": 0.00019489809890387268, "loss": 2.1791, "step": 86785 }, { "epoch": 0.2, "grad_norm": 1.796875, "learning_rate": 0.00019489751605983693, "loss": 2.104, "step": 86790 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.00019489693318338247, "loss": 2.1994, "step": 86795 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.0001948963502745095, "loss": 2.1867, "step": 86800 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.0001948957673332182, "loss": 2.102, "step": 86805 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019489518435950878, "loss": 2.2171, "step": 86810 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019489460135338146, "loss": 2.1889, "step": 86815 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019489401831483642, "loss": 1.9064, "step": 86820 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019489343524387387, "loss": 1.9877, "step": 86825 }, { "epoch": 0.2, "grad_norm": 1.7578125, "learning_rate": 0.00019489285214049397, "loss": 2.0696, "step": 86830 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.000194892269004697, "loss": 2.2379, "step": 86835 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.00019489168583648305, "loss": 1.9988, "step": 86840 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019489110263585241, "loss": 2.2281, "step": 86845 }, { "epoch": 0.2, "grad_norm": 1.78125, "learning_rate": 0.00019489051940280527, "loss": 2.2908, "step": 86850 }, { "epoch": 0.2, "grad_norm": 2.0, "learning_rate": 0.00019488993613734178, "loss": 2.0853, "step": 86855 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.0001948893528394622, "loss": 2.1655, "step": 86860 }, { "epoch": 0.2, "grad_norm": 1.640625, "learning_rate": 0.00019488876950916667, "loss": 2.1234, "step": 86865 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.0001948881861464554, "loss": 2.1069, "step": 86870 }, { "epoch": 0.2, "grad_norm": 1.734375, "learning_rate": 0.00019488760275132862, "loss": 2.1813, "step": 86875 }, { "epoch": 0.2, "grad_norm": 2.390625, "learning_rate": 0.00019488701932378653, "loss": 2.0359, "step": 86880 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019488643586382929, "loss": 2.0706, "step": 86885 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.0001948858523714571, "loss": 2.0536, "step": 86890 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.00019488526884667024, "loss": 2.0778, "step": 86895 }, { "epoch": 0.2, "grad_norm": 1.5390625, "learning_rate": 0.00019488468528946882, "loss": 2.084, "step": 86900 }, { "epoch": 0.2, "grad_norm": 1.96875, "learning_rate": 0.00019488410169985306, "loss": 2.2304, "step": 86905 }, { "epoch": 0.2, "grad_norm": 1.9140625, "learning_rate": 0.00019488351807782318, "loss": 2.2419, "step": 86910 }, { "epoch": 0.2, "grad_norm": 1.9453125, "learning_rate": 0.0001948829344233794, "loss": 1.9857, "step": 86915 }, { "epoch": 0.2, "grad_norm": 2.84375, "learning_rate": 0.00019488235073652184, "loss": 2.1241, "step": 86920 }, { "epoch": 0.2, "grad_norm": 2.171875, "learning_rate": 0.00019488176701725079, "loss": 2.2617, "step": 86925 }, { "epoch": 0.2, "grad_norm": 2.03125, "learning_rate": 0.00019488118326556637, "loss": 2.256, "step": 86930 }, { "epoch": 0.2, "grad_norm": 1.7265625, "learning_rate": 0.00019488059948146884, "loss": 2.2142, "step": 86935 }, { "epoch": 0.2, "grad_norm": 1.6796875, "learning_rate": 0.00019488001566495838, "loss": 2.237, "step": 86940 }, { "epoch": 0.2, "grad_norm": 2.046875, "learning_rate": 0.00019487943181603517, "loss": 2.1467, "step": 86945 }, { "epoch": 0.2, "grad_norm": 1.9296875, "learning_rate": 0.00019487884793469942, "loss": 2.2028, "step": 86950 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.00019487826402095134, "loss": 2.2082, "step": 86955 }, { "epoch": 0.2, "grad_norm": 1.765625, "learning_rate": 0.00019487768007479117, "loss": 2.1192, "step": 86960 }, { "epoch": 0.2, "grad_norm": 2.21875, "learning_rate": 0.000194877096096219, "loss": 2.1417, "step": 86965 }, { "epoch": 0.2, "grad_norm": 1.8984375, "learning_rate": 0.0001948765120852351, "loss": 2.2064, "step": 86970 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019487592804183973, "loss": 2.0849, "step": 86975 }, { "epoch": 0.2, "grad_norm": 1.5859375, "learning_rate": 0.00019487534396603296, "loss": 2.1832, "step": 86980 }, { "epoch": 0.2, "grad_norm": 2.0625, "learning_rate": 0.00019487475985781506, "loss": 1.9305, "step": 86985 }, { "epoch": 0.2, "grad_norm": 2.09375, "learning_rate": 0.00019487417571718624, "loss": 2.2422, "step": 86990 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.00019487359154414665, "loss": 2.1807, "step": 86995 }, { "epoch": 0.2, "grad_norm": 2.359375, "learning_rate": 0.00019487300733869652, "loss": 2.1428, "step": 87000 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.0001948724231008361, "loss": 2.1261, "step": 87005 }, { "epoch": 0.2, "grad_norm": 1.859375, "learning_rate": 0.00019487183883056553, "loss": 2.1886, "step": 87010 }, { "epoch": 0.2, "grad_norm": 2.203125, "learning_rate": 0.00019487125452788496, "loss": 2.2149, "step": 87015 }, { "epoch": 0.2, "grad_norm": 2.28125, "learning_rate": 0.0001948706701927947, "loss": 2.1976, "step": 87020 }, { "epoch": 0.2, "grad_norm": 1.9375, "learning_rate": 0.00019487008582529488, "loss": 2.2139, "step": 87025 }, { "epoch": 0.2, "grad_norm": 2.15625, "learning_rate": 0.00019486950142538574, "loss": 2.3327, "step": 87030 }, { "epoch": 0.2, "grad_norm": 2.46875, "learning_rate": 0.00019486891699306745, "loss": 2.2349, "step": 87035 }, { "epoch": 0.2, "grad_norm": 1.703125, "learning_rate": 0.0001948683325283402, "loss": 2.0584, "step": 87040 }, { "epoch": 0.2, "grad_norm": 1.6953125, "learning_rate": 0.00019486774803120422, "loss": 2.1035, "step": 87045 }, { "epoch": 0.2, "grad_norm": 1.984375, "learning_rate": 0.00019486716350165968, "loss": 2.3178, "step": 87050 }, { "epoch": 0.2, "grad_norm": 1.921875, "learning_rate": 0.0001948665789397068, "loss": 2.1599, "step": 87055 }, { "epoch": 0.2, "grad_norm": 1.8203125, "learning_rate": 0.0001948659943453458, "loss": 2.1698, "step": 87060 }, { "epoch": 0.2, "grad_norm": 1.8671875, "learning_rate": 0.00019486540971857684, "loss": 2.2575, "step": 87065 }, { "epoch": 0.2, "grad_norm": 1.65625, "learning_rate": 0.00019486482505940014, "loss": 2.1251, "step": 87070 }, { "epoch": 0.2, "grad_norm": 2.375, "learning_rate": 0.0001948642403678159, "loss": 1.9427, "step": 87075 }, { "epoch": 0.2, "grad_norm": 2.078125, "learning_rate": 0.0001948636556438243, "loss": 2.1739, "step": 87080 }, { "epoch": 0.2, "grad_norm": 2.25, "learning_rate": 0.00019486307088742555, "loss": 2.1243, "step": 87085 }, { "epoch": 0.2, "grad_norm": 2.015625, "learning_rate": 0.00019486248609861984, "loss": 2.1771, "step": 87090 }, { "epoch": 0.2, "grad_norm": 2.265625, "learning_rate": 0.00019486190127740745, "loss": 2.3599, "step": 87095 }, { "epoch": 0.2, "grad_norm": 1.6484375, "learning_rate": 0.00019486131642378846, "loss": 2.0556, "step": 87100 }, { "epoch": 0.2, "grad_norm": 2.328125, "learning_rate": 0.00019486073153776313, "loss": 2.1703, "step": 87105 }, { "epoch": 0.2, "grad_norm": 1.8828125, "learning_rate": 0.00019486014661933168, "loss": 2.1966, "step": 87110 }, { "epoch": 0.21, "grad_norm": 1.765625, "learning_rate": 0.00019485956166849427, "loss": 2.1733, "step": 87115 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.0001948589766852511, "loss": 2.1449, "step": 87120 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.0001948583916696024, "loss": 2.2744, "step": 87125 }, { "epoch": 0.21, "grad_norm": 1.625, "learning_rate": 0.00019485780662154833, "loss": 2.0536, "step": 87130 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.00019485722154108915, "loss": 2.1426, "step": 87135 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.000194856636428225, "loss": 2.313, "step": 87140 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.0001948560512829561, "loss": 2.1623, "step": 87145 }, { "epoch": 0.21, "grad_norm": 1.6796875, "learning_rate": 0.00019485546610528264, "loss": 1.9835, "step": 87150 }, { "epoch": 0.21, "grad_norm": 1.7109375, "learning_rate": 0.00019485488089520487, "loss": 2.1545, "step": 87155 }, { "epoch": 0.21, "grad_norm": 2.546875, "learning_rate": 0.00019485429565272293, "loss": 2.2207, "step": 87160 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.00019485371037783705, "loss": 2.1199, "step": 87165 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.0001948531250705474, "loss": 2.1606, "step": 87170 }, { "epoch": 0.21, "grad_norm": 2.71875, "learning_rate": 0.00019485253973085422, "loss": 2.3288, "step": 87175 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.0001948519543587577, "loss": 2.1152, "step": 87180 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019485136895425802, "loss": 2.3282, "step": 87185 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019485078351735537, "loss": 2.3218, "step": 87190 }, { "epoch": 0.21, "grad_norm": 1.6953125, "learning_rate": 0.00019485019804805004, "loss": 2.1834, "step": 87195 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.0001948496125463421, "loss": 2.1998, "step": 87200 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019484902701223184, "loss": 2.2533, "step": 87205 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.0001948484414457194, "loss": 2.2369, "step": 87210 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019484785584680505, "loss": 2.2416, "step": 87215 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019484727021548893, "loss": 2.1111, "step": 87220 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019484668455177126, "loss": 2.1762, "step": 87225 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019484609885565227, "loss": 2.2399, "step": 87230 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019484551312713212, "loss": 2.4506, "step": 87235 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.000194844927366211, "loss": 2.1526, "step": 87240 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019484434157288915, "loss": 2.1803, "step": 87245 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019484375574716677, "loss": 2.162, "step": 87250 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.000194843169889044, "loss": 2.2394, "step": 87255 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019484258399852112, "loss": 2.1702, "step": 87260 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019484199807559828, "loss": 2.1151, "step": 87265 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019484141212027572, "loss": 2.2713, "step": 87270 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019484082613255356, "loss": 2.1578, "step": 87275 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.0001948402401124321, "loss": 2.1271, "step": 87280 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019483965405991147, "loss": 2.2976, "step": 87285 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019483906797499188, "loss": 2.3564, "step": 87290 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.0001948384818576736, "loss": 2.0865, "step": 87295 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019483789570795673, "loss": 2.0345, "step": 87300 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.0001948373095258415, "loss": 2.2348, "step": 87305 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.0001948367233113282, "loss": 2.2986, "step": 87310 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.0001948361370644169, "loss": 2.0943, "step": 87315 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019483555078510784, "loss": 2.0746, "step": 87320 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019483496447340128, "loss": 2.2224, "step": 87325 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019483437812929734, "loss": 2.2593, "step": 87330 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019483379175279627, "loss": 2.3135, "step": 87335 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019483320534389827, "loss": 2.2639, "step": 87340 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019483261890260352, "loss": 2.1052, "step": 87345 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.0001948320324289122, "loss": 2.011, "step": 87350 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019483144592282456, "loss": 2.2733, "step": 87355 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.0001948308593843408, "loss": 2.2401, "step": 87360 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019483027281346103, "loss": 2.1374, "step": 87365 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019482968621018559, "loss": 2.094, "step": 87370 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019482909957451458, "loss": 2.1895, "step": 87375 }, { "epoch": 0.21, "grad_norm": 1.6953125, "learning_rate": 0.00019482851290644827, "loss": 2.2004, "step": 87380 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019482792620598675, "loss": 2.0761, "step": 87385 }, { "epoch": 0.21, "grad_norm": 1.6640625, "learning_rate": 0.00019482733947313033, "loss": 2.2651, "step": 87390 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019482675270787917, "loss": 2.2103, "step": 87395 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019482616591023346, "loss": 2.2619, "step": 87400 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019482557908019344, "loss": 2.0639, "step": 87405 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.00019482499221775925, "loss": 2.1342, "step": 87410 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019482440532293113, "loss": 1.9472, "step": 87415 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019482381839570928, "loss": 2.1414, "step": 87420 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.0001948232314360939, "loss": 2.0095, "step": 87425 }, { "epoch": 0.21, "grad_norm": 1.6328125, "learning_rate": 0.00019482264444408519, "loss": 2.2636, "step": 87430 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.0001948220574196833, "loss": 2.2706, "step": 87435 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.0001948214703628885, "loss": 2.174, "step": 87440 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.000194820883273701, "loss": 2.2113, "step": 87445 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019482029615212093, "loss": 2.1189, "step": 87450 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019481970899814854, "loss": 2.0922, "step": 87455 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019481912181178401, "loss": 2.2512, "step": 87460 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019481853459302756, "loss": 2.1813, "step": 87465 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019481794734187938, "loss": 2.2261, "step": 87470 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019481736005833966, "loss": 2.1266, "step": 87475 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.0001948167727424086, "loss": 2.1605, "step": 87480 }, { "epoch": 0.21, "grad_norm": 2.53125, "learning_rate": 0.00019481618539408642, "loss": 2.2782, "step": 87485 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019481559801337332, "loss": 2.2443, "step": 87490 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019481501060026948, "loss": 2.1234, "step": 87495 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019481442315477513, "loss": 2.1288, "step": 87500 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.00019481383567689043, "loss": 2.3147, "step": 87505 }, { "epoch": 0.21, "grad_norm": 2.421875, "learning_rate": 0.0001948132481666156, "loss": 2.3799, "step": 87510 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019481266062395088, "loss": 2.1558, "step": 87515 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019481207304889643, "loss": 2.2454, "step": 87520 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019481148544145245, "loss": 2.2026, "step": 87525 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.0001948108978016191, "loss": 2.0821, "step": 87530 }, { "epoch": 0.21, "grad_norm": 2.484375, "learning_rate": 0.0001948103101293967, "loss": 2.1553, "step": 87535 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019480972242478536, "loss": 2.0004, "step": 87540 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019480913468778526, "loss": 2.2467, "step": 87545 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019480854691839668, "loss": 2.1486, "step": 87550 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019480795911661978, "loss": 2.091, "step": 87555 }, { "epoch": 0.21, "grad_norm": 1.7109375, "learning_rate": 0.00019480737128245475, "loss": 2.1582, "step": 87560 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.0001948067834159018, "loss": 2.1908, "step": 87565 }, { "epoch": 0.21, "grad_norm": 1.6796875, "learning_rate": 0.0001948061955169611, "loss": 2.0521, "step": 87570 }, { "epoch": 0.21, "grad_norm": 1.75, "learning_rate": 0.00019480560758563294, "loss": 2.1002, "step": 87575 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019480501962191747, "loss": 2.2203, "step": 87580 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019480443162581484, "loss": 2.0319, "step": 87585 }, { "epoch": 0.21, "grad_norm": 2.984375, "learning_rate": 0.0001948038435973253, "loss": 2.0724, "step": 87590 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019480325553644908, "loss": 2.2151, "step": 87595 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019480266744318633, "loss": 2.1213, "step": 87600 }, { "epoch": 0.21, "grad_norm": 2.34375, "learning_rate": 0.00019480207931753726, "loss": 2.0663, "step": 87605 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.0001948014911595021, "loss": 2.1447, "step": 87610 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019480090296908104, "loss": 2.126, "step": 87615 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019480031474627426, "loss": 2.294, "step": 87620 }, { "epoch": 0.21, "grad_norm": 1.7109375, "learning_rate": 0.00019479972649108195, "loss": 1.9846, "step": 87625 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019479913820350436, "loss": 2.0937, "step": 87630 }, { "epoch": 0.21, "grad_norm": 1.765625, "learning_rate": 0.00019479854988354168, "loss": 2.1913, "step": 87635 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019479796153119407, "loss": 2.3066, "step": 87640 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019479737314646175, "loss": 2.1487, "step": 87645 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.00019479678472934494, "loss": 2.0446, "step": 87650 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019479619627984382, "loss": 2.1195, "step": 87655 }, { "epoch": 0.21, "grad_norm": 1.78125, "learning_rate": 0.00019479560779795863, "loss": 2.1527, "step": 87660 }, { "epoch": 0.21, "grad_norm": 1.75, "learning_rate": 0.0001947950192836895, "loss": 2.1875, "step": 87665 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.0001947944307370367, "loss": 2.2887, "step": 87670 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019479384215800042, "loss": 2.0941, "step": 87675 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.0001947932535465808, "loss": 2.1908, "step": 87680 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019479266490277813, "loss": 2.0699, "step": 87685 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.00019479207622659253, "loss": 2.3892, "step": 87690 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019479148751802425, "loss": 2.3344, "step": 87695 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019479089877707346, "loss": 2.1746, "step": 87700 }, { "epoch": 0.21, "grad_norm": 1.609375, "learning_rate": 0.0001947903100037404, "loss": 2.185, "step": 87705 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019478972119802524, "loss": 2.2085, "step": 87710 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019478913235992823, "loss": 2.223, "step": 87715 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.0001947885434894495, "loss": 2.0499, "step": 87720 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.0001947879545865893, "loss": 2.2526, "step": 87725 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.0001947873656513478, "loss": 2.2245, "step": 87730 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019478677668372522, "loss": 1.9394, "step": 87735 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019478618768372177, "loss": 2.2904, "step": 87740 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019478559865133762, "loss": 2.0415, "step": 87745 }, { "epoch": 0.21, "grad_norm": 2.28125, "learning_rate": 0.000194785009586573, "loss": 2.203, "step": 87750 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019478442048942812, "loss": 2.1043, "step": 87755 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.00019478383135990312, "loss": 2.1215, "step": 87760 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019478324219799825, "loss": 2.1747, "step": 87765 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019478265300371374, "loss": 2.3798, "step": 87770 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.00019478206377704976, "loss": 2.4628, "step": 87775 }, { "epoch": 0.21, "grad_norm": 2.578125, "learning_rate": 0.00019478147451800648, "loss": 2.0685, "step": 87780 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019478088522658412, "loss": 1.9293, "step": 87785 }, { "epoch": 0.21, "grad_norm": 2.71875, "learning_rate": 0.00019478029590278294, "loss": 2.3035, "step": 87790 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019477970654660306, "loss": 2.0807, "step": 87795 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.0001947791171580447, "loss": 2.1208, "step": 87800 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.0001947785277371081, "loss": 2.192, "step": 87805 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019477793828379342, "loss": 2.0996, "step": 87810 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019477734879810087, "loss": 2.3415, "step": 87815 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019477675928003066, "loss": 2.2, "step": 87820 }, { "epoch": 0.21, "grad_norm": 1.6953125, "learning_rate": 0.00019477616972958303, "loss": 2.1018, "step": 87825 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019477558014675808, "loss": 2.2144, "step": 87830 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019477499053155613, "loss": 2.1752, "step": 87835 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.0001947744008839773, "loss": 2.0257, "step": 87840 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019477381120402182, "loss": 1.8988, "step": 87845 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019477322149168987, "loss": 2.055, "step": 87850 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.0001947726317469817, "loss": 2.0625, "step": 87855 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019477204196989746, "loss": 2.1236, "step": 87860 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019477145216043737, "loss": 2.148, "step": 87865 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019477086231860162, "loss": 2.2474, "step": 87870 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019477027244439046, "loss": 2.0695, "step": 87875 }, { "epoch": 0.21, "grad_norm": 1.75, "learning_rate": 0.00019476968253780404, "loss": 2.11, "step": 87880 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019476909259884257, "loss": 2.2395, "step": 87885 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019476850262750624, "loss": 2.2095, "step": 87890 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019476791262379532, "loss": 2.1356, "step": 87895 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019476732258770992, "loss": 2.0612, "step": 87900 }, { "epoch": 0.21, "grad_norm": 1.7109375, "learning_rate": 0.00019476673251925033, "loss": 2.0741, "step": 87905 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019476614241841666, "loss": 2.0186, "step": 87910 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.0001947655522852092, "loss": 2.1874, "step": 87915 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.00019476496211962807, "loss": 2.0853, "step": 87920 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.00019476437192167352, "loss": 2.082, "step": 87925 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019476378169134575, "loss": 2.1931, "step": 87930 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019476319142864494, "loss": 1.9709, "step": 87935 }, { "epoch": 0.21, "grad_norm": 1.6328125, "learning_rate": 0.00019476260113357132, "loss": 2.1886, "step": 87940 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019476201080612508, "loss": 2.2582, "step": 87945 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.0001947614204463064, "loss": 2.1454, "step": 87950 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019476083005411551, "loss": 2.158, "step": 87955 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019476023962955262, "loss": 2.2261, "step": 87960 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.0001947596491726179, "loss": 2.0086, "step": 87965 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019475905868331154, "loss": 2.049, "step": 87970 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.0001947584681616338, "loss": 2.2302, "step": 87975 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019475787760758482, "loss": 2.1833, "step": 87980 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019475728702116486, "loss": 2.0538, "step": 87985 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019475669640237406, "loss": 2.2639, "step": 87990 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019475610575121267, "loss": 2.2162, "step": 87995 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019475551506768088, "loss": 2.2502, "step": 88000 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.0001947549243517789, "loss": 2.1047, "step": 88005 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.0001947543336035069, "loss": 2.2526, "step": 88010 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.0001947537428228651, "loss": 2.0356, "step": 88015 }, { "epoch": 0.21, "grad_norm": 2.34375, "learning_rate": 0.00019475315200985374, "loss": 2.1855, "step": 88020 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019475256116447293, "loss": 2.1015, "step": 88025 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019475197028672296, "loss": 2.1254, "step": 88030 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.000194751379376604, "loss": 2.0805, "step": 88035 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.00019475078843411622, "loss": 2.1976, "step": 88040 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.0001947501974592599, "loss": 2.0753, "step": 88045 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019474960645203516, "loss": 2.116, "step": 88050 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019474901541244224, "loss": 2.0673, "step": 88055 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019474842434048133, "loss": 2.1281, "step": 88060 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019474783323615265, "loss": 2.3866, "step": 88065 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019474724209945642, "loss": 2.2745, "step": 88070 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019474665093039274, "loss": 2.0896, "step": 88075 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019474605972896197, "loss": 2.2502, "step": 88080 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019474546849516418, "loss": 2.0084, "step": 88085 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019474487722899962, "loss": 2.1442, "step": 88090 }, { "epoch": 0.21, "grad_norm": 1.6796875, "learning_rate": 0.0001947442859304685, "loss": 2.2185, "step": 88095 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.000194743694599571, "loss": 2.2157, "step": 88100 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019474310323630735, "loss": 2.107, "step": 88105 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019474251184067775, "loss": 2.2896, "step": 88110 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.0001947419204126824, "loss": 2.28, "step": 88115 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019474132895232145, "loss": 2.2256, "step": 88120 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019474073745959518, "loss": 2.2424, "step": 88125 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019474014593450373, "loss": 2.2405, "step": 88130 }, { "epoch": 0.21, "grad_norm": 1.640625, "learning_rate": 0.00019473955437704734, "loss": 2.1542, "step": 88135 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019473896278722618, "loss": 2.2172, "step": 88140 }, { "epoch": 0.21, "grad_norm": 2.703125, "learning_rate": 0.0001947383711650405, "loss": 2.0759, "step": 88145 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019473777951049045, "loss": 2.1587, "step": 88150 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019473718782357627, "loss": 2.078, "step": 88155 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.00019473659610429818, "loss": 2.2234, "step": 88160 }, { "epoch": 0.21, "grad_norm": 1.75, "learning_rate": 0.0001947360043526563, "loss": 2.1432, "step": 88165 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.0001947354125686509, "loss": 2.1898, "step": 88170 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019473482075228217, "loss": 2.1629, "step": 88175 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.0001947342289035503, "loss": 2.1153, "step": 88180 }, { "epoch": 0.21, "grad_norm": 2.234375, "learning_rate": 0.0001947336370224555, "loss": 2.3046, "step": 88185 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.00019473304510899797, "loss": 2.2892, "step": 88190 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.0001947324531631779, "loss": 2.1554, "step": 88195 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019473186118499556, "loss": 2.1464, "step": 88200 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019473126917445106, "loss": 2.2036, "step": 88205 }, { "epoch": 0.21, "grad_norm": 1.6171875, "learning_rate": 0.00019473067713154466, "loss": 2.1836, "step": 88210 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.0001947300850562765, "loss": 2.2251, "step": 88215 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019472949294864687, "loss": 2.2051, "step": 88220 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019472890080865588, "loss": 2.1477, "step": 88225 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.00019472830863630382, "loss": 2.0735, "step": 88230 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019472771643159085, "loss": 2.2174, "step": 88235 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.00019472712419451715, "loss": 2.1048, "step": 88240 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019472653192508297, "loss": 2.0985, "step": 88245 }, { "epoch": 0.21, "grad_norm": 1.734375, "learning_rate": 0.00019472593962328847, "loss": 2.2779, "step": 88250 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.0001947253472891339, "loss": 2.0681, "step": 88255 }, { "epoch": 0.21, "grad_norm": 2.421875, "learning_rate": 0.00019472475492261939, "loss": 2.0502, "step": 88260 }, { "epoch": 0.21, "grad_norm": 2.4375, "learning_rate": 0.0001947241625237452, "loss": 2.3758, "step": 88265 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019472357009251155, "loss": 2.2841, "step": 88270 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019472297762891858, "loss": 2.1167, "step": 88275 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.00019472238513296652, "loss": 2.0873, "step": 88280 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.0001947217926046556, "loss": 2.2937, "step": 88285 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019472120004398598, "loss": 2.0233, "step": 88290 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.0001947206074509579, "loss": 2.0484, "step": 88295 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.0001947200148255715, "loss": 2.1432, "step": 88300 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019471942216782705, "loss": 2.2452, "step": 88305 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019471882947772473, "loss": 2.1342, "step": 88310 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019471823675526475, "loss": 2.2306, "step": 88315 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019471764400044728, "loss": 2.3537, "step": 88320 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019471705121327254, "loss": 2.1183, "step": 88325 }, { "epoch": 0.21, "grad_norm": 2.78125, "learning_rate": 0.00019471645839374077, "loss": 2.037, "step": 88330 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019471586554185212, "loss": 2.0708, "step": 88335 }, { "epoch": 0.21, "grad_norm": 2.4375, "learning_rate": 0.00019471527265760684, "loss": 2.2516, "step": 88340 }, { "epoch": 0.21, "grad_norm": 2.5, "learning_rate": 0.00019471467974100507, "loss": 2.2872, "step": 88345 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.00019471408679204707, "loss": 2.1036, "step": 88350 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019471349381073298, "loss": 2.1447, "step": 88355 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019471290079706307, "loss": 2.0963, "step": 88360 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019471230775103756, "loss": 2.0823, "step": 88365 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019471171467265653, "loss": 2.1347, "step": 88370 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.0001947111215619203, "loss": 2.1243, "step": 88375 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019471052841882905, "loss": 2.203, "step": 88380 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019470993524338295, "loss": 2.1117, "step": 88385 }, { "epoch": 0.21, "grad_norm": 2.28125, "learning_rate": 0.0001947093420355822, "loss": 2.4658, "step": 88390 }, { "epoch": 0.21, "grad_norm": 2.453125, "learning_rate": 0.000194708748795427, "loss": 2.2334, "step": 88395 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019470815552291763, "loss": 2.156, "step": 88400 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.00019470756221805422, "loss": 2.2439, "step": 88405 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.000194706968880837, "loss": 2.1253, "step": 88410 }, { "epoch": 0.21, "grad_norm": 2.734375, "learning_rate": 0.00019470637551126613, "loss": 2.3173, "step": 88415 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019470578210934188, "loss": 2.1246, "step": 88420 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019470518867506438, "loss": 2.3368, "step": 88425 }, { "epoch": 0.21, "grad_norm": 2.4375, "learning_rate": 0.0001947045952084339, "loss": 2.2487, "step": 88430 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019470400170945059, "loss": 1.9792, "step": 88435 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.0001947034081781147, "loss": 2.2102, "step": 88440 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.0001947028146144264, "loss": 2.1298, "step": 88445 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.00019470222101838588, "loss": 2.0681, "step": 88450 }, { "epoch": 0.21, "grad_norm": 1.6875, "learning_rate": 0.00019470162738999337, "loss": 2.209, "step": 88455 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019470103372924908, "loss": 2.1569, "step": 88460 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.0001947004400361532, "loss": 2.0652, "step": 88465 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.00019469984631070592, "loss": 2.1189, "step": 88470 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019469925255290746, "loss": 2.0708, "step": 88475 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019469865876275803, "loss": 2.0911, "step": 88480 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.0001946980649402578, "loss": 2.3127, "step": 88485 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.000194697471085407, "loss": 2.0265, "step": 88490 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019469687719820584, "loss": 2.045, "step": 88495 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.0001946962832786545, "loss": 2.1901, "step": 88500 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.0001946956893267532, "loss": 2.121, "step": 88505 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019469509534250211, "loss": 1.9939, "step": 88510 }, { "epoch": 0.21, "grad_norm": 2.484375, "learning_rate": 0.00019469450132590148, "loss": 2.178, "step": 88515 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.0001946939072769515, "loss": 2.0543, "step": 88520 }, { "epoch": 0.21, "grad_norm": 1.671875, "learning_rate": 0.00019469331319565237, "loss": 2.1958, "step": 88525 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.00019469271908200423, "loss": 1.9781, "step": 88530 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.0001946921249360074, "loss": 2.2681, "step": 88535 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.000194691530757662, "loss": 2.1297, "step": 88540 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019469093654696823, "loss": 2.1075, "step": 88545 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019469034230392635, "loss": 2.3096, "step": 88550 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.00019468974802853652, "loss": 2.2391, "step": 88555 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019468915372079895, "loss": 2.155, "step": 88560 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019468855938071389, "loss": 2.0221, "step": 88565 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.00019468796500828148, "loss": 1.9954, "step": 88570 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.0001946873706035019, "loss": 2.2378, "step": 88575 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019468677616637546, "loss": 2.0678, "step": 88580 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019468618169690225, "loss": 2.1812, "step": 88585 }, { "epoch": 0.21, "grad_norm": 2.34375, "learning_rate": 0.00019468558719508257, "loss": 1.9609, "step": 88590 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019468499266091653, "loss": 2.076, "step": 88595 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019468439809440443, "loss": 2.1385, "step": 88600 }, { "epoch": 0.21, "grad_norm": 2.5625, "learning_rate": 0.00019468380349554635, "loss": 2.265, "step": 88605 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019468320886434262, "loss": 2.211, "step": 88610 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.00019468261420079336, "loss": 2.0644, "step": 88615 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019468201950489884, "loss": 2.1367, "step": 88620 }, { "epoch": 0.21, "grad_norm": 1.5703125, "learning_rate": 0.0001946814247766592, "loss": 2.3702, "step": 88625 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019468083001607467, "loss": 2.0761, "step": 88630 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019468023522314546, "loss": 2.1097, "step": 88635 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019467964039787173, "loss": 2.1432, "step": 88640 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019467904554025376, "loss": 2.2397, "step": 88645 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.0001946784506502917, "loss": 2.1916, "step": 88650 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019467785572798577, "loss": 2.1498, "step": 88655 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019467726077333615, "loss": 2.1538, "step": 88660 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019467666578634307, "loss": 2.0117, "step": 88665 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019467607076700672, "loss": 2.0406, "step": 88670 }, { "epoch": 0.21, "grad_norm": 2.59375, "learning_rate": 0.0001946754757153273, "loss": 2.0987, "step": 88675 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019467488063130503, "loss": 2.2033, "step": 88680 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.0001946742855149401, "loss": 2.283, "step": 88685 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019467369036623274, "loss": 2.1662, "step": 88690 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019467309518518308, "loss": 2.1655, "step": 88695 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.0001946724999717914, "loss": 2.1208, "step": 88700 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.0001946719047260579, "loss": 2.1465, "step": 88705 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019467130944798275, "loss": 2.1724, "step": 88710 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019467071413756614, "loss": 2.0425, "step": 88715 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.00019467011879480833, "loss": 2.1756, "step": 88720 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019466952341970945, "loss": 2.1058, "step": 88725 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019466892801226976, "loss": 2.0882, "step": 88730 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019466833257248946, "loss": 2.1966, "step": 88735 }, { "epoch": 0.21, "grad_norm": 2.546875, "learning_rate": 0.00019466773710036874, "loss": 2.3771, "step": 88740 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.0001946671415959078, "loss": 2.0982, "step": 88745 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019466654605910684, "loss": 2.0874, "step": 88750 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019466595048996607, "loss": 2.2353, "step": 88755 }, { "epoch": 0.21, "grad_norm": 2.3125, "learning_rate": 0.0001946653548884857, "loss": 2.1535, "step": 88760 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019466475925466594, "loss": 2.1345, "step": 88765 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019466416358850696, "loss": 2.0829, "step": 88770 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.000194663567890009, "loss": 2.1224, "step": 88775 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019466297215917227, "loss": 1.9715, "step": 88780 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.0001946623763959969, "loss": 2.1448, "step": 88785 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019466178060048318, "loss": 2.0748, "step": 88790 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019466118477263128, "loss": 2.1102, "step": 88795 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.0001946605889124414, "loss": 2.2056, "step": 88800 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019465999301991374, "loss": 1.9874, "step": 88805 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.0001946593970950485, "loss": 2.1493, "step": 88810 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.0001946588011378459, "loss": 2.1387, "step": 88815 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019465820514830612, "loss": 2.222, "step": 88820 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019465760912642943, "loss": 2.1397, "step": 88825 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019465701307221593, "loss": 2.1314, "step": 88830 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019465641698566591, "loss": 2.2771, "step": 88835 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019465582086677954, "loss": 2.2039, "step": 88840 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019465522471555702, "loss": 2.163, "step": 88845 }, { "epoch": 0.21, "grad_norm": 1.6796875, "learning_rate": 0.00019465462853199855, "loss": 2.2401, "step": 88850 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019465403231610435, "loss": 2.2019, "step": 88855 }, { "epoch": 0.21, "grad_norm": 2.65625, "learning_rate": 0.0001946534360678746, "loss": 2.1717, "step": 88860 }, { "epoch": 0.21, "grad_norm": 1.6015625, "learning_rate": 0.00019465283978730954, "loss": 2.1794, "step": 88865 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019465224347440935, "loss": 2.1123, "step": 88870 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019465164712917423, "loss": 2.2364, "step": 88875 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.0001946510507516044, "loss": 2.2707, "step": 88880 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019465045434170005, "loss": 2.1975, "step": 88885 }, { "epoch": 0.21, "grad_norm": 1.671875, "learning_rate": 0.00019464985789946138, "loss": 2.263, "step": 88890 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019464926142488863, "loss": 2.3951, "step": 88895 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019464866491798196, "loss": 2.2833, "step": 88900 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019464806837874154, "loss": 2.321, "step": 88905 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.00019464747180716768, "loss": 2.2253, "step": 88910 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019464687520326054, "loss": 2.1239, "step": 88915 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019464627856702026, "loss": 2.264, "step": 88920 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019464568189844715, "loss": 1.9983, "step": 88925 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.0001946450851975413, "loss": 2.2261, "step": 88930 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.000194644488464303, "loss": 2.2263, "step": 88935 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019464389169873244, "loss": 2.1849, "step": 88940 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.0001946432949008298, "loss": 2.302, "step": 88945 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019464269807059528, "loss": 2.2706, "step": 88950 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.0001946421012080291, "loss": 2.2308, "step": 88955 }, { "epoch": 0.21, "grad_norm": 2.671875, "learning_rate": 0.0001946415043131315, "loss": 2.2033, "step": 88960 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019464090738590263, "loss": 2.1467, "step": 88965 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019464031042634266, "loss": 2.2799, "step": 88970 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019463971343445187, "loss": 2.134, "step": 88975 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019463911641023047, "loss": 2.2453, "step": 88980 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.0001946385193536786, "loss": 2.1809, "step": 88985 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019463792226479651, "loss": 2.3177, "step": 88990 }, { "epoch": 0.21, "grad_norm": 2.734375, "learning_rate": 0.0001946373251435844, "loss": 2.2436, "step": 88995 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.00019463672799004242, "loss": 2.0623, "step": 89000 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019463613080417086, "loss": 2.2651, "step": 89005 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.0001946355335859699, "loss": 2.1645, "step": 89010 }, { "epoch": 0.21, "grad_norm": 1.6875, "learning_rate": 0.00019463493633543966, "loss": 1.9534, "step": 89015 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019463433905258045, "loss": 2.1806, "step": 89020 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019463374173739242, "loss": 2.2119, "step": 89025 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019463314438987578, "loss": 2.2497, "step": 89030 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019463254701003075, "loss": 2.1826, "step": 89035 }, { "epoch": 0.21, "grad_norm": 1.640625, "learning_rate": 0.00019463194959785755, "loss": 2.1359, "step": 89040 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019463135215335632, "loss": 2.1162, "step": 89045 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019463075467652735, "loss": 2.1525, "step": 89050 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019463015716737075, "loss": 2.2526, "step": 89055 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.0001946295596258868, "loss": 2.1408, "step": 89060 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.0001946289620520757, "loss": 2.1339, "step": 89065 }, { "epoch": 0.21, "grad_norm": 2.3125, "learning_rate": 0.00019462836444593756, "loss": 2.0739, "step": 89070 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.0001946277668074727, "loss": 1.9988, "step": 89075 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019462716913668126, "loss": 2.015, "step": 89080 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019462657143356352, "loss": 2.2478, "step": 89085 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.00019462597369811956, "loss": 2.1288, "step": 89090 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019462537593034968, "loss": 2.2743, "step": 89095 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019462477813025404, "loss": 1.9538, "step": 89100 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019462418029783288, "loss": 2.1783, "step": 89105 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019462358243308635, "loss": 2.3178, "step": 89110 }, { "epoch": 0.21, "grad_norm": 2.484375, "learning_rate": 0.00019462298453601473, "loss": 2.2019, "step": 89115 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019462238660661815, "loss": 2.0949, "step": 89120 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.00019462178864489688, "loss": 2.2131, "step": 89125 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019462119065085106, "loss": 2.1009, "step": 89130 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019462059262448093, "loss": 2.1379, "step": 89135 }, { "epoch": 0.21, "grad_norm": 3.0, "learning_rate": 0.00019461999456578673, "loss": 2.1416, "step": 89140 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019461939647476856, "loss": 2.2066, "step": 89145 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019461879835142671, "loss": 2.1539, "step": 89150 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.0001946182001957614, "loss": 2.2674, "step": 89155 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019461760200777274, "loss": 2.1865, "step": 89160 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019461700378746102, "loss": 2.1582, "step": 89165 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019461640553482642, "loss": 2.0587, "step": 89170 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019461580724986914, "loss": 2.2109, "step": 89175 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.0001946152089325894, "loss": 2.2338, "step": 89180 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019461461058298735, "loss": 2.1983, "step": 89185 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019461401220106326, "loss": 2.3654, "step": 89190 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.00019461341378681729, "loss": 2.2149, "step": 89195 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019461281534024967, "loss": 2.0761, "step": 89200 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.0001946122168613606, "loss": 2.1389, "step": 89205 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019461161835015027, "loss": 2.3113, "step": 89210 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019461101980661888, "loss": 2.0834, "step": 89215 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.0001946104212307667, "loss": 2.1673, "step": 89220 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019460982262259387, "loss": 2.0702, "step": 89225 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019460922398210057, "loss": 2.0765, "step": 89230 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019460862530928707, "loss": 2.083, "step": 89235 }, { "epoch": 0.21, "grad_norm": 1.671875, "learning_rate": 0.00019460802660415354, "loss": 2.2365, "step": 89240 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.0001946074278667002, "loss": 2.0792, "step": 89245 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019460682909692726, "loss": 2.0977, "step": 89250 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019460623029483488, "loss": 2.1837, "step": 89255 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.00019460563146042332, "loss": 2.1852, "step": 89260 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019460503259369276, "loss": 2.4083, "step": 89265 }, { "epoch": 0.21, "grad_norm": 2.640625, "learning_rate": 0.0001946044336946434, "loss": 2.3324, "step": 89270 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019460383476327544, "loss": 1.9159, "step": 89275 }, { "epoch": 0.21, "grad_norm": 2.34375, "learning_rate": 0.0001946032357995891, "loss": 2.1061, "step": 89280 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019460263680358457, "loss": 2.0906, "step": 89285 }, { "epoch": 0.21, "grad_norm": 1.6953125, "learning_rate": 0.0001946020377752621, "loss": 2.0481, "step": 89290 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019460143871462182, "loss": 2.0994, "step": 89295 }, { "epoch": 0.21, "grad_norm": 1.53125, "learning_rate": 0.000194600839621664, "loss": 2.1685, "step": 89300 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.0001946002404963888, "loss": 2.3195, "step": 89305 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.00019459964133879644, "loss": 2.1876, "step": 89310 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019459904214888713, "loss": 2.2155, "step": 89315 }, { "epoch": 0.21, "grad_norm": 1.609375, "learning_rate": 0.00019459844292666107, "loss": 2.1455, "step": 89320 }, { "epoch": 0.21, "grad_norm": 1.6953125, "learning_rate": 0.0001945978436721185, "loss": 2.2131, "step": 89325 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019459724438525955, "loss": 1.9638, "step": 89330 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019459664506608446, "loss": 1.9749, "step": 89335 }, { "epoch": 0.21, "grad_norm": 2.5, "learning_rate": 0.00019459604571459344, "loss": 2.2135, "step": 89340 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019459544633078672, "loss": 2.226, "step": 89345 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019459484691466448, "loss": 2.133, "step": 89350 }, { "epoch": 0.21, "grad_norm": 1.78125, "learning_rate": 0.00019459424746622693, "loss": 2.2079, "step": 89355 }, { "epoch": 0.21, "grad_norm": 2.4375, "learning_rate": 0.00019459364798547424, "loss": 2.0947, "step": 89360 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019459304847240666, "loss": 2.2732, "step": 89365 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.00019459244892702438, "loss": 2.1207, "step": 89370 }, { "epoch": 0.21, "grad_norm": 2.390625, "learning_rate": 0.0001945918493493276, "loss": 2.2187, "step": 89375 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.00019459124973931656, "loss": 2.1956, "step": 89380 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.0001945906500969914, "loss": 1.9718, "step": 89385 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019459005042235235, "loss": 2.1364, "step": 89390 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019458945071539966, "loss": 2.0806, "step": 89395 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019458885097613346, "loss": 2.0938, "step": 89400 }, { "epoch": 0.21, "grad_norm": 1.5859375, "learning_rate": 0.000194588251204554, "loss": 2.1732, "step": 89405 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.0001945876514006615, "loss": 2.0757, "step": 89410 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.0001945870515644561, "loss": 2.1814, "step": 89415 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.0001945864516959381, "loss": 2.2205, "step": 89420 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.00019458585179510764, "loss": 2.1306, "step": 89425 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019458525186196493, "loss": 2.2404, "step": 89430 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019458465189651017, "loss": 2.206, "step": 89435 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019458405189874358, "loss": 2.042, "step": 89440 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.0001945834518686654, "loss": 2.1683, "step": 89445 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019458285180627574, "loss": 2.1883, "step": 89450 }, { "epoch": 0.21, "grad_norm": 2.34375, "learning_rate": 0.00019458225171157492, "loss": 2.143, "step": 89455 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019458165158456306, "loss": 2.2993, "step": 89460 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019458105142524038, "loss": 2.0964, "step": 89465 }, { "epoch": 0.21, "grad_norm": 1.5703125, "learning_rate": 0.0001945804512336071, "loss": 2.1686, "step": 89470 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019457985100966344, "loss": 2.3979, "step": 89475 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019457925075340958, "loss": 2.1312, "step": 89480 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019457865046484574, "loss": 2.0428, "step": 89485 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.0001945780501439721, "loss": 2.1514, "step": 89490 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.0001945774497907889, "loss": 2.2629, "step": 89495 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.0001945768494052963, "loss": 2.114, "step": 89500 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019457624898749456, "loss": 2.1077, "step": 89505 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019457564853738384, "loss": 1.875, "step": 89510 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019457504805496439, "loss": 2.119, "step": 89515 }, { "epoch": 0.21, "grad_norm": 1.6171875, "learning_rate": 0.00019457444754023638, "loss": 2.1139, "step": 89520 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.0001945738469932, "loss": 2.1247, "step": 89525 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.0001945732464138555, "loss": 2.0655, "step": 89530 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019457264580220303, "loss": 2.1053, "step": 89535 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019457204515824287, "loss": 2.2584, "step": 89540 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019457144448197517, "loss": 2.1642, "step": 89545 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019457084377340016, "loss": 2.0761, "step": 89550 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019457024303251802, "loss": 2.1476, "step": 89555 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019456964225932896, "loss": 2.324, "step": 89560 }, { "epoch": 0.21, "grad_norm": 2.3125, "learning_rate": 0.00019456904145383325, "loss": 2.2428, "step": 89565 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.000194568440616031, "loss": 2.2658, "step": 89570 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019456783974592244, "loss": 2.1982, "step": 89575 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.0001945672388435078, "loss": 2.1715, "step": 89580 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019456663790878728, "loss": 2.0577, "step": 89585 }, { "epoch": 0.21, "grad_norm": 1.9375, "learning_rate": 0.00019456603694176109, "loss": 2.031, "step": 89590 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019456543594242943, "loss": 2.1508, "step": 89595 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.00019456483491079248, "loss": 1.9016, "step": 89600 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019456423384685048, "loss": 2.0503, "step": 89605 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019456363275060362, "loss": 2.2467, "step": 89610 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019456303162205215, "loss": 2.2419, "step": 89615 }, { "epoch": 0.21, "grad_norm": 1.5234375, "learning_rate": 0.00019456243046119616, "loss": 2.1716, "step": 89620 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019456182926803596, "loss": 2.2549, "step": 89625 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019456122804257175, "loss": 2.1212, "step": 89630 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.0001945606267848037, "loss": 2.0796, "step": 89635 }, { "epoch": 0.21, "grad_norm": 1.6484375, "learning_rate": 0.000194560025494732, "loss": 1.9467, "step": 89640 }, { "epoch": 0.21, "grad_norm": 1.6875, "learning_rate": 0.0001945594241723569, "loss": 2.1288, "step": 89645 }, { "epoch": 0.21, "grad_norm": 1.578125, "learning_rate": 0.00019455882281767858, "loss": 1.9627, "step": 89650 }, { "epoch": 0.21, "grad_norm": 2.3125, "learning_rate": 0.0001945582214306972, "loss": 2.1576, "step": 89655 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.0001945576200114131, "loss": 2.0611, "step": 89660 }, { "epoch": 0.21, "grad_norm": 2.6875, "learning_rate": 0.00019455701855982635, "loss": 2.0275, "step": 89665 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019455641707593726, "loss": 2.0763, "step": 89670 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019455581555974594, "loss": 2.2202, "step": 89675 }, { "epoch": 0.21, "grad_norm": 1.6328125, "learning_rate": 0.00019455521401125263, "loss": 2.2379, "step": 89680 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019455461243045758, "loss": 2.2083, "step": 89685 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019455401081736094, "loss": 2.1226, "step": 89690 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019455340917196292, "loss": 2.1134, "step": 89695 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019455280749426377, "loss": 2.351, "step": 89700 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.00019455220578426364, "loss": 2.0882, "step": 89705 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019455160404196277, "loss": 2.134, "step": 89710 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019455100226736138, "loss": 2.1787, "step": 89715 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019455040046045965, "loss": 2.196, "step": 89720 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019454979862125774, "loss": 2.2793, "step": 89725 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019454919674975597, "loss": 2.0698, "step": 89730 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019454859484595446, "loss": 2.1, "step": 89735 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.0001945479929098534, "loss": 2.2422, "step": 89740 }, { "epoch": 0.21, "grad_norm": 2.3125, "learning_rate": 0.00019454739094145306, "loss": 2.0137, "step": 89745 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.0001945467889407536, "loss": 2.0528, "step": 89750 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019454618690775525, "loss": 2.0502, "step": 89755 }, { "epoch": 0.21, "grad_norm": 1.65625, "learning_rate": 0.00019454558484245822, "loss": 2.244, "step": 89760 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019454498274486267, "loss": 2.2213, "step": 89765 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.0001945443806149689, "loss": 2.1715, "step": 89770 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.000194543778452777, "loss": 2.389, "step": 89775 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019454317625828725, "loss": 2.1688, "step": 89780 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019454257403149982, "loss": 2.1356, "step": 89785 }, { "epoch": 0.21, "grad_norm": 1.640625, "learning_rate": 0.00019454197177241496, "loss": 2.0701, "step": 89790 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019454136948103283, "loss": 2.184, "step": 89795 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019454076715735367, "loss": 2.106, "step": 89800 }, { "epoch": 0.21, "grad_norm": 1.7109375, "learning_rate": 0.00019454016480137764, "loss": 2.2635, "step": 89805 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.000194539562413105, "loss": 2.1489, "step": 89810 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019453895999253592, "loss": 2.1009, "step": 89815 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019453835753967063, "loss": 2.2047, "step": 89820 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.0001945377550545093, "loss": 2.2382, "step": 89825 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019453715253705216, "loss": 2.0676, "step": 89830 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019453654998729945, "loss": 1.9893, "step": 89835 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019453594740525133, "loss": 2.0315, "step": 89840 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019453534479090798, "loss": 2.3011, "step": 89845 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019453474214426965, "loss": 2.193, "step": 89850 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019453413946533656, "loss": 2.3, "step": 89855 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019453353675410887, "loss": 2.175, "step": 89860 }, { "epoch": 0.21, "grad_norm": 2.328125, "learning_rate": 0.00019453293401058686, "loss": 2.2086, "step": 89865 }, { "epoch": 0.21, "grad_norm": 1.5859375, "learning_rate": 0.00019453233123477062, "loss": 2.1488, "step": 89870 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019453172842666045, "loss": 2.1633, "step": 89875 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019453112558625652, "loss": 2.1352, "step": 89880 }, { "epoch": 0.21, "grad_norm": 2.28125, "learning_rate": 0.00019453052271355906, "loss": 2.2377, "step": 89885 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019452991980856824, "loss": 2.1081, "step": 89890 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019452931687128432, "loss": 2.1578, "step": 89895 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.00019452871390170744, "loss": 2.2181, "step": 89900 }, { "epoch": 0.21, "grad_norm": 2.34375, "learning_rate": 0.00019452811089983783, "loss": 2.072, "step": 89905 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019452750786567573, "loss": 2.1655, "step": 89910 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.0001945269047992213, "loss": 2.0296, "step": 89915 }, { "epoch": 0.21, "grad_norm": 2.71875, "learning_rate": 0.00019452630170047476, "loss": 2.1489, "step": 89920 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019452569856943632, "loss": 2.1238, "step": 89925 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.0001945250954061062, "loss": 2.0164, "step": 89930 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019452449221048462, "loss": 1.9964, "step": 89935 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019452388898257171, "loss": 1.9008, "step": 89940 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019452328572236774, "loss": 2.1435, "step": 89945 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.00019452268242987292, "loss": 2.0504, "step": 89950 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.00019452207910508742, "loss": 2.2774, "step": 89955 }, { "epoch": 0.21, "grad_norm": 2.234375, "learning_rate": 0.00019452147574801148, "loss": 2.224, "step": 89960 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019452087235864526, "loss": 2.0083, "step": 89965 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019452026893698903, "loss": 2.1117, "step": 89970 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019451966548304292, "loss": 2.14, "step": 89975 }, { "epoch": 0.21, "grad_norm": 1.78125, "learning_rate": 0.0001945190619968072, "loss": 2.1056, "step": 89980 }, { "epoch": 0.21, "grad_norm": 1.6015625, "learning_rate": 0.00019451845847828208, "loss": 2.2522, "step": 89985 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.0001945178549274677, "loss": 2.234, "step": 89990 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.0001945172513443643, "loss": 2.1859, "step": 89995 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.00019451664772897215, "loss": 2.2442, "step": 90000 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019451604408129137, "loss": 2.1705, "step": 90005 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019451544040132216, "loss": 2.105, "step": 90010 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.0001945148366890648, "loss": 2.1574, "step": 90015 }, { "epoch": 0.21, "grad_norm": 2.734375, "learning_rate": 0.00019451423294451946, "loss": 2.2227, "step": 90020 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019451362916768635, "loss": 2.1385, "step": 90025 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019451302535856564, "loss": 2.1856, "step": 90030 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019451242151715756, "loss": 2.1938, "step": 90035 }, { "epoch": 0.21, "grad_norm": 2.5, "learning_rate": 0.00019451181764346234, "loss": 2.0846, "step": 90040 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019451121373748018, "loss": 2.0953, "step": 90045 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019451060979921127, "loss": 2.1193, "step": 90050 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.00019451000582865584, "loss": 2.1984, "step": 90055 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019450940182581404, "loss": 2.0998, "step": 90060 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019450879779068614, "loss": 2.1194, "step": 90065 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019450819372327232, "loss": 2.0796, "step": 90070 }, { "epoch": 0.21, "grad_norm": 1.734375, "learning_rate": 0.00019450758962357277, "loss": 2.0812, "step": 90075 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019450698549158772, "loss": 2.1487, "step": 90080 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019450638132731735, "loss": 2.0726, "step": 90085 }, { "epoch": 0.21, "grad_norm": 1.6171875, "learning_rate": 0.00019450577713076193, "loss": 2.0662, "step": 90090 }, { "epoch": 0.21, "grad_norm": 2.234375, "learning_rate": 0.0001945051729019216, "loss": 2.239, "step": 90095 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.0001945045686407966, "loss": 2.0172, "step": 90100 }, { "epoch": 0.21, "grad_norm": 1.46875, "learning_rate": 0.0001945039643473871, "loss": 2.0356, "step": 90105 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019450336002169337, "loss": 2.0989, "step": 90110 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019450275566371554, "loss": 2.2198, "step": 90115 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019450215127345385, "loss": 2.1216, "step": 90120 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019450154685090855, "loss": 2.3473, "step": 90125 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019450094239607978, "loss": 2.2536, "step": 90130 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.00019450033790896777, "loss": 2.1498, "step": 90135 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019449973338957278, "loss": 2.0711, "step": 90140 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.0001944991288378949, "loss": 2.0103, "step": 90145 }, { "epoch": 0.21, "grad_norm": 1.7734375, "learning_rate": 0.00019449852425393446, "loss": 2.028, "step": 90150 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.00019449791963769157, "loss": 2.1246, "step": 90155 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.0001944973149891665, "loss": 2.1777, "step": 90160 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019449671030835944, "loss": 2.1404, "step": 90165 }, { "epoch": 0.21, "grad_norm": 1.8125, "learning_rate": 0.00019449610559527057, "loss": 2.0555, "step": 90170 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019449550084990011, "loss": 2.3196, "step": 90175 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.0001944948960722483, "loss": 2.0029, "step": 90180 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.0001944942912623153, "loss": 2.105, "step": 90185 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019449368642010132, "loss": 2.3382, "step": 90190 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019449308154560662, "loss": 2.1152, "step": 90195 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.00019449247663883137, "loss": 2.1557, "step": 90200 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019449187169977574, "loss": 2.1663, "step": 90205 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.00019449126672843999, "loss": 2.1336, "step": 90210 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019449066172482434, "loss": 2.0334, "step": 90215 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019449005668892893, "loss": 2.1927, "step": 90220 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.000194489451620754, "loss": 2.0809, "step": 90225 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019448884652029978, "loss": 2.287, "step": 90230 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019448824138756641, "loss": 2.0708, "step": 90235 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.0001944876362225542, "loss": 2.2452, "step": 90240 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.0001944870310252633, "loss": 2.1933, "step": 90245 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.0001944864257956939, "loss": 2.2325, "step": 90250 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.0001944858205338462, "loss": 2.2267, "step": 90255 }, { "epoch": 0.21, "grad_norm": 1.78125, "learning_rate": 0.00019448521523972046, "loss": 2.285, "step": 90260 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.00019448460991331685, "loss": 2.2388, "step": 90265 }, { "epoch": 0.21, "grad_norm": 2.625, "learning_rate": 0.0001944840045546356, "loss": 2.2158, "step": 90270 }, { "epoch": 0.21, "grad_norm": 1.75, "learning_rate": 0.00019448339916367687, "loss": 2.1888, "step": 90275 }, { "epoch": 0.21, "grad_norm": 1.75, "learning_rate": 0.00019448279374044089, "loss": 2.0977, "step": 90280 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.0001944821882849279, "loss": 2.0656, "step": 90285 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.0001944815827971381, "loss": 2.2924, "step": 90290 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019448097727707163, "loss": 2.1464, "step": 90295 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019448037172472877, "loss": 2.2162, "step": 90300 }, { "epoch": 0.21, "grad_norm": 1.6875, "learning_rate": 0.0001944797661401097, "loss": 2.262, "step": 90305 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019447916052321463, "loss": 2.1235, "step": 90310 }, { "epoch": 0.21, "grad_norm": 2.71875, "learning_rate": 0.00019447855487404376, "loss": 2.138, "step": 90315 }, { "epoch": 0.21, "grad_norm": 2.703125, "learning_rate": 0.0001944779491925973, "loss": 2.2113, "step": 90320 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019447734347887544, "loss": 2.1096, "step": 90325 }, { "epoch": 0.21, "grad_norm": 1.734375, "learning_rate": 0.00019447673773287843, "loss": 2.2032, "step": 90330 }, { "epoch": 0.21, "grad_norm": 1.953125, "learning_rate": 0.0001944761319546065, "loss": 2.1335, "step": 90335 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019447552614405974, "loss": 2.2246, "step": 90340 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019447492030123846, "loss": 2.1558, "step": 90345 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019447431442614282, "loss": 2.1206, "step": 90350 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019447370851877302, "loss": 2.1286, "step": 90355 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019447310257912932, "loss": 2.1729, "step": 90360 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.0001944724966072119, "loss": 2.2046, "step": 90365 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019447189060302095, "loss": 2.3273, "step": 90370 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019447128456655667, "loss": 2.2399, "step": 90375 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019447067849781932, "loss": 2.148, "step": 90380 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019447007239680906, "loss": 2.3248, "step": 90385 }, { "epoch": 0.21, "grad_norm": 1.6796875, "learning_rate": 0.0001944694662635261, "loss": 2.0851, "step": 90390 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019446886009797068, "loss": 2.1171, "step": 90395 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019446825390014296, "loss": 1.9971, "step": 90400 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019446764767004315, "loss": 2.287, "step": 90405 }, { "epoch": 0.21, "grad_norm": 2.03125, "learning_rate": 0.00019446704140767152, "loss": 2.2335, "step": 90410 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019446643511302823, "loss": 2.0498, "step": 90415 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019446582878611347, "loss": 2.0847, "step": 90420 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.0001944652224269275, "loss": 2.3889, "step": 90425 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019446461603547048, "loss": 2.0702, "step": 90430 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019446400961174262, "loss": 1.9277, "step": 90435 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019446340315574417, "loss": 2.1165, "step": 90440 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.0001944627966674753, "loss": 2.1173, "step": 90445 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.0001944621901469362, "loss": 2.1712, "step": 90450 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019446158359412712, "loss": 2.3675, "step": 90455 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019446097700904827, "loss": 2.2862, "step": 90460 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.0001944603703916998, "loss": 1.9481, "step": 90465 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019445976374208196, "loss": 2.2507, "step": 90470 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019445915706019497, "loss": 2.3624, "step": 90475 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019445855034603903, "loss": 2.241, "step": 90480 }, { "epoch": 0.21, "grad_norm": 1.8359375, "learning_rate": 0.00019445794359961434, "loss": 2.0552, "step": 90485 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019445733682092107, "loss": 2.1743, "step": 90490 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.0001944567300099595, "loss": 2.2176, "step": 90495 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019445612316672974, "loss": 2.114, "step": 90500 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019445551629123208, "loss": 1.9788, "step": 90505 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019445490938346671, "loss": 2.2001, "step": 90510 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019445430244343387, "loss": 2.3209, "step": 90515 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.00019445369547113365, "loss": 2.086, "step": 90520 }, { "epoch": 0.21, "grad_norm": 1.5, "learning_rate": 0.00019445308846656638, "loss": 2.1055, "step": 90525 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.0001944524814297322, "loss": 1.9943, "step": 90530 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019445187436063137, "loss": 1.9115, "step": 90535 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.00019445126725926403, "loss": 2.2253, "step": 90540 }, { "epoch": 0.21, "grad_norm": 2.359375, "learning_rate": 0.00019445066012563044, "loss": 2.0525, "step": 90545 }, { "epoch": 0.21, "grad_norm": 2.328125, "learning_rate": 0.00019445005295973082, "loss": 2.0338, "step": 90550 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019444944576156532, "loss": 2.0783, "step": 90555 }, { "epoch": 0.21, "grad_norm": 1.8203125, "learning_rate": 0.00019444883853113418, "loss": 2.3078, "step": 90560 }, { "epoch": 0.21, "grad_norm": 2.234375, "learning_rate": 0.00019444823126843762, "loss": 2.2842, "step": 90565 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.0001944476239734758, "loss": 2.2232, "step": 90570 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019444701664624899, "loss": 2.2354, "step": 90575 }, { "epoch": 0.21, "grad_norm": 2.46875, "learning_rate": 0.00019444640928675732, "loss": 1.9421, "step": 90580 }, { "epoch": 0.21, "grad_norm": 1.5703125, "learning_rate": 0.0001944458018950011, "loss": 2.0335, "step": 90585 }, { "epoch": 0.21, "grad_norm": 1.9609375, "learning_rate": 0.00019444519447098046, "loss": 2.2735, "step": 90590 }, { "epoch": 0.21, "grad_norm": 1.6484375, "learning_rate": 0.0001944445870146956, "loss": 2.1045, "step": 90595 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019444397952614678, "loss": 2.1174, "step": 90600 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.0001944433720053342, "loss": 2.1145, "step": 90605 }, { "epoch": 0.21, "grad_norm": 1.6484375, "learning_rate": 0.00019444276445225805, "loss": 2.1244, "step": 90610 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.0001944421568669185, "loss": 2.1121, "step": 90615 }, { "epoch": 0.21, "grad_norm": 1.859375, "learning_rate": 0.00019444154924931585, "loss": 2.4641, "step": 90620 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.0001944409415994502, "loss": 2.3016, "step": 90625 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.00019444033391732184, "loss": 2.1684, "step": 90630 }, { "epoch": 0.21, "grad_norm": 1.875, "learning_rate": 0.00019443972620293093, "loss": 2.0811, "step": 90635 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019443911845627775, "loss": 2.1682, "step": 90640 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019443851067736241, "loss": 2.2132, "step": 90645 }, { "epoch": 0.21, "grad_norm": 2.15625, "learning_rate": 0.00019443790286618518, "loss": 2.294, "step": 90650 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019443729502274622, "loss": 2.1455, "step": 90655 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019443668714704579, "loss": 2.2036, "step": 90660 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019443607923908408, "loss": 2.2477, "step": 90665 }, { "epoch": 0.21, "grad_norm": 2.3125, "learning_rate": 0.00019443547129886128, "loss": 2.0761, "step": 90670 }, { "epoch": 0.21, "grad_norm": 1.6484375, "learning_rate": 0.0001944348633263776, "loss": 2.0922, "step": 90675 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019443425532163328, "loss": 2.0235, "step": 90680 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.0001944336472846285, "loss": 2.2143, "step": 90685 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019443303921536347, "loss": 2.1526, "step": 90690 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.0001944324311138384, "loss": 2.1711, "step": 90695 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.00019443182298005353, "loss": 2.0408, "step": 90700 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.00019443121481400896, "loss": 2.1701, "step": 90705 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019443060661570504, "loss": 2.3516, "step": 90710 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.0001944299983851419, "loss": 2.1329, "step": 90715 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019442939012231976, "loss": 2.2376, "step": 90720 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.00019442878182723882, "loss": 2.2369, "step": 90725 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.0001944281734998993, "loss": 1.973, "step": 90730 }, { "epoch": 0.21, "grad_norm": 1.796875, "learning_rate": 0.0001944275651403014, "loss": 2.3218, "step": 90735 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019442695674844535, "loss": 2.1699, "step": 90740 }, { "epoch": 0.21, "grad_norm": 2.328125, "learning_rate": 0.00019442634832433128, "loss": 2.2205, "step": 90745 }, { "epoch": 0.21, "grad_norm": 2.625, "learning_rate": 0.0001944257398679595, "loss": 2.0999, "step": 90750 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.0001944251313793302, "loss": 2.1854, "step": 90755 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019442452285844353, "loss": 2.0427, "step": 90760 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019442391430529973, "loss": 2.1166, "step": 90765 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019442330571989901, "loss": 1.9262, "step": 90770 }, { "epoch": 0.21, "grad_norm": 1.734375, "learning_rate": 0.0001944226971022416, "loss": 2.0347, "step": 90775 }, { "epoch": 0.21, "grad_norm": 2.140625, "learning_rate": 0.00019442208845232766, "loss": 2.1575, "step": 90780 }, { "epoch": 0.21, "grad_norm": 2.4375, "learning_rate": 0.00019442147977015743, "loss": 2.1858, "step": 90785 }, { "epoch": 0.21, "grad_norm": 2.75, "learning_rate": 0.0001944208710557311, "loss": 2.1449, "step": 90790 }, { "epoch": 0.21, "grad_norm": 1.78125, "learning_rate": 0.00019442026230904888, "loss": 2.3076, "step": 90795 }, { "epoch": 0.21, "grad_norm": 1.7890625, "learning_rate": 0.00019441965353011103, "loss": 1.9743, "step": 90800 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019441904471891768, "loss": 2.2129, "step": 90805 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.0001944184358754691, "loss": 2.227, "step": 90810 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019441782699976541, "loss": 1.957, "step": 90815 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019441721809180694, "loss": 2.1682, "step": 90820 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019441660915159382, "loss": 2.1486, "step": 90825 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019441600017912623, "loss": 2.0463, "step": 90830 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.00019441539117440447, "loss": 2.1187, "step": 90835 }, { "epoch": 0.21, "grad_norm": 2.1875, "learning_rate": 0.0001944147821374287, "loss": 2.1223, "step": 90840 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.0001944141730681991, "loss": 2.0848, "step": 90845 }, { "epoch": 0.21, "grad_norm": 2.25, "learning_rate": 0.00019441356396671593, "loss": 2.0632, "step": 90850 }, { "epoch": 0.21, "grad_norm": 2.484375, "learning_rate": 0.00019441295483297937, "loss": 1.97, "step": 90855 }, { "epoch": 0.21, "grad_norm": 1.5, "learning_rate": 0.00019441234566698961, "loss": 2.0433, "step": 90860 }, { "epoch": 0.21, "grad_norm": 1.78125, "learning_rate": 0.0001944117364687469, "loss": 2.2687, "step": 90865 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019441112723825142, "loss": 2.0409, "step": 90870 }, { "epoch": 0.21, "grad_norm": 1.9453125, "learning_rate": 0.0001944105179755034, "loss": 2.1283, "step": 90875 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019440990868050304, "loss": 2.0957, "step": 90880 }, { "epoch": 0.21, "grad_norm": 1.5625, "learning_rate": 0.0001944092993532505, "loss": 2.343, "step": 90885 }, { "epoch": 0.21, "grad_norm": 2.515625, "learning_rate": 0.0001944086899937461, "loss": 2.2864, "step": 90890 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019440808060198995, "loss": 2.0617, "step": 90895 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019440747117798228, "loss": 2.2196, "step": 90900 }, { "epoch": 0.21, "grad_norm": 2.171875, "learning_rate": 0.0001944068617217233, "loss": 2.2512, "step": 90905 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019440625223321323, "loss": 2.0497, "step": 90910 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019440564271245228, "loss": 2.1604, "step": 90915 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.0001944050331594406, "loss": 2.1025, "step": 90920 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019440442357417852, "loss": 2.2162, "step": 90925 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019440381395666615, "loss": 2.1701, "step": 90930 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019440320430690373, "loss": 2.0992, "step": 90935 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019440259462489144, "loss": 2.1049, "step": 90940 }, { "epoch": 0.21, "grad_norm": 2.453125, "learning_rate": 0.00019440198491062953, "loss": 2.0824, "step": 90945 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.0001944013751641182, "loss": 2.1165, "step": 90950 }, { "epoch": 0.21, "grad_norm": 1.921875, "learning_rate": 0.00019440076538535764, "loss": 2.257, "step": 90955 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019440015557434804, "loss": 2.1416, "step": 90960 }, { "epoch": 0.21, "grad_norm": 3.21875, "learning_rate": 0.00019439954573108965, "loss": 2.2422, "step": 90965 }, { "epoch": 0.21, "grad_norm": 1.7265625, "learning_rate": 0.00019439893585558268, "loss": 2.103, "step": 90970 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.0001943983259478273, "loss": 2.2691, "step": 90975 }, { "epoch": 0.21, "grad_norm": 1.8046875, "learning_rate": 0.00019439771600782377, "loss": 2.1052, "step": 90980 }, { "epoch": 0.21, "grad_norm": 1.84375, "learning_rate": 0.00019439710603557224, "loss": 2.276, "step": 90985 }, { "epoch": 0.21, "grad_norm": 1.8671875, "learning_rate": 0.00019439649603107296, "loss": 2.3097, "step": 90990 }, { "epoch": 0.21, "grad_norm": 1.984375, "learning_rate": 0.00019439588599432612, "loss": 2.3283, "step": 90995 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019439527592533194, "loss": 1.9715, "step": 91000 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.0001943946658240906, "loss": 2.2424, "step": 91005 }, { "epoch": 0.21, "grad_norm": 1.6875, "learning_rate": 0.00019439405569060235, "loss": 2.165, "step": 91010 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019439344552486739, "loss": 2.0566, "step": 91015 }, { "epoch": 0.21, "grad_norm": 2.65625, "learning_rate": 0.0001943928353268859, "loss": 2.0388, "step": 91020 }, { "epoch": 0.21, "grad_norm": 1.828125, "learning_rate": 0.0001943922250966581, "loss": 1.9663, "step": 91025 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019439161483418422, "loss": 2.1738, "step": 91030 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019439100453946443, "loss": 2.1987, "step": 91035 }, { "epoch": 0.21, "grad_norm": 1.6640625, "learning_rate": 0.00019439039421249898, "loss": 2.1969, "step": 91040 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019438978385328805, "loss": 2.3312, "step": 91045 }, { "epoch": 0.21, "grad_norm": 1.7421875, "learning_rate": 0.00019438917346183187, "loss": 2.2004, "step": 91050 }, { "epoch": 0.21, "grad_norm": 2.0, "learning_rate": 0.00019438856303813061, "loss": 2.0156, "step": 91055 }, { "epoch": 0.21, "grad_norm": 1.5078125, "learning_rate": 0.00019438795258218454, "loss": 2.0127, "step": 91060 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.0001943873420939938, "loss": 2.2272, "step": 91065 }, { "epoch": 0.21, "grad_norm": 2.5625, "learning_rate": 0.00019438673157355865, "loss": 2.12, "step": 91070 }, { "epoch": 0.21, "grad_norm": 2.078125, "learning_rate": 0.0001943861210208793, "loss": 1.9699, "step": 91075 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.00019438551043595592, "loss": 2.0018, "step": 91080 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.00019438489981878874, "loss": 2.0293, "step": 91085 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.00019438428916937795, "loss": 1.9705, "step": 91090 }, { "epoch": 0.21, "grad_norm": 2.53125, "learning_rate": 0.0001943836784877238, "loss": 2.1491, "step": 91095 }, { "epoch": 0.21, "grad_norm": 1.7578125, "learning_rate": 0.00019438306777382644, "loss": 2.1561, "step": 91100 }, { "epoch": 0.21, "grad_norm": 2.796875, "learning_rate": 0.00019438245702768617, "loss": 2.2669, "step": 91105 }, { "epoch": 0.21, "grad_norm": 2.515625, "learning_rate": 0.0001943818462493031, "loss": 2.3046, "step": 91110 }, { "epoch": 0.21, "grad_norm": 2.09375, "learning_rate": 0.0001943812354386775, "loss": 2.1809, "step": 91115 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019438062459580953, "loss": 1.9418, "step": 91120 }, { "epoch": 0.21, "grad_norm": 2.40625, "learning_rate": 0.00019438001372069943, "loss": 1.9866, "step": 91125 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.0001943794028133474, "loss": 2.0578, "step": 91130 }, { "epoch": 0.21, "grad_norm": 2.484375, "learning_rate": 0.00019437879187375368, "loss": 2.1578, "step": 91135 }, { "epoch": 0.21, "grad_norm": 1.609375, "learning_rate": 0.00019437818090191846, "loss": 2.3188, "step": 91140 }, { "epoch": 0.21, "grad_norm": 2.234375, "learning_rate": 0.00019437756989784192, "loss": 1.9799, "step": 91145 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.0001943769588615243, "loss": 2.2272, "step": 91150 }, { "epoch": 0.21, "grad_norm": 4.21875, "learning_rate": 0.0001943763477929658, "loss": 2.055, "step": 91155 }, { "epoch": 0.21, "grad_norm": 2.421875, "learning_rate": 0.00019437573669216663, "loss": 2.03, "step": 91160 }, { "epoch": 0.21, "grad_norm": 1.9296875, "learning_rate": 0.00019437512555912697, "loss": 2.0698, "step": 91165 }, { "epoch": 0.21, "grad_norm": 2.234375, "learning_rate": 0.00019437451439384708, "loss": 2.2428, "step": 91170 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.00019437390319632713, "loss": 2.0305, "step": 91175 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019437329196656737, "loss": 2.2456, "step": 91180 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019437268070456796, "loss": 2.0663, "step": 91185 }, { "epoch": 0.21, "grad_norm": 2.015625, "learning_rate": 0.00019437206941032913, "loss": 2.18, "step": 91190 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.0001943714580838511, "loss": 2.2432, "step": 91195 }, { "epoch": 0.21, "grad_norm": 1.8828125, "learning_rate": 0.00019437084672513407, "loss": 2.3454, "step": 91200 }, { "epoch": 0.21, "grad_norm": 2.296875, "learning_rate": 0.00019437023533417822, "loss": 2.3361, "step": 91205 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019436962391098384, "loss": 2.182, "step": 91210 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.00019436901245555103, "loss": 1.9438, "step": 91215 }, { "epoch": 0.21, "grad_norm": 1.8515625, "learning_rate": 0.00019436840096788007, "loss": 2.1561, "step": 91220 }, { "epoch": 0.21, "grad_norm": 1.65625, "learning_rate": 0.0001943677894479712, "loss": 2.2525, "step": 91225 }, { "epoch": 0.21, "grad_norm": 1.890625, "learning_rate": 0.0001943671778958245, "loss": 2.2217, "step": 91230 }, { "epoch": 0.21, "grad_norm": 1.9765625, "learning_rate": 0.00019436656631144033, "loss": 2.1832, "step": 91235 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019436595469481878, "loss": 2.0511, "step": 91240 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019436534304596013, "loss": 2.1423, "step": 91245 }, { "epoch": 0.21, "grad_norm": 2.265625, "learning_rate": 0.00019436473136486457, "loss": 2.2844, "step": 91250 }, { "epoch": 0.21, "grad_norm": 1.96875, "learning_rate": 0.0001943641196515323, "loss": 2.239, "step": 91255 }, { "epoch": 0.21, "grad_norm": 1.9140625, "learning_rate": 0.00019436350790596354, "loss": 2.2099, "step": 91260 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.0001943628961281585, "loss": 2.3243, "step": 91265 }, { "epoch": 0.21, "grad_norm": 1.5546875, "learning_rate": 0.00019436228431811737, "loss": 2.2338, "step": 91270 }, { "epoch": 0.21, "grad_norm": 1.8984375, "learning_rate": 0.00019436167247584036, "loss": 2.1202, "step": 91275 }, { "epoch": 0.21, "grad_norm": 1.9921875, "learning_rate": 0.0001943610606013277, "loss": 2.0587, "step": 91280 }, { "epoch": 0.21, "grad_norm": 2.203125, "learning_rate": 0.00019436044869457962, "loss": 1.8737, "step": 91285 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019435983675559628, "loss": 2.2711, "step": 91290 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019435922478437792, "loss": 2.175, "step": 91295 }, { "epoch": 0.21, "grad_norm": 2.375, "learning_rate": 0.00019435861278092471, "loss": 2.2479, "step": 91300 }, { "epoch": 0.21, "grad_norm": 2.21875, "learning_rate": 0.00019435800074523688, "loss": 2.1558, "step": 91305 }, { "epoch": 0.21, "grad_norm": 1.703125, "learning_rate": 0.00019435738867731468, "loss": 2.0814, "step": 91310 }, { "epoch": 0.21, "grad_norm": 2.0625, "learning_rate": 0.00019435677657715829, "loss": 2.1416, "step": 91315 }, { "epoch": 0.21, "grad_norm": 1.90625, "learning_rate": 0.00019435616444476788, "loss": 2.0327, "step": 91320 }, { "epoch": 0.21, "grad_norm": 2.046875, "learning_rate": 0.00019435555228014371, "loss": 2.166, "step": 91325 }, { "epoch": 0.21, "grad_norm": 1.6875, "learning_rate": 0.00019435494008328597, "loss": 2.1581, "step": 91330 }, { "epoch": 0.21, "grad_norm": 1.71875, "learning_rate": 0.00019435432785419487, "loss": 2.2492, "step": 91335 }, { "epoch": 0.21, "grad_norm": 3.078125, "learning_rate": 0.00019435371559287063, "loss": 2.0374, "step": 91340 }, { "epoch": 0.21, "grad_norm": 1.7109375, "learning_rate": 0.00019435310329931344, "loss": 2.1691, "step": 91345 }, { "epoch": 0.21, "grad_norm": 2.125, "learning_rate": 0.00019435249097352351, "loss": 2.2094, "step": 91350 }, { "epoch": 0.21, "grad_norm": 2.109375, "learning_rate": 0.00019435187861550107, "loss": 2.1956, "step": 91355 }, { "epoch": 0.21, "grad_norm": 1.6640625, "learning_rate": 0.00019435126622524633, "loss": 2.1318, "step": 91360 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019435065380275947, "loss": 2.1577, "step": 91365 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019435004134804072, "loss": 2.2372, "step": 91370 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.0001943494288610903, "loss": 2.0239, "step": 91375 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019434881634190835, "loss": 2.1008, "step": 91380 }, { "epoch": 0.22, "grad_norm": 1.578125, "learning_rate": 0.0001943482037904952, "loss": 1.9611, "step": 91385 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019434759120685096, "loss": 2.2171, "step": 91390 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019434697859097586, "loss": 2.2046, "step": 91395 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019434636594287014, "loss": 2.2713, "step": 91400 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019434575326253398, "loss": 2.2206, "step": 91405 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019434514054996762, "loss": 2.0434, "step": 91410 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019434452780517123, "loss": 2.1859, "step": 91415 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019434391502814503, "loss": 1.9747, "step": 91420 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019434330221888924, "loss": 2.3452, "step": 91425 }, { "epoch": 0.22, "grad_norm": 2.453125, "learning_rate": 0.0001943426893774041, "loss": 2.1039, "step": 91430 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019434207650368977, "loss": 2.1741, "step": 91435 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.00019434146359774648, "loss": 2.0783, "step": 91440 }, { "epoch": 0.22, "grad_norm": 1.734375, "learning_rate": 0.0001943408506595744, "loss": 1.8852, "step": 91445 }, { "epoch": 0.22, "grad_norm": 1.640625, "learning_rate": 0.0001943402376891738, "loss": 1.9856, "step": 91450 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019433962468654484, "loss": 2.0453, "step": 91455 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.0001943390116516878, "loss": 2.2225, "step": 91460 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019433839858460283, "loss": 2.3854, "step": 91465 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019433778548529014, "loss": 2.1728, "step": 91470 }, { "epoch": 0.22, "grad_norm": 1.671875, "learning_rate": 0.00019433717235374992, "loss": 2.1876, "step": 91475 }, { "epoch": 0.22, "grad_norm": 2.453125, "learning_rate": 0.0001943365591899825, "loss": 2.1871, "step": 91480 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.0001943359459939879, "loss": 2.1791, "step": 91485 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019433533276576648, "loss": 2.1758, "step": 91490 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019433471950531842, "loss": 2.2207, "step": 91495 }, { "epoch": 0.22, "grad_norm": 2.40625, "learning_rate": 0.00019433410621264385, "loss": 2.1676, "step": 91500 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019433349288774308, "loss": 1.9881, "step": 91505 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019433287953061625, "loss": 2.0144, "step": 91510 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.00019433226614126362, "loss": 2.2461, "step": 91515 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019433165271968538, "loss": 2.1952, "step": 91520 }, { "epoch": 0.22, "grad_norm": 1.6015625, "learning_rate": 0.00019433103926588173, "loss": 2.1501, "step": 91525 }, { "epoch": 0.22, "grad_norm": 2.453125, "learning_rate": 0.00019433042577985288, "loss": 2.3132, "step": 91530 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019432981226159908, "loss": 2.2022, "step": 91535 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019432919871112048, "loss": 1.8762, "step": 91540 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.0001943285851284173, "loss": 2.1417, "step": 91545 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019432797151348977, "loss": 2.1143, "step": 91550 }, { "epoch": 0.22, "grad_norm": 1.6875, "learning_rate": 0.0001943273578663381, "loss": 2.0842, "step": 91555 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.0001943267441869625, "loss": 1.9349, "step": 91560 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019432613047536313, "loss": 2.3038, "step": 91565 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001943255167315403, "loss": 2.0073, "step": 91570 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019432490295549415, "loss": 2.1003, "step": 91575 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.0001943242891472249, "loss": 2.1565, "step": 91580 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019432367530673275, "loss": 2.2229, "step": 91585 }, { "epoch": 0.22, "grad_norm": 1.6484375, "learning_rate": 0.00019432306143401794, "loss": 2.0657, "step": 91590 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019432244752908066, "loss": 2.2181, "step": 91595 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019432183359192112, "loss": 2.1074, "step": 91600 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.0001943212196225395, "loss": 2.1216, "step": 91605 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019432060562093607, "loss": 2.273, "step": 91610 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.000194319991587111, "loss": 2.0064, "step": 91615 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019431937752106452, "loss": 2.3742, "step": 91620 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019431876342279684, "loss": 2.1998, "step": 91625 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019431814929230814, "loss": 2.3009, "step": 91630 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019431753512959865, "loss": 1.9581, "step": 91635 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.0001943169209346686, "loss": 2.0769, "step": 91640 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019431630670751815, "loss": 2.2197, "step": 91645 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019431569244814757, "loss": 2.0534, "step": 91650 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019431507815655702, "loss": 2.1557, "step": 91655 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019431446383274672, "loss": 2.1644, "step": 91660 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019431384947671688, "loss": 2.2762, "step": 91665 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019431323508846778, "loss": 2.2634, "step": 91670 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.00019431262066799948, "loss": 2.1587, "step": 91675 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019431200621531237, "loss": 2.1172, "step": 91680 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001943113917304065, "loss": 2.2876, "step": 91685 }, { "epoch": 0.22, "grad_norm": 1.5859375, "learning_rate": 0.00019431077721328218, "loss": 2.1322, "step": 91690 }, { "epoch": 0.22, "grad_norm": 1.7265625, "learning_rate": 0.00019431016266393958, "loss": 2.2029, "step": 91695 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019430954808237892, "loss": 2.1883, "step": 91700 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001943089334686004, "loss": 2.2062, "step": 91705 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019430831882260425, "loss": 2.1302, "step": 91710 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019430770414439064, "loss": 2.2124, "step": 91715 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019430708943395987, "loss": 2.1224, "step": 91720 }, { "epoch": 0.22, "grad_norm": 1.4921875, "learning_rate": 0.00019430647469131202, "loss": 2.2146, "step": 91725 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019430585991644738, "loss": 2.125, "step": 91730 }, { "epoch": 0.22, "grad_norm": 4.03125, "learning_rate": 0.00019430524510936618, "loss": 2.0418, "step": 91735 }, { "epoch": 0.22, "grad_norm": 1.8359375, "learning_rate": 0.00019430463027006857, "loss": 2.1037, "step": 91740 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019430401539855482, "loss": 2.2375, "step": 91745 }, { "epoch": 0.22, "grad_norm": 1.7265625, "learning_rate": 0.00019430340049482507, "loss": 2.0222, "step": 91750 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019430278555887957, "loss": 2.0128, "step": 91755 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019430217059071856, "loss": 2.0743, "step": 91760 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.0001943015555903422, "loss": 2.2038, "step": 91765 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019430094055775072, "loss": 2.2413, "step": 91770 }, { "epoch": 0.22, "grad_norm": 1.6875, "learning_rate": 0.00019430032549294434, "loss": 2.3787, "step": 91775 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019429971039592326, "loss": 2.0886, "step": 91780 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019429909526668765, "loss": 2.2762, "step": 91785 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.0001942984801052378, "loss": 2.1959, "step": 91790 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019429786491157388, "loss": 1.9029, "step": 91795 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019429724968569607, "loss": 2.0901, "step": 91800 }, { "epoch": 0.22, "grad_norm": 1.8359375, "learning_rate": 0.0001942966344276046, "loss": 2.1851, "step": 91805 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019429601913729973, "loss": 2.1645, "step": 91810 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.0001942954038147816, "loss": 2.168, "step": 91815 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.00019429478846005047, "loss": 2.2864, "step": 91820 }, { "epoch": 0.22, "grad_norm": 1.8203125, "learning_rate": 0.0001942941730731065, "loss": 2.1551, "step": 91825 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019429355765394996, "loss": 2.0351, "step": 91830 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.00019429294220258103, "loss": 2.1111, "step": 91835 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.0001942923267189999, "loss": 2.237, "step": 91840 }, { "epoch": 0.22, "grad_norm": 1.703125, "learning_rate": 0.00019429171120320681, "loss": 2.1952, "step": 91845 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.000194291095655202, "loss": 2.1213, "step": 91850 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.00019429048007498558, "loss": 2.297, "step": 91855 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.00019428986446255785, "loss": 2.5354, "step": 91860 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.000194289248817919, "loss": 2.3659, "step": 91865 }, { "epoch": 0.22, "grad_norm": 2.375, "learning_rate": 0.00019428863314106923, "loss": 2.0382, "step": 91870 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019428801743200875, "loss": 2.1501, "step": 91875 }, { "epoch": 0.22, "grad_norm": 1.6953125, "learning_rate": 0.00019428740169073777, "loss": 2.1339, "step": 91880 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001942867859172565, "loss": 2.159, "step": 91885 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019428617011156516, "loss": 2.1601, "step": 91890 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019428555427366395, "loss": 2.0624, "step": 91895 }, { "epoch": 0.22, "grad_norm": 1.5, "learning_rate": 0.00019428493840355308, "loss": 2.2904, "step": 91900 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019428432250123278, "loss": 2.0984, "step": 91905 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019428370656670324, "loss": 2.2304, "step": 91910 }, { "epoch": 0.22, "grad_norm": 1.734375, "learning_rate": 0.0001942830905999647, "loss": 2.0728, "step": 91915 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019428247460101728, "loss": 2.341, "step": 91920 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.0001942818585698613, "loss": 2.081, "step": 91925 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019428124250649693, "loss": 2.0776, "step": 91930 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019428062641092437, "loss": 2.1521, "step": 91935 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019428001028314382, "loss": 2.2441, "step": 91940 }, { "epoch": 0.22, "grad_norm": 1.671875, "learning_rate": 0.00019427939412315555, "loss": 2.1601, "step": 91945 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.0001942787779309597, "loss": 1.9956, "step": 91950 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.0001942781617065565, "loss": 2.121, "step": 91955 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019427754544994622, "loss": 2.1453, "step": 91960 }, { "epoch": 0.22, "grad_norm": 1.6640625, "learning_rate": 0.00019427692916112896, "loss": 2.1613, "step": 91965 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019427631284010504, "loss": 2.0513, "step": 91970 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.0001942756964868746, "loss": 2.2317, "step": 91975 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019427508010143787, "loss": 2.0408, "step": 91980 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019427446368379506, "loss": 2.3221, "step": 91985 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.0001942738472339464, "loss": 2.2736, "step": 91990 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019427323075189206, "loss": 2.2028, "step": 91995 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019427261423763227, "loss": 2.2216, "step": 92000 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.0001942719976911673, "loss": 2.0837, "step": 92005 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019427138111249726, "loss": 2.2226, "step": 92010 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.0001942707645016224, "loss": 2.3018, "step": 92015 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019427014785854295, "loss": 2.1901, "step": 92020 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019426953118325911, "loss": 2.4474, "step": 92025 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019426891447577109, "loss": 2.143, "step": 92030 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.0001942682977360791, "loss": 2.4206, "step": 92035 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019426768096418337, "loss": 1.9066, "step": 92040 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019426706416008407, "loss": 2.0383, "step": 92045 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.0001942664473237814, "loss": 2.1761, "step": 92050 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019426583045527564, "loss": 2.1686, "step": 92055 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.00019426521355456696, "loss": 1.9209, "step": 92060 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019426459662165557, "loss": 2.0523, "step": 92065 }, { "epoch": 0.22, "grad_norm": 1.5859375, "learning_rate": 0.00019426397965654167, "loss": 2.1672, "step": 92070 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001942633626592255, "loss": 2.2372, "step": 92075 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.00019426274562970726, "loss": 1.9276, "step": 92080 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019426212856798715, "loss": 2.2782, "step": 92085 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.0001942615114740654, "loss": 2.0277, "step": 92090 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019426089434794217, "loss": 2.2798, "step": 92095 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019426027718961775, "loss": 2.0318, "step": 92100 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019425965999909228, "loss": 1.976, "step": 92105 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.000194259042776366, "loss": 2.0128, "step": 92110 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019425842552143915, "loss": 2.3042, "step": 92115 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.00019425780823431192, "loss": 2.1754, "step": 92120 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019425719091498449, "loss": 2.3082, "step": 92125 }, { "epoch": 0.22, "grad_norm": 2.890625, "learning_rate": 0.00019425657356345708, "loss": 1.9649, "step": 92130 }, { "epoch": 0.22, "grad_norm": 2.546875, "learning_rate": 0.00019425595617972993, "loss": 2.2674, "step": 92135 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019425533876380325, "loss": 2.2496, "step": 92140 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001942547213156772, "loss": 2.1632, "step": 92145 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.0001942541038353521, "loss": 2.3504, "step": 92150 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019425348632282804, "loss": 2.0816, "step": 92155 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.00019425286877810524, "loss": 2.1857, "step": 92160 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.000194252251201184, "loss": 2.0426, "step": 92165 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019425163359206447, "loss": 2.123, "step": 92170 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019425101595074688, "loss": 2.2639, "step": 92175 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.0001942503982772314, "loss": 2.2061, "step": 92180 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019424978057151833, "loss": 2.1198, "step": 92185 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019424916283360778, "loss": 2.2254, "step": 92190 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019424854506350006, "loss": 2.1391, "step": 92195 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.00019424792726119528, "loss": 2.0893, "step": 92200 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019424730942669373, "loss": 2.2339, "step": 92205 }, { "epoch": 0.22, "grad_norm": 1.6484375, "learning_rate": 0.00019424669155999555, "loss": 2.3082, "step": 92210 }, { "epoch": 0.22, "grad_norm": 1.6640625, "learning_rate": 0.000194246073661101, "loss": 1.9217, "step": 92215 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019424545573001032, "loss": 2.2676, "step": 92220 }, { "epoch": 0.22, "grad_norm": 1.8828125, "learning_rate": 0.00019424483776672363, "loss": 2.1214, "step": 92225 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019424421977124124, "loss": 2.1576, "step": 92230 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.0001942436017435633, "loss": 2.0363, "step": 92235 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019424298368369002, "loss": 2.2541, "step": 92240 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019424236559162163, "loss": 2.2144, "step": 92245 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019424174746735838, "loss": 2.2499, "step": 92250 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.0001942411293109004, "loss": 2.1776, "step": 92255 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019424051112224794, "loss": 2.3541, "step": 92260 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019423989290140122, "loss": 2.1174, "step": 92265 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019423927464836047, "loss": 2.1188, "step": 92270 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019423865636312583, "loss": 2.1544, "step": 92275 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019423803804569757, "loss": 2.2863, "step": 92280 }, { "epoch": 0.22, "grad_norm": 1.5625, "learning_rate": 0.0001942374196960759, "loss": 1.9259, "step": 92285 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019423680131426104, "loss": 2.3354, "step": 92290 }, { "epoch": 0.22, "grad_norm": 2.609375, "learning_rate": 0.00019423618290025313, "loss": 2.3179, "step": 92295 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019423556445405247, "loss": 2.2808, "step": 92300 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001942349459756592, "loss": 1.8851, "step": 92305 }, { "epoch": 0.22, "grad_norm": 2.21875, "learning_rate": 0.00019423432746507358, "loss": 2.136, "step": 92310 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.0001942337089222958, "loss": 2.0969, "step": 92315 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.0001942330903473261, "loss": 2.2791, "step": 92320 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.00019423247174016464, "loss": 2.2583, "step": 92325 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019423185310081165, "loss": 2.0735, "step": 92330 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019423123442926737, "loss": 2.1316, "step": 92335 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.000194230615725532, "loss": 2.0701, "step": 92340 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.0001942299969896057, "loss": 2.1113, "step": 92345 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.00019422937822148877, "loss": 2.3704, "step": 92350 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019422875942118136, "loss": 1.9206, "step": 92355 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019422814058868372, "loss": 2.115, "step": 92360 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019422752172399598, "loss": 2.2235, "step": 92365 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019422690282711846, "loss": 2.1426, "step": 92370 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.00019422628389805131, "loss": 2.1295, "step": 92375 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019422566493679474, "loss": 2.1972, "step": 92380 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019422504594334897, "loss": 2.1608, "step": 92385 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019422442691771424, "loss": 2.1732, "step": 92390 }, { "epoch": 0.22, "grad_norm": 2.609375, "learning_rate": 0.0001942238078598907, "loss": 2.1698, "step": 92395 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.00019422318876987865, "loss": 2.0968, "step": 92400 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.00019422256964767823, "loss": 2.3136, "step": 92405 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019422195049328964, "loss": 2.1944, "step": 92410 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.00019422133130671317, "loss": 2.0228, "step": 92415 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.00019422071208794893, "loss": 2.322, "step": 92420 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019422009283699723, "loss": 2.1658, "step": 92425 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019421947355385824, "loss": 2.1118, "step": 92430 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019421885423853214, "loss": 2.0975, "step": 92435 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019421823489101917, "loss": 1.9286, "step": 92440 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019421761551131957, "loss": 2.2041, "step": 92445 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.0001942169960994335, "loss": 2.2006, "step": 92450 }, { "epoch": 0.22, "grad_norm": 2.21875, "learning_rate": 0.0001942163766553612, "loss": 2.0863, "step": 92455 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001942157571791029, "loss": 2.3022, "step": 92460 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019421513767065878, "loss": 2.1748, "step": 92465 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.00019421451813002904, "loss": 2.166, "step": 92470 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.0001942138985572139, "loss": 2.0535, "step": 92475 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019421327895221362, "loss": 2.1539, "step": 92480 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019421265931502836, "loss": 2.2699, "step": 92485 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019421203964565834, "loss": 2.2003, "step": 92490 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.0001942114199441038, "loss": 2.1625, "step": 92495 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001942108002103649, "loss": 2.1391, "step": 92500 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.0001942101804444419, "loss": 2.3465, "step": 92505 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.000194209560646335, "loss": 2.1699, "step": 92510 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019420894081604442, "loss": 2.2717, "step": 92515 }, { "epoch": 0.22, "grad_norm": 1.734375, "learning_rate": 0.00019420832095357033, "loss": 2.0884, "step": 92520 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019420770105891297, "loss": 2.1953, "step": 92525 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019420708113207256, "loss": 2.204, "step": 92530 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019420646117304928, "loss": 2.1635, "step": 92535 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.0001942058411818434, "loss": 2.3056, "step": 92540 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019420522115845508, "loss": 2.1394, "step": 92545 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019420460110288455, "loss": 2.0788, "step": 92550 }, { "epoch": 0.22, "grad_norm": 1.6875, "learning_rate": 0.00019420398101513203, "loss": 2.0818, "step": 92555 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019420336089519768, "loss": 2.0031, "step": 92560 }, { "epoch": 0.22, "grad_norm": 1.703125, "learning_rate": 0.0001942027407430818, "loss": 2.289, "step": 92565 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019420212055878454, "loss": 2.0337, "step": 92570 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.0001942015003423061, "loss": 2.071, "step": 92575 }, { "epoch": 0.22, "grad_norm": 2.515625, "learning_rate": 0.00019420088009364675, "loss": 2.0777, "step": 92580 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019420025981280666, "loss": 2.1077, "step": 92585 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019419963949978607, "loss": 2.0912, "step": 92590 }, { "epoch": 0.22, "grad_norm": 1.453125, "learning_rate": 0.00019419901915458516, "loss": 2.0121, "step": 92595 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019419839877720416, "loss": 2.1312, "step": 92600 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019419777836764327, "loss": 2.2611, "step": 92605 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019419715792590273, "loss": 2.0903, "step": 92610 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.0001941965374519827, "loss": 2.2115, "step": 92615 }, { "epoch": 0.22, "grad_norm": 1.6328125, "learning_rate": 0.00019419591694588346, "loss": 2.2094, "step": 92620 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019419529640760517, "loss": 2.2613, "step": 92625 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019419467583714806, "loss": 2.1836, "step": 92630 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019419405523451232, "loss": 2.3918, "step": 92635 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019419343459969825, "loss": 1.9969, "step": 92640 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019419281393270592, "loss": 2.0655, "step": 92645 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019419219323353563, "loss": 2.18, "step": 92650 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.00019419157250218764, "loss": 2.0176, "step": 92655 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.000194190951738662, "loss": 2.2087, "step": 92660 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.00019419033094295908, "loss": 2.2188, "step": 92665 }, { "epoch": 0.22, "grad_norm": 1.71875, "learning_rate": 0.00019418971011507904, "loss": 2.3009, "step": 92670 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.0001941890892550221, "loss": 2.2639, "step": 92675 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019418846836278842, "loss": 2.2642, "step": 92680 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019418784743837825, "loss": 2.1389, "step": 92685 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.00019418722648179183, "loss": 2.0751, "step": 92690 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019418660549302933, "loss": 2.1322, "step": 92695 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.000194185984472091, "loss": 2.0254, "step": 92700 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.00019418536341897703, "loss": 2.1173, "step": 92705 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.0001941847423336876, "loss": 2.155, "step": 92710 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019418412121622295, "loss": 2.2005, "step": 92715 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.0001941835000665833, "loss": 2.0365, "step": 92720 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019418287888476888, "loss": 2.0674, "step": 92725 }, { "epoch": 0.22, "grad_norm": 1.7265625, "learning_rate": 0.0001941822576707799, "loss": 2.229, "step": 92730 }, { "epoch": 0.22, "grad_norm": 1.578125, "learning_rate": 0.00019418163642461651, "loss": 2.2127, "step": 92735 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019418101514627898, "loss": 2.2077, "step": 92740 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001941803938357675, "loss": 2.2498, "step": 92745 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.0001941797724930823, "loss": 2.2165, "step": 92750 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.0001941791511182236, "loss": 2.2507, "step": 92755 }, { "epoch": 0.22, "grad_norm": 1.65625, "learning_rate": 0.00019417852971119156, "loss": 2.2142, "step": 92760 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.00019417790827198642, "loss": 2.249, "step": 92765 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.00019417728680060843, "loss": 2.2196, "step": 92770 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.00019417666529705777, "loss": 2.2045, "step": 92775 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019417604376133462, "loss": 2.18, "step": 92780 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.00019417542219343927, "loss": 2.3016, "step": 92785 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019417480059337184, "loss": 2.0044, "step": 92790 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019417417896113262, "loss": 2.1002, "step": 92795 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.00019417355729672178, "loss": 2.3758, "step": 92800 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019417293560013957, "loss": 2.2174, "step": 92805 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019417231387138615, "loss": 2.21, "step": 92810 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019417169211046175, "loss": 2.2329, "step": 92815 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019417107031736663, "loss": 2.0136, "step": 92820 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019417044849210093, "loss": 2.1392, "step": 92825 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019416982663466494, "loss": 2.0603, "step": 92830 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019416920474505877, "loss": 2.2838, "step": 92835 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019416858282328273, "loss": 2.0914, "step": 92840 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019416796086933703, "loss": 2.1422, "step": 92845 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019416733888322178, "loss": 2.1668, "step": 92850 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019416671686493727, "loss": 2.2374, "step": 92855 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019416609481448372, "loss": 1.9601, "step": 92860 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019416547273186133, "loss": 2.1364, "step": 92865 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.0001941648506170703, "loss": 2.3602, "step": 92870 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019416422847011084, "loss": 2.0939, "step": 92875 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019416360629098318, "loss": 2.1171, "step": 92880 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.0001941629840796875, "loss": 2.1851, "step": 92885 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019416236183622407, "loss": 1.9055, "step": 92890 }, { "epoch": 0.22, "grad_norm": 1.640625, "learning_rate": 0.00019416173956059306, "loss": 2.3576, "step": 92895 }, { "epoch": 0.22, "grad_norm": 1.71875, "learning_rate": 0.0001941611172527947, "loss": 2.0565, "step": 92900 }, { "epoch": 0.22, "grad_norm": 1.8203125, "learning_rate": 0.0001941604949128292, "loss": 2.0944, "step": 92905 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019415987254069674, "loss": 2.1371, "step": 92910 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.00019415925013639758, "loss": 2.12, "step": 92915 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.00019415862769993187, "loss": 2.06, "step": 92920 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019415800523129992, "loss": 2.1722, "step": 92925 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001941573827305019, "loss": 2.3684, "step": 92930 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.00019415676019753794, "loss": 2.1316, "step": 92935 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019415613763240836, "loss": 2.0476, "step": 92940 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019415551503511333, "loss": 2.2571, "step": 92945 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001941548924056531, "loss": 2.2577, "step": 92950 }, { "epoch": 0.22, "grad_norm": 1.703125, "learning_rate": 0.00019415426974402783, "loss": 2.1589, "step": 92955 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019415364705023774, "loss": 2.17, "step": 92960 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019415302432428306, "loss": 2.3183, "step": 92965 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019415240156616398, "loss": 2.0299, "step": 92970 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019415177877588077, "loss": 2.2421, "step": 92975 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019415115595343362, "loss": 2.1056, "step": 92980 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019415053309882268, "loss": 2.1279, "step": 92985 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019414991021204822, "loss": 2.0584, "step": 92990 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019414928729311046, "loss": 2.3729, "step": 92995 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001941486643420096, "loss": 2.2478, "step": 93000 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019414804135874585, "loss": 2.2851, "step": 93005 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.0001941474183433194, "loss": 2.146, "step": 93010 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.0001941467952957305, "loss": 2.2436, "step": 93015 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019414617221597932, "loss": 2.09, "step": 93020 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.00019414554910406615, "loss": 2.0858, "step": 93025 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.0001941449259599911, "loss": 2.0195, "step": 93030 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001941443027837545, "loss": 2.1741, "step": 93035 }, { "epoch": 0.22, "grad_norm": 1.7265625, "learning_rate": 0.00019414367957535644, "loss": 2.3996, "step": 93040 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.0001941430563347972, "loss": 2.0889, "step": 93045 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.000194142433062077, "loss": 2.1119, "step": 93050 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019414180975719605, "loss": 2.1961, "step": 93055 }, { "epoch": 0.22, "grad_norm": 1.7265625, "learning_rate": 0.0001941411864201545, "loss": 2.3747, "step": 93060 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019414056305095269, "loss": 1.9418, "step": 93065 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.00019413993964959069, "loss": 2.2392, "step": 93070 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001941393162160688, "loss": 2.1849, "step": 93075 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.0001941386927503872, "loss": 1.8199, "step": 93080 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019413806925254617, "loss": 2.1725, "step": 93085 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019413744572254583, "loss": 2.1863, "step": 93090 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.0001941368221603864, "loss": 2.1882, "step": 93095 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.00019413619856606818, "loss": 2.1356, "step": 93100 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019413557493959128, "loss": 2.1267, "step": 93105 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.000194134951280956, "loss": 2.1387, "step": 93110 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001941343275901625, "loss": 2.1504, "step": 93115 }, { "epoch": 0.22, "grad_norm": 1.8828125, "learning_rate": 0.000194133703867211, "loss": 2.125, "step": 93120 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.0001941330801121017, "loss": 2.095, "step": 93125 }, { "epoch": 0.22, "grad_norm": 2.453125, "learning_rate": 0.00019413245632483487, "loss": 2.2962, "step": 93130 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019413183250541067, "loss": 2.1, "step": 93135 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019413120865382932, "loss": 2.125, "step": 93140 }, { "epoch": 0.22, "grad_norm": 1.703125, "learning_rate": 0.00019413058477009106, "loss": 2.0698, "step": 93145 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.0001941299608541961, "loss": 2.2173, "step": 93150 }, { "epoch": 0.22, "grad_norm": 2.453125, "learning_rate": 0.00019412933690614458, "loss": 2.3039, "step": 93155 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019412871292593683, "loss": 2.0565, "step": 93160 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019412808891357294, "loss": 2.1816, "step": 93165 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019412746486905324, "loss": 2.2403, "step": 93170 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.0001941268407923779, "loss": 2.0852, "step": 93175 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019412621668354709, "loss": 2.1675, "step": 93180 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001941255925425611, "loss": 2.1513, "step": 93185 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019412496836942005, "loss": 2.1118, "step": 93190 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019412434416412423, "loss": 2.2611, "step": 93195 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.0001941237199266738, "loss": 2.0553, "step": 93200 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019412309565706903, "loss": 2.1322, "step": 93205 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019412247135531008, "loss": 2.123, "step": 93210 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019412184702139723, "loss": 2.2653, "step": 93215 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.00019412122265533062, "loss": 2.186, "step": 93220 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019412059825711048, "loss": 2.2517, "step": 93225 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019411997382673705, "loss": 2.1013, "step": 93230 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019411934936421052, "loss": 2.2258, "step": 93235 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019411872486953114, "loss": 2.2073, "step": 93240 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.0001941181003426991, "loss": 2.0529, "step": 93245 }, { "epoch": 0.22, "grad_norm": 1.59375, "learning_rate": 0.00019411747578371456, "loss": 2.1749, "step": 93250 }, { "epoch": 0.22, "grad_norm": 1.7578125, "learning_rate": 0.00019411685119257783, "loss": 2.08, "step": 93255 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019411622656928908, "loss": 1.9963, "step": 93260 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.0001941156019138485, "loss": 2.2138, "step": 93265 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.0001941149772262563, "loss": 2.1369, "step": 93270 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019411435250651275, "loss": 2.0542, "step": 93275 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019411372775461803, "loss": 1.9594, "step": 93280 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019411310297057235, "loss": 2.1434, "step": 93285 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019411247815437593, "loss": 1.9749, "step": 93290 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019411185330602896, "loss": 2.258, "step": 93295 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.00019411122842553168, "loss": 2.0427, "step": 93300 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.0001941106035128843, "loss": 2.181, "step": 93305 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019410997856808706, "loss": 2.2157, "step": 93310 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.0001941093535911401, "loss": 2.2527, "step": 93315 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019410872858204373, "loss": 2.2053, "step": 93320 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019410810354079808, "loss": 1.9325, "step": 93325 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019410747846740337, "loss": 1.9882, "step": 93330 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.00019410685336185987, "loss": 2.1957, "step": 93335 }, { "epoch": 0.22, "grad_norm": 2.484375, "learning_rate": 0.00019410622822416777, "loss": 2.0518, "step": 93340 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019410560305432726, "loss": 1.9456, "step": 93345 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019410497785233858, "loss": 2.1819, "step": 93350 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019410435261820193, "loss": 2.2374, "step": 93355 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019410372735191751, "loss": 2.2701, "step": 93360 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019410310205348556, "loss": 2.1396, "step": 93365 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.0001941024767229063, "loss": 2.1716, "step": 93370 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001941018513601799, "loss": 2.2693, "step": 93375 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.0001941012259653066, "loss": 2.205, "step": 93380 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019410060053828664, "loss": 2.1951, "step": 93385 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.0001940999750791202, "loss": 2.1146, "step": 93390 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001940993495878075, "loss": 2.2612, "step": 93395 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019409872406434876, "loss": 2.2333, "step": 93400 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019409809850874416, "loss": 2.2494, "step": 93405 }, { "epoch": 0.22, "grad_norm": 1.6015625, "learning_rate": 0.000194097472920994, "loss": 2.231, "step": 93410 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.0001940968473010984, "loss": 2.0915, "step": 93415 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.0001940962216490576, "loss": 2.039, "step": 93420 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019409559596487183, "loss": 2.1036, "step": 93425 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019409497024854133, "loss": 2.2432, "step": 93430 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019409434450006624, "loss": 2.0629, "step": 93435 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019409371871944683, "loss": 2.2881, "step": 93440 }, { "epoch": 0.22, "grad_norm": 1.5390625, "learning_rate": 0.00019409309290668332, "loss": 2.1038, "step": 93445 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.00019409246706177585, "loss": 2.0092, "step": 93450 }, { "epoch": 0.22, "grad_norm": 1.71875, "learning_rate": 0.00019409184118472474, "loss": 2.1621, "step": 93455 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019409121527553015, "loss": 2.0546, "step": 93460 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019409058933419227, "loss": 2.0439, "step": 93465 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019408996336071134, "loss": 1.9917, "step": 93470 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.00019408933735508757, "loss": 2.1535, "step": 93475 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019408871131732118, "loss": 2.2295, "step": 93480 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019408808524741238, "loss": 2.0858, "step": 93485 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.0001940874591453614, "loss": 2.1885, "step": 93490 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019408683301116842, "loss": 2.2342, "step": 93495 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019408620684483369, "loss": 2.1293, "step": 93500 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019408558064635737, "loss": 2.0576, "step": 93505 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.00019408495441573974, "loss": 2.1795, "step": 93510 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019408432815298096, "loss": 2.2083, "step": 93515 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.0001940837018580813, "loss": 2.3108, "step": 93520 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.0001940830755310409, "loss": 2.1272, "step": 93525 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019408244917186004, "loss": 2.2178, "step": 93530 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.0001940818227805389, "loss": 2.0004, "step": 93535 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001940811963570777, "loss": 2.2133, "step": 93540 }, { "epoch": 0.22, "grad_norm": 2.6875, "learning_rate": 0.0001940805699014767, "loss": 2.1302, "step": 93545 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.00019407994341373604, "loss": 2.13, "step": 93550 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019407931689385593, "loss": 2.2306, "step": 93555 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019407869034183668, "loss": 2.1003, "step": 93560 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001940780637576784, "loss": 2.3024, "step": 93565 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019407743714138138, "loss": 2.0846, "step": 93570 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019407681049294576, "loss": 2.1429, "step": 93575 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019407618381237182, "loss": 2.2267, "step": 93580 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019407555709965974, "loss": 2.0612, "step": 93585 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019407493035480976, "loss": 2.1724, "step": 93590 }, { "epoch": 0.22, "grad_norm": 1.65625, "learning_rate": 0.00019407430357782209, "loss": 2.331, "step": 93595 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.0001940736767686969, "loss": 2.2216, "step": 93600 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019407304992743446, "loss": 2.2298, "step": 93605 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019407242305403494, "loss": 2.118, "step": 93610 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.0001940717961484986, "loss": 1.9417, "step": 93615 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.0001940711692108256, "loss": 2.1703, "step": 93620 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.00019407054224101618, "loss": 2.2256, "step": 93625 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019406991523907057, "loss": 2.2911, "step": 93630 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019406928820498896, "loss": 2.0303, "step": 93635 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.0001940686611387716, "loss": 2.1456, "step": 93640 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019406803404041866, "loss": 2.2756, "step": 93645 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019406740690993036, "loss": 1.9819, "step": 93650 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019406677974730696, "loss": 2.2003, "step": 93655 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019406615255254862, "loss": 2.0957, "step": 93660 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019406552532565558, "loss": 2.0879, "step": 93665 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019406489806662807, "loss": 2.1852, "step": 93670 }, { "epoch": 0.22, "grad_norm": 1.6796875, "learning_rate": 0.00019406427077546626, "loss": 2.0907, "step": 93675 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.0001940636434521704, "loss": 2.3037, "step": 93680 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019406301609674068, "loss": 2.0883, "step": 93685 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019406238870917734, "loss": 1.9714, "step": 93690 }, { "epoch": 0.22, "grad_norm": 1.7109375, "learning_rate": 0.0001940617612894806, "loss": 2.0799, "step": 93695 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019406113383765064, "loss": 2.1533, "step": 93700 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.00019406050635368768, "loss": 2.263, "step": 93705 }, { "epoch": 0.22, "grad_norm": 2.578125, "learning_rate": 0.00019405987883759195, "loss": 2.1944, "step": 93710 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019405925128936367, "loss": 2.0657, "step": 93715 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019405862370900306, "loss": 2.2569, "step": 93720 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.00019405799609651026, "loss": 2.3126, "step": 93725 }, { "epoch": 0.22, "grad_norm": 1.6953125, "learning_rate": 0.0001940573684518856, "loss": 2.0995, "step": 93730 }, { "epoch": 0.22, "grad_norm": 1.5234375, "learning_rate": 0.0001940567407751292, "loss": 2.1777, "step": 93735 }, { "epoch": 0.22, "grad_norm": 2.375, "learning_rate": 0.00019405611306624133, "loss": 2.0314, "step": 93740 }, { "epoch": 0.22, "grad_norm": 1.546875, "learning_rate": 0.0001940554853252222, "loss": 2.1786, "step": 93745 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019405485755207198, "loss": 2.17, "step": 93750 }, { "epoch": 0.22, "grad_norm": 1.734375, "learning_rate": 0.00019405422974679094, "loss": 2.1307, "step": 93755 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019405360190937924, "loss": 2.1651, "step": 93760 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019405297403983715, "loss": 2.2067, "step": 93765 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019405234613816485, "loss": 2.2066, "step": 93770 }, { "epoch": 0.22, "grad_norm": 2.671875, "learning_rate": 0.00019405171820436256, "loss": 2.1897, "step": 93775 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019405109023843051, "loss": 2.2339, "step": 93780 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.0001940504622403689, "loss": 2.2292, "step": 93785 }, { "epoch": 0.22, "grad_norm": 2.71875, "learning_rate": 0.00019404983421017792, "loss": 2.0871, "step": 93790 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019404920614785783, "loss": 2.1752, "step": 93795 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019404857805340885, "loss": 2.3269, "step": 93800 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019404794992683113, "loss": 2.2597, "step": 93805 }, { "epoch": 0.22, "grad_norm": 1.8359375, "learning_rate": 0.00019404732176812496, "loss": 2.1632, "step": 93810 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.00019404669357729048, "loss": 2.2405, "step": 93815 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019404606535432795, "loss": 2.1782, "step": 93820 }, { "epoch": 0.22, "grad_norm": 2.625, "learning_rate": 0.00019404543709923759, "loss": 2.0607, "step": 93825 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.0001940448088120196, "loss": 2.1393, "step": 93830 }, { "epoch": 0.22, "grad_norm": 1.484375, "learning_rate": 0.0001940441804926742, "loss": 2.1017, "step": 93835 }, { "epoch": 0.22, "grad_norm": 1.7109375, "learning_rate": 0.00019404355214120163, "loss": 1.9871, "step": 93840 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.00019404292375760205, "loss": 2.2061, "step": 93845 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019404229534187567, "loss": 2.2365, "step": 93850 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001940416668940228, "loss": 2.2705, "step": 93855 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019404103841404356, "loss": 2.0185, "step": 93860 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001940404099019382, "loss": 1.9966, "step": 93865 }, { "epoch": 0.22, "grad_norm": 2.25, "learning_rate": 0.00019403978135770693, "loss": 1.9757, "step": 93870 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019403915278134995, "loss": 2.2206, "step": 93875 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.0001940385241728675, "loss": 2.1261, "step": 93880 }, { "epoch": 0.22, "grad_norm": 1.5078125, "learning_rate": 0.0001940378955322598, "loss": 2.194, "step": 93885 }, { "epoch": 0.22, "grad_norm": 1.734375, "learning_rate": 0.00019403726685952703, "loss": 2.1934, "step": 93890 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019403663815466943, "loss": 2.0629, "step": 93895 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.0001940360094176872, "loss": 2.0434, "step": 93900 }, { "epoch": 0.22, "grad_norm": 1.8828125, "learning_rate": 0.0001940353806485806, "loss": 2.2808, "step": 93905 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019403475184734975, "loss": 2.1424, "step": 93910 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.00019403412301399495, "loss": 2.1338, "step": 93915 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.0001940334941485164, "loss": 2.184, "step": 93920 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019403286525091428, "loss": 2.2166, "step": 93925 }, { "epoch": 0.22, "grad_norm": 2.375, "learning_rate": 0.00019403223632118887, "loss": 2.0548, "step": 93930 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.0001940316073593403, "loss": 2.1278, "step": 93935 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019403097836536885, "loss": 2.3271, "step": 93940 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001940303493392747, "loss": 2.2111, "step": 93945 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.0001940297202810581, "loss": 2.0823, "step": 93950 }, { "epoch": 0.22, "grad_norm": 1.6015625, "learning_rate": 0.00019402909119071922, "loss": 2.0517, "step": 93955 }, { "epoch": 0.22, "grad_norm": 1.515625, "learning_rate": 0.00019402846206825828, "loss": 2.0911, "step": 93960 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.00019402783291367555, "loss": 2.0519, "step": 93965 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.0001940272037269712, "loss": 2.0585, "step": 93970 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019402657450814546, "loss": 2.2397, "step": 93975 }, { "epoch": 0.22, "grad_norm": 1.8203125, "learning_rate": 0.0001940259452571985, "loss": 2.2454, "step": 93980 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019402531597413058, "loss": 2.1589, "step": 93985 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019402468665894194, "loss": 2.2284, "step": 93990 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019402405731163273, "loss": 2.349, "step": 93995 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.00019402342793220322, "loss": 2.326, "step": 94000 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.0001940227985206536, "loss": 2.1309, "step": 94005 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019402216907698407, "loss": 2.152, "step": 94010 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019402153960119487, "loss": 2.1403, "step": 94015 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.0001940209100932862, "loss": 2.158, "step": 94020 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001940202805532583, "loss": 2.1493, "step": 94025 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019401965098111136, "loss": 2.2089, "step": 94030 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019401902137684558, "loss": 2.2453, "step": 94035 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001940183917404612, "loss": 2.2226, "step": 94040 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019401776207195846, "loss": 2.0615, "step": 94045 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019401713237133752, "loss": 2.2601, "step": 94050 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019401650263859866, "loss": 2.2037, "step": 94055 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.000194015872873742, "loss": 2.1421, "step": 94060 }, { "epoch": 0.22, "grad_norm": 3.0625, "learning_rate": 0.00019401524307676788, "loss": 2.3086, "step": 94065 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.0001940146132476764, "loss": 2.2664, "step": 94070 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019401398338646787, "loss": 2.1569, "step": 94075 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001940133534931424, "loss": 2.1323, "step": 94080 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019401272356770027, "loss": 2.0233, "step": 94085 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019401209361014172, "loss": 2.2957, "step": 94090 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019401146362046695, "loss": 2.1364, "step": 94095 }, { "epoch": 0.22, "grad_norm": 1.6796875, "learning_rate": 0.0001940108335986761, "loss": 2.0588, "step": 94100 }, { "epoch": 0.22, "grad_norm": 2.40625, "learning_rate": 0.0001940102035447695, "loss": 2.36, "step": 94105 }, { "epoch": 0.22, "grad_norm": 1.6640625, "learning_rate": 0.00019400957345874726, "loss": 2.1651, "step": 94110 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.0001940089433406097, "loss": 2.1645, "step": 94115 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019400831319035694, "loss": 2.1903, "step": 94120 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.00019400768300798925, "loss": 2.2198, "step": 94125 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019400705279350682, "loss": 2.1365, "step": 94130 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019400642254690985, "loss": 2.249, "step": 94135 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019400579226819863, "loss": 2.3633, "step": 94140 }, { "epoch": 0.22, "grad_norm": 1.7578125, "learning_rate": 0.00019400516195737332, "loss": 2.2884, "step": 94145 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.0001940045316144341, "loss": 2.16, "step": 94150 }, { "epoch": 0.22, "grad_norm": 1.734375, "learning_rate": 0.0001940039012393813, "loss": 2.0026, "step": 94155 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.000194003270832215, "loss": 2.2081, "step": 94160 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.0001940026403929355, "loss": 2.1261, "step": 94165 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019400200992154297, "loss": 2.1533, "step": 94170 }, { "epoch": 0.22, "grad_norm": 2.328125, "learning_rate": 0.00019400137941803767, "loss": 1.9768, "step": 94175 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.0001940007488824198, "loss": 2.1631, "step": 94180 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019400011831468956, "loss": 2.1902, "step": 94185 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019399948771484717, "loss": 2.3345, "step": 94190 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019399885708289288, "loss": 1.8343, "step": 94195 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019399822641882682, "loss": 2.1994, "step": 94200 }, { "epoch": 0.22, "grad_norm": 2.390625, "learning_rate": 0.00019399759572264933, "loss": 2.0939, "step": 94205 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.0001939969649943605, "loss": 2.0973, "step": 94210 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019399633423396062, "loss": 1.9767, "step": 94215 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.0001939957034414499, "loss": 2.127, "step": 94220 }, { "epoch": 0.22, "grad_norm": 2.5, "learning_rate": 0.00019399507261682854, "loss": 2.1884, "step": 94225 }, { "epoch": 0.22, "grad_norm": 2.875, "learning_rate": 0.00019399444176009675, "loss": 2.0991, "step": 94230 }, { "epoch": 0.22, "grad_norm": 1.5546875, "learning_rate": 0.00019399381087125477, "loss": 2.1638, "step": 94235 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019399317995030278, "loss": 2.2028, "step": 94240 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019399254899724101, "loss": 2.2355, "step": 94245 }, { "epoch": 0.22, "grad_norm": 2.546875, "learning_rate": 0.00019399191801206968, "loss": 2.2369, "step": 94250 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019399128699478904, "loss": 2.2307, "step": 94255 }, { "epoch": 0.22, "grad_norm": 1.9140625, "learning_rate": 0.00019399065594539924, "loss": 2.0471, "step": 94260 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019399002486390055, "loss": 2.2832, "step": 94265 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019398939375029316, "loss": 2.1206, "step": 94270 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019398876260457725, "loss": 2.1354, "step": 94275 }, { "epoch": 0.22, "grad_norm": 2.53125, "learning_rate": 0.0001939881314267531, "loss": 2.3477, "step": 94280 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001939875002168209, "loss": 2.2897, "step": 94285 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.00019398686897478088, "loss": 2.081, "step": 94290 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019398623770063323, "loss": 2.0379, "step": 94295 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019398560639437816, "loss": 2.2065, "step": 94300 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019398497505601593, "loss": 1.9786, "step": 94305 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019398434368554673, "loss": 2.0115, "step": 94310 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.00019398371228297075, "loss": 2.2366, "step": 94315 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019398308084828822, "loss": 2.1258, "step": 94320 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019398244938149942, "loss": 2.0978, "step": 94325 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019398181788260447, "loss": 2.1572, "step": 94330 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019398118635160363, "loss": 2.1175, "step": 94335 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.0001939805547884971, "loss": 2.088, "step": 94340 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001939799231932851, "loss": 2.2815, "step": 94345 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001939792915659679, "loss": 2.1126, "step": 94350 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019397865990654565, "loss": 2.1733, "step": 94355 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019397802821501857, "loss": 2.3442, "step": 94360 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001939773964913869, "loss": 2.18, "step": 94365 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019397676473565086, "loss": 2.1404, "step": 94370 }, { "epoch": 0.22, "grad_norm": 2.21875, "learning_rate": 0.00019397613294781064, "loss": 1.9319, "step": 94375 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019397550112786646, "loss": 2.234, "step": 94380 }, { "epoch": 0.22, "grad_norm": 1.7265625, "learning_rate": 0.00019397486927581856, "loss": 2.2353, "step": 94385 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019397423739166714, "loss": 2.1406, "step": 94390 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019397360547541238, "loss": 2.0629, "step": 94395 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.0001939729735270546, "loss": 2.1212, "step": 94400 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.0001939723415465939, "loss": 2.2114, "step": 94405 }, { "epoch": 0.22, "grad_norm": 1.7109375, "learning_rate": 0.00019397170953403055, "loss": 2.0101, "step": 94410 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019397107748936475, "loss": 2.0409, "step": 94415 }, { "epoch": 0.22, "grad_norm": 2.359375, "learning_rate": 0.00019397044541259674, "loss": 2.2407, "step": 94420 }, { "epoch": 0.22, "grad_norm": 2.859375, "learning_rate": 0.00019396981330372675, "loss": 2.231, "step": 94425 }, { "epoch": 0.22, "grad_norm": 2.265625, "learning_rate": 0.0001939691811627549, "loss": 2.2797, "step": 94430 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019396854898968155, "loss": 2.084, "step": 94435 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.0001939679167845068, "loss": 2.0193, "step": 94440 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.0001939672845472309, "loss": 2.1621, "step": 94445 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.00019396665227785408, "loss": 2.2103, "step": 94450 }, { "epoch": 0.22, "grad_norm": 1.671875, "learning_rate": 0.00019396601997637654, "loss": 2.2261, "step": 94455 }, { "epoch": 0.22, "grad_norm": 1.8359375, "learning_rate": 0.00019396538764279852, "loss": 2.2229, "step": 94460 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019396475527712023, "loss": 2.2193, "step": 94465 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019396412287934182, "loss": 2.1369, "step": 94470 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001939634904494636, "loss": 1.9356, "step": 94475 }, { "epoch": 0.22, "grad_norm": 1.875, "learning_rate": 0.00019396285798748574, "loss": 2.2551, "step": 94480 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019396222549340848, "loss": 2.1666, "step": 94485 }, { "epoch": 0.22, "grad_norm": 1.9609375, "learning_rate": 0.000193961592967232, "loss": 2.2379, "step": 94490 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.00019396096040895654, "loss": 2.2859, "step": 94495 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019396032781858233, "loss": 2.0573, "step": 94500 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019395969519610956, "loss": 2.2242, "step": 94505 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019395906254153844, "loss": 2.3176, "step": 94510 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.00019395842985486922, "loss": 2.1733, "step": 94515 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.00019395779713610205, "loss": 2.2555, "step": 94520 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019395716438523722, "loss": 2.1331, "step": 94525 }, { "epoch": 0.22, "grad_norm": 1.8359375, "learning_rate": 0.00019395653160227493, "loss": 2.1648, "step": 94530 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019395589878721538, "loss": 2.1809, "step": 94535 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.0001939552659400588, "loss": 2.196, "step": 94540 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.0001939546330608054, "loss": 2.1197, "step": 94545 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019395400014945538, "loss": 2.1779, "step": 94550 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019395336720600897, "loss": 1.9954, "step": 94555 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019395273423046639, "loss": 2.2368, "step": 94560 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.00019395210122282784, "loss": 2.1454, "step": 94565 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019395146818309354, "loss": 2.0768, "step": 94570 }, { "epoch": 0.22, "grad_norm": 1.75, "learning_rate": 0.00019395083511126372, "loss": 2.1165, "step": 94575 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019395020200733862, "loss": 2.1814, "step": 94580 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019394956887131841, "loss": 2.166, "step": 94585 }, { "epoch": 0.22, "grad_norm": 2.421875, "learning_rate": 0.00019394893570320334, "loss": 2.1113, "step": 94590 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019394830250299357, "loss": 2.2138, "step": 94595 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019394766927068936, "loss": 2.0701, "step": 94600 }, { "epoch": 0.22, "grad_norm": 1.8359375, "learning_rate": 0.00019394703600629097, "loss": 1.9322, "step": 94605 }, { "epoch": 0.22, "grad_norm": 1.4296875, "learning_rate": 0.00019394640270979855, "loss": 2.2906, "step": 94610 }, { "epoch": 0.22, "grad_norm": 1.7578125, "learning_rate": 0.0001939457693812123, "loss": 2.1639, "step": 94615 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.0001939451360205325, "loss": 2.1109, "step": 94620 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019394450262775933, "loss": 2.1185, "step": 94625 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.00019394386920289303, "loss": 2.0158, "step": 94630 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019394323574593378, "loss": 2.2583, "step": 94635 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019394260225688182, "loss": 2.0305, "step": 94640 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019394196873573737, "loss": 2.0469, "step": 94645 }, { "epoch": 0.22, "grad_norm": 1.7578125, "learning_rate": 0.00019394133518250067, "loss": 2.0321, "step": 94650 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019394070159717183, "loss": 2.0708, "step": 94655 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019394006797975123, "loss": 2.0481, "step": 94660 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019393943433023894, "loss": 2.0865, "step": 94665 }, { "epoch": 0.22, "grad_norm": 1.75, "learning_rate": 0.00019393880064863525, "loss": 2.1429, "step": 94670 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019393816693494037, "loss": 2.2164, "step": 94675 }, { "epoch": 0.22, "grad_norm": 2.5625, "learning_rate": 0.00019393753318915449, "loss": 2.1037, "step": 94680 }, { "epoch": 0.22, "grad_norm": 2.703125, "learning_rate": 0.00019393689941127787, "loss": 2.2809, "step": 94685 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019393626560131066, "loss": 2.2877, "step": 94690 }, { "epoch": 0.22, "grad_norm": 1.8828125, "learning_rate": 0.00019393563175925315, "loss": 2.1351, "step": 94695 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019393499788510554, "loss": 2.1203, "step": 94700 }, { "epoch": 0.22, "grad_norm": 2.0, "learning_rate": 0.000193934363978868, "loss": 2.1791, "step": 94705 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001939337300405408, "loss": 2.086, "step": 94710 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019393309607012413, "loss": 2.0627, "step": 94715 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.0001939324620676182, "loss": 2.1476, "step": 94720 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019393182803302322, "loss": 1.8895, "step": 94725 }, { "epoch": 0.22, "grad_norm": 1.8203125, "learning_rate": 0.0001939311939663395, "loss": 2.0724, "step": 94730 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019393055986756708, "loss": 2.1478, "step": 94735 }, { "epoch": 0.22, "grad_norm": 1.7109375, "learning_rate": 0.00019392992573670633, "loss": 2.2763, "step": 94740 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.0001939292915737574, "loss": 2.1613, "step": 94745 }, { "epoch": 0.22, "grad_norm": 2.296875, "learning_rate": 0.00019392865737872055, "loss": 1.8885, "step": 94750 }, { "epoch": 0.22, "grad_norm": 1.921875, "learning_rate": 0.00019392802315159596, "loss": 2.0677, "step": 94755 }, { "epoch": 0.22, "grad_norm": 2.390625, "learning_rate": 0.00019392738889238383, "loss": 2.4196, "step": 94760 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.0001939267546010844, "loss": 2.1881, "step": 94765 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001939261202776979, "loss": 2.3304, "step": 94770 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.00019392548592222453, "loss": 2.0354, "step": 94775 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001939248515346645, "loss": 2.1936, "step": 94780 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019392421711501804, "loss": 2.2945, "step": 94785 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.00019392358266328538, "loss": 2.1144, "step": 94790 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.00019392294817946674, "loss": 2.2375, "step": 94795 }, { "epoch": 0.22, "grad_norm": 1.65625, "learning_rate": 0.00019392231366356226, "loss": 2.1363, "step": 94800 }, { "epoch": 0.22, "grad_norm": 2.578125, "learning_rate": 0.00019392167911557226, "loss": 2.2729, "step": 94805 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.0001939210445354969, "loss": 2.0873, "step": 94810 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.0001939204099233364, "loss": 2.1248, "step": 94815 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.000193919775279091, "loss": 2.321, "step": 94820 }, { "epoch": 0.22, "grad_norm": 2.59375, "learning_rate": 0.00019391914060276088, "loss": 2.2498, "step": 94825 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019391850589434628, "loss": 2.2899, "step": 94830 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019391787115384743, "loss": 2.2426, "step": 94835 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019391723638126454, "loss": 2.157, "step": 94840 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.0001939166015765978, "loss": 2.0928, "step": 94845 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019391596673984745, "loss": 2.1355, "step": 94850 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001939153318710137, "loss": 2.1808, "step": 94855 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019391469697009676, "loss": 2.1224, "step": 94860 }, { "epoch": 0.22, "grad_norm": 2.375, "learning_rate": 0.00019391406203709687, "loss": 2.201, "step": 94865 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019391342707201427, "loss": 2.0514, "step": 94870 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.00019391279207484907, "loss": 2.3528, "step": 94875 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.0001939121570456016, "loss": 2.1839, "step": 94880 }, { "epoch": 0.22, "grad_norm": 2.21875, "learning_rate": 0.00019391152198427203, "loss": 2.1518, "step": 94885 }, { "epoch": 0.22, "grad_norm": 1.8828125, "learning_rate": 0.00019391088689086058, "loss": 2.1229, "step": 94890 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019391025176536744, "loss": 2.0357, "step": 94895 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019390961660779288, "loss": 2.2506, "step": 94900 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.0001939089814181371, "loss": 2.2385, "step": 94905 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019390834619640028, "loss": 2.2159, "step": 94910 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.0001939077109425827, "loss": 2.1361, "step": 94915 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.0001939070756566845, "loss": 2.2305, "step": 94920 }, { "epoch": 0.22, "grad_norm": 2.125, "learning_rate": 0.00019390644033870596, "loss": 2.3365, "step": 94925 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.0001939058049886473, "loss": 2.2213, "step": 94930 }, { "epoch": 0.22, "grad_norm": 2.21875, "learning_rate": 0.0001939051696065087, "loss": 2.299, "step": 94935 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019390453419229037, "loss": 2.183, "step": 94940 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.00019390389874599258, "loss": 2.0159, "step": 94945 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001939032632676155, "loss": 2.0857, "step": 94950 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019390262775715935, "loss": 2.203, "step": 94955 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019390199221462439, "loss": 2.2184, "step": 94960 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019390135664001078, "loss": 2.1694, "step": 94965 }, { "epoch": 0.22, "grad_norm": 1.578125, "learning_rate": 0.00019390072103331877, "loss": 2.071, "step": 94970 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019390008539454858, "loss": 2.1513, "step": 94975 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019389944972370043, "loss": 2.0711, "step": 94980 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019389881402077447, "loss": 2.2012, "step": 94985 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019389817828577104, "loss": 2.0601, "step": 94990 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019389754251869024, "loss": 2.1283, "step": 94995 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019389690671953234, "loss": 2.1362, "step": 95000 }, { "epoch": 0.22, "grad_norm": 2.390625, "learning_rate": 0.0001938962708882976, "loss": 2.2446, "step": 95005 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.00019389563502498613, "loss": 2.2446, "step": 95010 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019389499912959824, "loss": 2.1057, "step": 95015 }, { "epoch": 0.22, "grad_norm": 1.8828125, "learning_rate": 0.00019389436320213412, "loss": 2.3443, "step": 95020 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.000193893727242594, "loss": 2.2204, "step": 95025 }, { "epoch": 0.22, "grad_norm": 1.9765625, "learning_rate": 0.00019389309125097804, "loss": 2.1886, "step": 95030 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019389245522728648, "loss": 1.9991, "step": 95035 }, { "epoch": 0.22, "grad_norm": 2.203125, "learning_rate": 0.0001938918191715196, "loss": 2.2581, "step": 95040 }, { "epoch": 0.22, "grad_norm": 1.859375, "learning_rate": 0.00019389118308367755, "loss": 2.1199, "step": 95045 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019389054696376058, "loss": 2.0327, "step": 95050 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.0001938899108117689, "loss": 2.1786, "step": 95055 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.00019388927462770273, "loss": 2.0627, "step": 95060 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019388863841156227, "loss": 2.2032, "step": 95065 }, { "epoch": 0.22, "grad_norm": 1.609375, "learning_rate": 0.00019388800216334776, "loss": 2.2292, "step": 95070 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019388736588305937, "loss": 2.2564, "step": 95075 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.0001938867295706974, "loss": 1.9787, "step": 95080 }, { "epoch": 0.22, "grad_norm": 2.46875, "learning_rate": 0.00019388609322626196, "loss": 1.9573, "step": 95085 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019388545684975337, "loss": 2.2139, "step": 95090 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.0001938848204411718, "loss": 2.1331, "step": 95095 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019388418400051748, "loss": 2.0715, "step": 95100 }, { "epoch": 0.22, "grad_norm": 1.84375, "learning_rate": 0.0001938835475277906, "loss": 2.2638, "step": 95105 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.00019388291102299143, "loss": 2.0535, "step": 95110 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019388227448612012, "loss": 2.0951, "step": 95115 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.00019388163791717692, "loss": 2.371, "step": 95120 }, { "epoch": 0.22, "grad_norm": 2.515625, "learning_rate": 0.0001938810013161621, "loss": 2.1507, "step": 95125 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.00019388036468307576, "loss": 2.0959, "step": 95130 }, { "epoch": 0.22, "grad_norm": 1.7734375, "learning_rate": 0.00019387972801791824, "loss": 2.0822, "step": 95135 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.0001938790913206897, "loss": 2.1524, "step": 95140 }, { "epoch": 0.22, "grad_norm": 2.234375, "learning_rate": 0.0001938784545913903, "loss": 2.0669, "step": 95145 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019387781783002038, "loss": 1.9392, "step": 95150 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019387718103658008, "loss": 2.2611, "step": 95155 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019387654421106963, "loss": 2.1316, "step": 95160 }, { "epoch": 0.22, "grad_norm": 1.953125, "learning_rate": 0.00019387590735348923, "loss": 1.923, "step": 95165 }, { "epoch": 0.22, "grad_norm": 1.75, "learning_rate": 0.00019387527046383913, "loss": 2.3284, "step": 95170 }, { "epoch": 0.22, "grad_norm": 2.46875, "learning_rate": 0.00019387463354211953, "loss": 2.0432, "step": 95175 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019387399658833066, "loss": 2.2905, "step": 95180 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019387335960247275, "loss": 2.2669, "step": 95185 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019387272258454598, "loss": 2.3324, "step": 95190 }, { "epoch": 0.22, "grad_norm": 1.4921875, "learning_rate": 0.00019387208553455057, "loss": 2.2238, "step": 95195 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.00019387144845248676, "loss": 2.1004, "step": 95200 }, { "epoch": 0.22, "grad_norm": 1.8125, "learning_rate": 0.00019387081133835478, "loss": 2.1788, "step": 95205 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.00019387017419215483, "loss": 2.2745, "step": 95210 }, { "epoch": 0.22, "grad_norm": 2.46875, "learning_rate": 0.0001938695370138871, "loss": 2.0188, "step": 95215 }, { "epoch": 0.22, "grad_norm": 1.8671875, "learning_rate": 0.00019386889980355183, "loss": 2.0978, "step": 95220 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019386826256114926, "loss": 2.0975, "step": 95225 }, { "epoch": 0.22, "grad_norm": 1.796875, "learning_rate": 0.0001938676252866796, "loss": 2.1815, "step": 95230 }, { "epoch": 0.22, "grad_norm": 1.8515625, "learning_rate": 0.00019386698798014304, "loss": 2.147, "step": 95235 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019386635064153981, "loss": 2.1156, "step": 95240 }, { "epoch": 0.22, "grad_norm": 2.21875, "learning_rate": 0.00019386571327087014, "loss": 2.0963, "step": 95245 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.00019386507586813425, "loss": 2.2067, "step": 95250 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.00019386443843333236, "loss": 2.4637, "step": 95255 }, { "epoch": 0.22, "grad_norm": 1.8984375, "learning_rate": 0.00019386380096646463, "loss": 2.1883, "step": 95260 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019386316346753135, "loss": 2.2444, "step": 95265 }, { "epoch": 0.22, "grad_norm": 2.34375, "learning_rate": 0.0001938625259365327, "loss": 2.2744, "step": 95270 }, { "epoch": 0.22, "grad_norm": 1.9296875, "learning_rate": 0.00019386188837346892, "loss": 2.2245, "step": 95275 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.0001938612507783402, "loss": 2.2986, "step": 95280 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019386061315114682, "loss": 2.0933, "step": 95285 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019385997549188893, "loss": 2.1265, "step": 95290 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019385933780056674, "loss": 2.0508, "step": 95295 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.0001938587000771805, "loss": 2.2817, "step": 95300 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019385806232173044, "loss": 2.0767, "step": 95305 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.0001938574245342168, "loss": 2.0852, "step": 95310 }, { "epoch": 0.22, "grad_norm": 1.828125, "learning_rate": 0.0001938567867146397, "loss": 2.1234, "step": 95315 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.00019385614886299946, "loss": 2.163, "step": 95320 }, { "epoch": 0.22, "grad_norm": 1.9921875, "learning_rate": 0.00019385551097929625, "loss": 2.1815, "step": 95325 }, { "epoch": 0.22, "grad_norm": 1.7421875, "learning_rate": 0.0001938548730635303, "loss": 2.2188, "step": 95330 }, { "epoch": 0.22, "grad_norm": 2.4375, "learning_rate": 0.00019385423511570181, "loss": 2.1827, "step": 95335 }, { "epoch": 0.22, "grad_norm": 2.28125, "learning_rate": 0.00019385359713581103, "loss": 2.3414, "step": 95340 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019385295912385813, "loss": 2.1728, "step": 95345 }, { "epoch": 0.22, "grad_norm": 1.8203125, "learning_rate": 0.00019385232107984338, "loss": 2.1424, "step": 95350 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019385168300376697, "loss": 2.13, "step": 95355 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.0001938510448956291, "loss": 1.9474, "step": 95360 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019385040675543006, "loss": 2.1862, "step": 95365 }, { "epoch": 0.22, "grad_norm": 1.984375, "learning_rate": 0.00019384976858317, "loss": 2.2252, "step": 95370 }, { "epoch": 0.22, "grad_norm": 1.890625, "learning_rate": 0.00019384913037884914, "loss": 2.3154, "step": 95375 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.0001938484921424677, "loss": 2.1848, "step": 95380 }, { "epoch": 0.22, "grad_norm": 2.3125, "learning_rate": 0.00019384785387402596, "loss": 2.2452, "step": 95385 }, { "epoch": 0.22, "grad_norm": 2.03125, "learning_rate": 0.00019384721557352404, "loss": 2.1777, "step": 95390 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019384657724096224, "loss": 2.2674, "step": 95395 }, { "epoch": 0.22, "grad_norm": 1.71875, "learning_rate": 0.00019384593887634074, "loss": 2.0897, "step": 95400 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.0001938453004796598, "loss": 2.2726, "step": 95405 }, { "epoch": 0.22, "grad_norm": 2.140625, "learning_rate": 0.00019384466205091955, "loss": 2.2104, "step": 95410 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019384402359012026, "loss": 1.9652, "step": 95415 }, { "epoch": 0.22, "grad_norm": 1.765625, "learning_rate": 0.00019384338509726218, "loss": 2.2433, "step": 95420 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019384274657234548, "loss": 2.1022, "step": 95425 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019384210801537042, "loss": 2.111, "step": 95430 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019384146942633717, "loss": 2.226, "step": 95435 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.000193840830805246, "loss": 2.1851, "step": 95440 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019384019215209708, "loss": 2.2065, "step": 95445 }, { "epoch": 0.22, "grad_norm": 1.7578125, "learning_rate": 0.00019383955346689066, "loss": 2.0787, "step": 95450 }, { "epoch": 0.22, "grad_norm": 2.1875, "learning_rate": 0.00019383891474962693, "loss": 2.2376, "step": 95455 }, { "epoch": 0.22, "grad_norm": 1.9453125, "learning_rate": 0.00019383827600030613, "loss": 2.111, "step": 95460 }, { "epoch": 0.22, "grad_norm": 2.578125, "learning_rate": 0.00019383763721892848, "loss": 1.9208, "step": 95465 }, { "epoch": 0.22, "grad_norm": 2.09375, "learning_rate": 0.00019383699840549418, "loss": 2.1642, "step": 95470 }, { "epoch": 0.22, "grad_norm": 1.6953125, "learning_rate": 0.00019383635956000345, "loss": 2.3062, "step": 95475 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019383572068245653, "loss": 2.2234, "step": 95480 }, { "epoch": 0.22, "grad_norm": 2.015625, "learning_rate": 0.00019383508177285366, "loss": 2.246, "step": 95485 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.000193834442831195, "loss": 2.0659, "step": 95490 }, { "epoch": 0.22, "grad_norm": 1.78125, "learning_rate": 0.00019383380385748077, "loss": 2.1742, "step": 95495 }, { "epoch": 0.22, "grad_norm": 1.6875, "learning_rate": 0.00019383316485171125, "loss": 2.1909, "step": 95500 }, { "epoch": 0.22, "grad_norm": 1.7890625, "learning_rate": 0.00019383252581388658, "loss": 2.2364, "step": 95505 }, { "epoch": 0.22, "grad_norm": 1.9375, "learning_rate": 0.00019383188674400702, "loss": 2.1492, "step": 95510 }, { "epoch": 0.22, "grad_norm": 2.484375, "learning_rate": 0.0001938312476420728, "loss": 2.0499, "step": 95515 }, { "epoch": 0.22, "grad_norm": 1.5625, "learning_rate": 0.00019383060850808415, "loss": 2.2661, "step": 95520 }, { "epoch": 0.22, "grad_norm": 2.109375, "learning_rate": 0.0001938299693420412, "loss": 2.1099, "step": 95525 }, { "epoch": 0.22, "grad_norm": 1.5859375, "learning_rate": 0.0001938293301439443, "loss": 2.0973, "step": 95530 }, { "epoch": 0.22, "grad_norm": 1.90625, "learning_rate": 0.00019382869091379355, "loss": 2.091, "step": 95535 }, { "epoch": 0.22, "grad_norm": 1.8046875, "learning_rate": 0.00019382805165158923, "loss": 2.0792, "step": 95540 }, { "epoch": 0.22, "grad_norm": 2.0625, "learning_rate": 0.00019382741235733156, "loss": 2.4243, "step": 95545 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019382677303102073, "loss": 2.1437, "step": 95550 }, { "epoch": 0.22, "grad_norm": 2.875, "learning_rate": 0.00019382613367265698, "loss": 2.2688, "step": 95555 }, { "epoch": 0.22, "grad_norm": 2.515625, "learning_rate": 0.00019382549428224053, "loss": 2.1918, "step": 95560 }, { "epoch": 0.22, "grad_norm": 1.6875, "learning_rate": 0.00019382485485977156, "loss": 2.168, "step": 95565 }, { "epoch": 0.22, "grad_norm": 2.171875, "learning_rate": 0.00019382421540525035, "loss": 2.0975, "step": 95570 }, { "epoch": 0.22, "grad_norm": 2.375, "learning_rate": 0.00019382357591867706, "loss": 2.1805, "step": 95575 }, { "epoch": 0.22, "grad_norm": 1.96875, "learning_rate": 0.00019382293640005194, "loss": 2.2271, "step": 95580 }, { "epoch": 0.22, "grad_norm": 2.078125, "learning_rate": 0.0001938222968493752, "loss": 2.0657, "step": 95585 }, { "epoch": 0.22, "grad_norm": 2.15625, "learning_rate": 0.0001938216572666471, "loss": 2.2402, "step": 95590 }, { "epoch": 0.22, "grad_norm": 1.6328125, "learning_rate": 0.0001938210176518678, "loss": 2.0207, "step": 95595 }, { "epoch": 0.22, "grad_norm": 1.6015625, "learning_rate": 0.00019382037800503752, "loss": 2.1011, "step": 95600 }, { "epoch": 0.22, "grad_norm": 2.046875, "learning_rate": 0.0001938197383261565, "loss": 2.2196, "step": 95605 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019381909861522498, "loss": 2.3524, "step": 95610 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019381845887224313, "loss": 2.2154, "step": 95615 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.0001938178190972112, "loss": 2.0744, "step": 95620 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019381717929012942, "loss": 2.0718, "step": 95625 }, { "epoch": 0.23, "grad_norm": 1.5078125, "learning_rate": 0.00019381653945099795, "loss": 2.0707, "step": 95630 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019381589957981706, "loss": 2.3235, "step": 95635 }, { "epoch": 0.23, "grad_norm": 1.71875, "learning_rate": 0.00019381525967658697, "loss": 2.0441, "step": 95640 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.0001938146197413079, "loss": 2.105, "step": 95645 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019381397977398003, "loss": 2.1422, "step": 95650 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019381333977460362, "loss": 2.2301, "step": 95655 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001938126997431789, "loss": 2.337, "step": 95660 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.000193812059679706, "loss": 2.0434, "step": 95665 }, { "epoch": 0.23, "grad_norm": 1.8125, "learning_rate": 0.00019381141958418522, "loss": 2.2859, "step": 95670 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019381077945661675, "loss": 2.0418, "step": 95675 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.00019381013929700082, "loss": 2.0474, "step": 95680 }, { "epoch": 0.23, "grad_norm": 2.734375, "learning_rate": 0.00019380949910533768, "loss": 2.2176, "step": 95685 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019380885888162748, "loss": 2.1938, "step": 95690 }, { "epoch": 0.23, "grad_norm": 1.7109375, "learning_rate": 0.00019380821862587046, "loss": 2.3587, "step": 95695 }, { "epoch": 0.23, "grad_norm": 1.703125, "learning_rate": 0.00019380757833806689, "loss": 2.1845, "step": 95700 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019380693801821695, "loss": 2.3057, "step": 95705 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019380629766632083, "loss": 2.4185, "step": 95710 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019380565728237877, "loss": 2.1162, "step": 95715 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.000193805016866391, "loss": 2.249, "step": 95720 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019380437641835778, "loss": 2.1065, "step": 95725 }, { "epoch": 0.23, "grad_norm": 1.640625, "learning_rate": 0.00019380373593827924, "loss": 2.1254, "step": 95730 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019380309542615565, "loss": 2.1885, "step": 95735 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019380245488198722, "loss": 2.1142, "step": 95740 }, { "epoch": 0.23, "grad_norm": 2.78125, "learning_rate": 0.0001938018143057742, "loss": 2.0058, "step": 95745 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019380117369751675, "loss": 2.0341, "step": 95750 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019380053305721512, "loss": 2.0748, "step": 95755 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019379989238486954, "loss": 2.1064, "step": 95760 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.0001937992516804802, "loss": 2.1875, "step": 95765 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019379861094404732, "loss": 2.3321, "step": 95770 }, { "epoch": 0.23, "grad_norm": 2.265625, "learning_rate": 0.00019379797017557115, "loss": 2.3645, "step": 95775 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.0001937973293750519, "loss": 2.1995, "step": 95780 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019379668854248977, "loss": 2.139, "step": 95785 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.000193796047677885, "loss": 2.0528, "step": 95790 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.0001937954067812378, "loss": 1.9737, "step": 95795 }, { "epoch": 0.23, "grad_norm": 1.84375, "learning_rate": 0.00019379476585254838, "loss": 1.9384, "step": 95800 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019379412489181698, "loss": 2.1679, "step": 95805 }, { "epoch": 0.23, "grad_norm": 2.375, "learning_rate": 0.0001937934838990438, "loss": 2.2447, "step": 95810 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019379284287422905, "loss": 2.1632, "step": 95815 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019379220181737296, "loss": 2.1702, "step": 95820 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.0001937915607284758, "loss": 2.1975, "step": 95825 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.0001937909196075377, "loss": 2.1877, "step": 95830 }, { "epoch": 0.23, "grad_norm": 2.640625, "learning_rate": 0.00019379027845455892, "loss": 2.1448, "step": 95835 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019378963726953972, "loss": 1.9071, "step": 95840 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.0001937889960524802, "loss": 2.2353, "step": 95845 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019378835480338074, "loss": 2.0941, "step": 95850 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019378771352224143, "loss": 2.1185, "step": 95855 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019378707220906255, "loss": 2.2406, "step": 95860 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019378643086384432, "loss": 2.0372, "step": 95865 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019378578948658692, "loss": 2.3614, "step": 95870 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.0001937851480772906, "loss": 2.3316, "step": 95875 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.0001937845066359556, "loss": 2.2426, "step": 95880 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019378386516258206, "loss": 2.0898, "step": 95885 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.0001937832236571703, "loss": 2.1528, "step": 95890 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019378258211972046, "loss": 2.1837, "step": 95895 }, { "epoch": 0.23, "grad_norm": 1.7109375, "learning_rate": 0.0001937819405502328, "loss": 2.1079, "step": 95900 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019378129894870752, "loss": 2.3147, "step": 95905 }, { "epoch": 0.23, "grad_norm": 1.59375, "learning_rate": 0.00019378065731514486, "loss": 2.0916, "step": 95910 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.00019378001564954503, "loss": 2.2064, "step": 95915 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019377937395190822, "loss": 2.3294, "step": 95920 }, { "epoch": 0.23, "grad_norm": 2.75, "learning_rate": 0.0001937787322222347, "loss": 2.044, "step": 95925 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019377809046052465, "loss": 2.1092, "step": 95930 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019377744866677833, "loss": 2.2092, "step": 95935 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.0001937768068409959, "loss": 2.1759, "step": 95940 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019377616498317761, "loss": 2.3442, "step": 95945 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.0001937755230933237, "loss": 2.3753, "step": 95950 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019377488117143437, "loss": 2.1615, "step": 95955 }, { "epoch": 0.23, "grad_norm": 2.625, "learning_rate": 0.00019377423921750983, "loss": 2.1881, "step": 95960 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019377359723155032, "loss": 2.036, "step": 95965 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019377295521355605, "loss": 2.2302, "step": 95970 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019377231316352722, "loss": 2.1901, "step": 95975 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019377167108146408, "loss": 2.2487, "step": 95980 }, { "epoch": 0.23, "grad_norm": 1.6953125, "learning_rate": 0.00019377102896736683, "loss": 2.2593, "step": 95985 }, { "epoch": 0.23, "grad_norm": 2.640625, "learning_rate": 0.00019377038682123572, "loss": 2.2592, "step": 95990 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019376974464307089, "loss": 2.0736, "step": 95995 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.00019376910243287267, "loss": 2.1374, "step": 96000 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.0001937684601906412, "loss": 2.1479, "step": 96005 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019376781791637676, "loss": 2.2079, "step": 96010 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.0001937671756100795, "loss": 2.0944, "step": 96015 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019376653327174962, "loss": 2.0415, "step": 96020 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019376589090138748, "loss": 2.1643, "step": 96025 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.00019376524849899318, "loss": 2.2947, "step": 96030 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.00019376460606456694, "loss": 2.2522, "step": 96035 }, { "epoch": 0.23, "grad_norm": 2.375, "learning_rate": 0.00019376396359810906, "loss": 2.1272, "step": 96040 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019376332109961967, "loss": 2.0906, "step": 96045 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019376267856909904, "loss": 2.1959, "step": 96050 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.0001937620360065474, "loss": 2.3812, "step": 96055 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.0001937613934119649, "loss": 2.227, "step": 96060 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019376075078535187, "loss": 2.1671, "step": 96065 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019376010812670842, "loss": 2.0571, "step": 96070 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019375946543603485, "loss": 2.3115, "step": 96075 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.0001937588227133313, "loss": 2.1497, "step": 96080 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019375817995859808, "loss": 2.327, "step": 96085 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019375753717183536, "loss": 2.1884, "step": 96090 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019375689435304337, "loss": 2.3282, "step": 96095 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.0001937562515022223, "loss": 2.0372, "step": 96100 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.00019375560861937242, "loss": 2.0655, "step": 96105 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019375496570449389, "loss": 2.1521, "step": 96110 }, { "epoch": 0.23, "grad_norm": 2.515625, "learning_rate": 0.00019375432275758696, "loss": 2.295, "step": 96115 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.0001937536797786519, "loss": 2.1246, "step": 96120 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019375303676768884, "loss": 2.2341, "step": 96125 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019375239372469808, "loss": 2.2173, "step": 96130 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019375175064967977, "loss": 2.266, "step": 96135 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019375110754263418, "loss": 2.0037, "step": 96140 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019375046440356147, "loss": 2.1637, "step": 96145 }, { "epoch": 0.23, "grad_norm": 1.609375, "learning_rate": 0.00019374982123246195, "loss": 2.284, "step": 96150 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.0001937491780293358, "loss": 2.0547, "step": 96155 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.0001937485347941832, "loss": 2.1414, "step": 96160 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019374789152700437, "loss": 2.1627, "step": 96165 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019374724822779964, "loss": 2.1566, "step": 96170 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019374660489656907, "loss": 2.1602, "step": 96175 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.000193745961533313, "loss": 2.1577, "step": 96180 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019374531813803162, "loss": 2.1681, "step": 96185 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.00019374467471072512, "loss": 2.0103, "step": 96190 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019374403125139376, "loss": 2.1567, "step": 96195 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019374338776003772, "loss": 2.1642, "step": 96200 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019374274423665725, "loss": 2.3034, "step": 96205 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019374210068125254, "loss": 2.032, "step": 96210 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019374145709382382, "loss": 2.0837, "step": 96215 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019374081347437136, "loss": 2.0748, "step": 96220 }, { "epoch": 0.23, "grad_norm": 1.5390625, "learning_rate": 0.0001937401698228953, "loss": 2.0406, "step": 96225 }, { "epoch": 0.23, "grad_norm": 1.7109375, "learning_rate": 0.0001937395261393959, "loss": 2.1163, "step": 96230 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.0001937388824238734, "loss": 2.3902, "step": 96235 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.000193738238676328, "loss": 2.2644, "step": 96240 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019373759489675992, "loss": 2.1405, "step": 96245 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.00019373695108516935, "loss": 2.0555, "step": 96250 }, { "epoch": 0.23, "grad_norm": 2.265625, "learning_rate": 0.00019373630724155654, "loss": 2.093, "step": 96255 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.0001937356633659217, "loss": 1.9848, "step": 96260 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.00019373501945826508, "loss": 2.1035, "step": 96265 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.0001937343755185869, "loss": 2.2115, "step": 96270 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019373373154688733, "loss": 2.0491, "step": 96275 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019373308754316661, "loss": 2.137, "step": 96280 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019373244350742497, "loss": 1.9382, "step": 96285 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019373179943966263, "loss": 2.1681, "step": 96290 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.0001937311553398798, "loss": 2.195, "step": 96295 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019373051120807671, "loss": 2.1985, "step": 96300 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001937298670442536, "loss": 2.0936, "step": 96305 }, { "epoch": 0.23, "grad_norm": 2.640625, "learning_rate": 0.00019372922284841067, "loss": 2.1403, "step": 96310 }, { "epoch": 0.23, "grad_norm": 2.796875, "learning_rate": 0.0001937285786205481, "loss": 2.2138, "step": 96315 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019372793436066616, "loss": 2.1496, "step": 96320 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019372729006876508, "loss": 2.1609, "step": 96325 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.00019372664574484504, "loss": 2.2735, "step": 96330 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019372600138890626, "loss": 2.1374, "step": 96335 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.000193725357000949, "loss": 2.2209, "step": 96340 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019372471258097347, "loss": 2.0247, "step": 96345 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019372406812897985, "loss": 2.2449, "step": 96350 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.0001937234236449684, "loss": 2.1058, "step": 96355 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.00019372277912893936, "loss": 1.9364, "step": 96360 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019372213458089288, "loss": 2.3498, "step": 96365 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019372149000082924, "loss": 2.2657, "step": 96370 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019372084538874865, "loss": 2.0628, "step": 96375 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019372020074465127, "loss": 2.006, "step": 96380 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001937195560685374, "loss": 2.1987, "step": 96385 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019371891136040723, "loss": 2.0987, "step": 96390 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019371826662026098, "loss": 2.2154, "step": 96395 }, { "epoch": 0.23, "grad_norm": 2.421875, "learning_rate": 0.00019371762184809888, "loss": 2.2132, "step": 96400 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001937169770439211, "loss": 2.026, "step": 96405 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019371633220772796, "loss": 2.0886, "step": 96410 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019371568733951957, "loss": 2.0393, "step": 96415 }, { "epoch": 0.23, "grad_norm": 1.6953125, "learning_rate": 0.00019371504243929623, "loss": 2.1023, "step": 96420 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019371439750705814, "loss": 2.1242, "step": 96425 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.00019371375254280548, "loss": 2.2749, "step": 96430 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019371310754653852, "loss": 2.0099, "step": 96435 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019371246251825746, "loss": 2.0141, "step": 96440 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019371181745796254, "loss": 1.9607, "step": 96445 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019371117236565395, "loss": 2.1768, "step": 96450 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019371052724133193, "loss": 2.2084, "step": 96455 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.0001937098820849967, "loss": 2.0538, "step": 96460 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019370923689664843, "loss": 2.1856, "step": 96465 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019370859167628743, "loss": 2.2327, "step": 96470 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019370794642391387, "loss": 2.1877, "step": 96475 }, { "epoch": 0.23, "grad_norm": 2.390625, "learning_rate": 0.00019370730113952797, "loss": 2.195, "step": 96480 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019370665582312997, "loss": 2.205, "step": 96485 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019370601047472005, "loss": 2.0222, "step": 96490 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019370536509429848, "loss": 2.0384, "step": 96495 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.00019370471968186545, "loss": 1.98, "step": 96500 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.0001937040742374212, "loss": 2.0422, "step": 96505 }, { "epoch": 0.23, "grad_norm": 2.453125, "learning_rate": 0.0001937034287609659, "loss": 2.005, "step": 96510 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.00019370278325249983, "loss": 2.1477, "step": 96515 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.0001937021377120232, "loss": 2.0061, "step": 96520 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019370149213953624, "loss": 2.2069, "step": 96525 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.00019370084653503914, "loss": 2.2507, "step": 96530 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001937002008985321, "loss": 2.3088, "step": 96535 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019369955523001537, "loss": 1.9861, "step": 96540 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019369890952948923, "loss": 2.3277, "step": 96545 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019369826379695378, "loss": 2.1467, "step": 96550 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019369761803240935, "loss": 2.4118, "step": 96555 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019369697223585607, "loss": 1.9986, "step": 96560 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019369632640729422, "loss": 1.9278, "step": 96565 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019369568054672406, "loss": 2.1774, "step": 96570 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.00019369503465414568, "loss": 2.1507, "step": 96575 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019369438872955943, "loss": 2.143, "step": 96580 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019369374277296544, "loss": 2.0916, "step": 96585 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.000193693096784364, "loss": 2.1441, "step": 96590 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.00019369245076375526, "loss": 2.1349, "step": 96595 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019369180471113953, "loss": 2.0503, "step": 96600 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.0001936911586265169, "loss": 2.0556, "step": 96605 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.00019369051250988774, "loss": 2.13, "step": 96610 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019368986636125222, "loss": 2.0076, "step": 96615 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.0001936892201806105, "loss": 2.2245, "step": 96620 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019368857396796286, "loss": 2.1247, "step": 96625 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019368792772330945, "loss": 2.0893, "step": 96630 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.0001936872814466506, "loss": 2.0986, "step": 96635 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001936866351379865, "loss": 2.1845, "step": 96640 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.0001936859887973173, "loss": 2.3167, "step": 96645 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019368534242464327, "loss": 2.1821, "step": 96650 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.00019368469601996463, "loss": 2.1337, "step": 96655 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.0001936840495832816, "loss": 2.096, "step": 96660 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019368340311459443, "loss": 2.2177, "step": 96665 }, { "epoch": 0.23, "grad_norm": 1.546875, "learning_rate": 0.00019368275661390326, "loss": 2.1241, "step": 96670 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.0001936821100812084, "loss": 2.1155, "step": 96675 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019368146351651002, "loss": 2.2804, "step": 96680 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019368081691980836, "loss": 2.2391, "step": 96685 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019368017029110362, "loss": 2.2008, "step": 96690 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.00019367952363039606, "loss": 1.9917, "step": 96695 }, { "epoch": 0.23, "grad_norm": 1.625, "learning_rate": 0.00019367887693768582, "loss": 2.0497, "step": 96700 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019367823021297325, "loss": 2.1608, "step": 96705 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019367758345625844, "loss": 1.9887, "step": 96710 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.0001936769366675417, "loss": 2.2607, "step": 96715 }, { "epoch": 0.23, "grad_norm": 2.6875, "learning_rate": 0.0001936762898468232, "loss": 2.2608, "step": 96720 }, { "epoch": 0.23, "grad_norm": 1.5, "learning_rate": 0.0001936756429941032, "loss": 2.0365, "step": 96725 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.00019367499610938187, "loss": 2.2111, "step": 96730 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.0001936743491926595, "loss": 2.1069, "step": 96735 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019367370224393623, "loss": 2.1048, "step": 96740 }, { "epoch": 0.23, "grad_norm": 1.71875, "learning_rate": 0.00019367305526321238, "loss": 2.1435, "step": 96745 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019367240825048806, "loss": 2.1347, "step": 96750 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.0001936717612057636, "loss": 2.0663, "step": 96755 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019367111412903914, "loss": 2.0435, "step": 96760 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019367046702031492, "loss": 2.2498, "step": 96765 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019366981987959119, "loss": 2.2314, "step": 96770 }, { "epoch": 0.23, "grad_norm": 1.7109375, "learning_rate": 0.00019366917270686814, "loss": 2.0653, "step": 96775 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.000193668525502146, "loss": 2.0656, "step": 96780 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.000193667878265425, "loss": 2.2001, "step": 96785 }, { "epoch": 0.23, "grad_norm": 1.6328125, "learning_rate": 0.00019366723099670534, "loss": 2.2315, "step": 96790 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.00019366658369598727, "loss": 2.1476, "step": 96795 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.000193665936363271, "loss": 2.1033, "step": 96800 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.0001936652889985567, "loss": 2.1316, "step": 96805 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.0001936646416018447, "loss": 2.2508, "step": 96810 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019366399417313512, "loss": 2.0983, "step": 96815 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.00019366334671242825, "loss": 2.2264, "step": 96820 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019366269921972427, "loss": 2.324, "step": 96825 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019366205169502341, "loss": 2.0597, "step": 96830 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.0001936614041383259, "loss": 2.1276, "step": 96835 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019366075654963196, "loss": 2.2266, "step": 96840 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.0001936601089289418, "loss": 2.286, "step": 96845 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019365946127625563, "loss": 1.9549, "step": 96850 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.0001936588135915737, "loss": 2.1827, "step": 96855 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019365816587489624, "loss": 2.2429, "step": 96860 }, { "epoch": 0.23, "grad_norm": 1.625, "learning_rate": 0.00019365751812622345, "loss": 2.0934, "step": 96865 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019365687034555556, "loss": 2.0914, "step": 96870 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.00019365622253289275, "loss": 2.2339, "step": 96875 }, { "epoch": 0.23, "grad_norm": 2.71875, "learning_rate": 0.00019365557468823527, "loss": 1.9628, "step": 96880 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.0001936549268115834, "loss": 1.9142, "step": 96885 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019365427890293728, "loss": 2.0675, "step": 96890 }, { "epoch": 0.23, "grad_norm": 2.421875, "learning_rate": 0.00019365363096229715, "loss": 2.2799, "step": 96895 }, { "epoch": 0.23, "grad_norm": 1.421875, "learning_rate": 0.00019365298298966327, "loss": 2.1735, "step": 96900 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.0001936523349850358, "loss": 2.0988, "step": 96905 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.000193651686948415, "loss": 2.2058, "step": 96910 }, { "epoch": 0.23, "grad_norm": 2.9375, "learning_rate": 0.0001936510388798011, "loss": 2.2236, "step": 96915 }, { "epoch": 0.23, "grad_norm": 2.671875, "learning_rate": 0.00019365039077919429, "loss": 2.271, "step": 96920 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.0001936497426465948, "loss": 2.2161, "step": 96925 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.0001936490944820029, "loss": 1.9489, "step": 96930 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019364844628541874, "loss": 2.1454, "step": 96935 }, { "epoch": 0.23, "grad_norm": 1.7734375, "learning_rate": 0.00019364779805684256, "loss": 2.2173, "step": 96940 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019364714979627464, "loss": 2.217, "step": 96945 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019364650150371514, "loss": 2.2282, "step": 96950 }, { "epoch": 0.23, "grad_norm": 1.84375, "learning_rate": 0.00019364585317916427, "loss": 2.0854, "step": 96955 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019364520482262226, "loss": 2.232, "step": 96960 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.0001936445564340894, "loss": 2.1727, "step": 96965 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019364390801356586, "loss": 2.0915, "step": 96970 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001936432595610518, "loss": 2.0518, "step": 96975 }, { "epoch": 0.23, "grad_norm": 2.953125, "learning_rate": 0.0001936426110765476, "loss": 2.1235, "step": 96980 }, { "epoch": 0.23, "grad_norm": 1.7265625, "learning_rate": 0.00019364196256005332, "loss": 1.9954, "step": 96985 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019364131401156925, "loss": 2.1013, "step": 96990 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.00019364066543109564, "loss": 2.3757, "step": 96995 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019364001681863266, "loss": 2.03, "step": 97000 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019363936817418055, "loss": 2.0996, "step": 97005 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019363871949773956, "loss": 2.2099, "step": 97010 }, { "epoch": 0.23, "grad_norm": 2.453125, "learning_rate": 0.00019363807078930987, "loss": 2.1863, "step": 97015 }, { "epoch": 0.23, "grad_norm": 1.71875, "learning_rate": 0.0001936374220488917, "loss": 2.2504, "step": 97020 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.0001936367732764853, "loss": 2.3194, "step": 97025 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019363612447209088, "loss": 2.0799, "step": 97030 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019363547563570866, "loss": 2.258, "step": 97035 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019363482676733887, "loss": 2.0232, "step": 97040 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.0001936341778669817, "loss": 2.1685, "step": 97045 }, { "epoch": 0.23, "grad_norm": 1.6328125, "learning_rate": 0.00019363352893463743, "loss": 2.2007, "step": 97050 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.0001936328799703062, "loss": 2.2166, "step": 97055 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.0001936322309739883, "loss": 2.2547, "step": 97060 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.000193631581945684, "loss": 1.9991, "step": 97065 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.0001936309328853934, "loss": 2.2195, "step": 97070 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019363028379311677, "loss": 2.2452, "step": 97075 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019362963466885432, "loss": 2.1338, "step": 97080 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.00019362898551260633, "loss": 2.1036, "step": 97085 }, { "epoch": 0.23, "grad_norm": 1.671875, "learning_rate": 0.00019362833632437294, "loss": 1.9762, "step": 97090 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019362768710415445, "loss": 2.157, "step": 97095 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.00019362703785195105, "loss": 2.192, "step": 97100 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.0001936263885677629, "loss": 2.2908, "step": 97105 }, { "epoch": 0.23, "grad_norm": 1.7265625, "learning_rate": 0.00019362573925159034, "loss": 2.0807, "step": 97110 }, { "epoch": 0.23, "grad_norm": 2.5625, "learning_rate": 0.0001936250899034335, "loss": 2.1659, "step": 97115 }, { "epoch": 0.23, "grad_norm": 1.7578125, "learning_rate": 0.00019362444052329264, "loss": 2.102, "step": 97120 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019362379111116792, "loss": 2.1681, "step": 97125 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.00019362314166705967, "loss": 2.1846, "step": 97130 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019362249219096803, "loss": 2.1466, "step": 97135 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.0001936218426828933, "loss": 2.215, "step": 97140 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.0001936211931428356, "loss": 1.9589, "step": 97145 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.00019362054357079523, "loss": 2.0328, "step": 97150 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019361989396677237, "loss": 2.0991, "step": 97155 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019361924433076725, "loss": 2.1723, "step": 97160 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019361859466278012, "loss": 2.1584, "step": 97165 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019361794496281117, "loss": 2.2283, "step": 97170 }, { "epoch": 0.23, "grad_norm": 1.71875, "learning_rate": 0.00019361729523086063, "loss": 2.1956, "step": 97175 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019361664546692874, "loss": 2.156, "step": 97180 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019361599567101567, "loss": 2.1264, "step": 97185 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.0001936153458431217, "loss": 2.1263, "step": 97190 }, { "epoch": 0.23, "grad_norm": 3.15625, "learning_rate": 0.00019361469598324705, "loss": 2.2081, "step": 97195 }, { "epoch": 0.23, "grad_norm": 1.7265625, "learning_rate": 0.00019361404609139187, "loss": 2.0496, "step": 97200 }, { "epoch": 0.23, "grad_norm": 2.4375, "learning_rate": 0.00019361339616755647, "loss": 2.1754, "step": 97205 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019361274621174101, "loss": 2.2057, "step": 97210 }, { "epoch": 0.23, "grad_norm": 1.6953125, "learning_rate": 0.0001936120962239458, "loss": 2.1973, "step": 97215 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019361144620417094, "loss": 2.1665, "step": 97220 }, { "epoch": 0.23, "grad_norm": 1.7734375, "learning_rate": 0.00019361079615241674, "loss": 2.1378, "step": 97225 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019361014606868336, "loss": 2.1854, "step": 97230 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.0001936094959529711, "loss": 2.2538, "step": 97235 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019360884580528012, "loss": 2.2436, "step": 97240 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.00019360819562561066, "loss": 2.1253, "step": 97245 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019360754541396293, "loss": 2.2363, "step": 97250 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.0001936068951703372, "loss": 2.1178, "step": 97255 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019360624489473362, "loss": 2.2186, "step": 97260 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019360559458715248, "loss": 2.1979, "step": 97265 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019360494424759393, "loss": 2.198, "step": 97270 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.00019360429387605828, "loss": 2.0571, "step": 97275 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019360364347254568, "loss": 2.1213, "step": 97280 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.0001936029930370564, "loss": 2.1528, "step": 97285 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.0001936023425695906, "loss": 2.1482, "step": 97290 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001936016920701486, "loss": 1.9219, "step": 97295 }, { "epoch": 0.23, "grad_norm": 2.453125, "learning_rate": 0.00019360104153873052, "loss": 2.2534, "step": 97300 }, { "epoch": 0.23, "grad_norm": 2.546875, "learning_rate": 0.0001936003909753366, "loss": 2.034, "step": 97305 }, { "epoch": 0.23, "grad_norm": 1.8125, "learning_rate": 0.00019359974037996716, "loss": 2.2565, "step": 97310 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001935990897526223, "loss": 2.297, "step": 97315 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.0001935984390933023, "loss": 2.1021, "step": 97320 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019359778840200739, "loss": 2.0033, "step": 97325 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019359713767873776, "loss": 2.2352, "step": 97330 }, { "epoch": 0.23, "grad_norm": 1.7578125, "learning_rate": 0.0001935964869234937, "loss": 2.0927, "step": 97335 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.0001935958361362753, "loss": 1.9281, "step": 97340 }, { "epoch": 0.23, "grad_norm": 1.765625, "learning_rate": 0.00019359518531708293, "loss": 2.2185, "step": 97345 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.00019359453446591673, "loss": 2.1342, "step": 97350 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019359388358277695, "loss": 2.1073, "step": 97355 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019359323266766377, "loss": 2.0397, "step": 97360 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019359258172057747, "loss": 2.1053, "step": 97365 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.00019359193074151824, "loss": 2.2494, "step": 97370 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019359127973048632, "loss": 2.1868, "step": 97375 }, { "epoch": 0.23, "grad_norm": 1.640625, "learning_rate": 0.0001935906286874819, "loss": 2.2652, "step": 97380 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019358997761250522, "loss": 2.281, "step": 97385 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019358932650555654, "loss": 2.1154, "step": 97390 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019358867536663603, "loss": 2.1087, "step": 97395 }, { "epoch": 0.23, "grad_norm": 1.5078125, "learning_rate": 0.00019358802419574392, "loss": 2.0911, "step": 97400 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019358737299288045, "loss": 2.1266, "step": 97405 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019358672175804587, "loss": 2.0546, "step": 97410 }, { "epoch": 0.23, "grad_norm": 1.5234375, "learning_rate": 0.00019358607049124033, "loss": 2.2542, "step": 97415 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.0001935854191924641, "loss": 2.131, "step": 97420 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.0001935847678617174, "loss": 2.0512, "step": 97425 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.00019358411649900042, "loss": 2.0671, "step": 97430 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019358346510431345, "loss": 2.1256, "step": 97435 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019358281367765664, "loss": 2.0238, "step": 97440 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019358216221903026, "loss": 2.1915, "step": 97445 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.0001935815107284345, "loss": 2.1866, "step": 97450 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019358085920586962, "loss": 2.2353, "step": 97455 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019358020765133582, "loss": 2.2298, "step": 97460 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.0001935795560648333, "loss": 2.2974, "step": 97465 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019357890444636234, "loss": 2.209, "step": 97470 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019357825279592308, "loss": 2.0558, "step": 97475 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019357760111351585, "loss": 2.0848, "step": 97480 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.00019357694939914076, "loss": 2.2473, "step": 97485 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019357629765279813, "loss": 2.0649, "step": 97490 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019357564587448812, "loss": 2.2767, "step": 97495 }, { "epoch": 0.23, "grad_norm": 1.796875, "learning_rate": 0.00019357499406421095, "loss": 2.1671, "step": 97500 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.0001935743422219669, "loss": 2.2283, "step": 97505 }, { "epoch": 0.23, "grad_norm": 1.515625, "learning_rate": 0.00019357369034775617, "loss": 2.288, "step": 97510 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.0001935730384415789, "loss": 2.3806, "step": 97515 }, { "epoch": 0.23, "grad_norm": 2.4375, "learning_rate": 0.00019357238650343547, "loss": 2.1962, "step": 97520 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019357173453332598, "loss": 2.1815, "step": 97525 }, { "epoch": 0.23, "grad_norm": 2.890625, "learning_rate": 0.00019357108253125068, "loss": 2.4021, "step": 97530 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.0001935704304972098, "loss": 2.1947, "step": 97535 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019356977843120357, "loss": 2.1188, "step": 97540 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019356912633323222, "loss": 2.1866, "step": 97545 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019356847420329593, "loss": 2.3934, "step": 97550 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.00019356782204139495, "loss": 2.0113, "step": 97555 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019356716984752953, "loss": 2.2223, "step": 97560 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019356651762169987, "loss": 2.2399, "step": 97565 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019356586536390616, "loss": 2.1552, "step": 97570 }, { "epoch": 0.23, "grad_norm": 3.21875, "learning_rate": 0.00019356521307414866, "loss": 2.276, "step": 97575 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019356456075242764, "loss": 2.2402, "step": 97580 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019356390839874322, "loss": 2.0875, "step": 97585 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019356325601309568, "loss": 2.1159, "step": 97590 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019356260359548527, "loss": 2.2383, "step": 97595 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.00019356195114591214, "loss": 2.3136, "step": 97600 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.00019356129866437654, "loss": 2.1655, "step": 97605 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019356064615087872, "loss": 2.0889, "step": 97610 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019355999360541888, "loss": 2.2861, "step": 97615 }, { "epoch": 0.23, "grad_norm": 1.5, "learning_rate": 0.00019355934102799726, "loss": 2.2789, "step": 97620 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019355868841861406, "loss": 2.1335, "step": 97625 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019355803577726955, "loss": 2.1636, "step": 97630 }, { "epoch": 0.23, "grad_norm": 1.71875, "learning_rate": 0.00019355738310396388, "loss": 2.0412, "step": 97635 }, { "epoch": 0.23, "grad_norm": 1.8125, "learning_rate": 0.00019355673039869734, "loss": 2.0836, "step": 97640 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.00019355607766147008, "loss": 2.4776, "step": 97645 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001935554248922824, "loss": 2.0204, "step": 97650 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019355477209113446, "loss": 2.177, "step": 97655 }, { "epoch": 0.23, "grad_norm": 2.28125, "learning_rate": 0.00019355411925802655, "loss": 2.1535, "step": 97660 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.00019355346639295885, "loss": 2.0964, "step": 97665 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019355281349593158, "loss": 2.3194, "step": 97670 }, { "epoch": 0.23, "grad_norm": 2.265625, "learning_rate": 0.000193552160566945, "loss": 1.9526, "step": 97675 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.00019355150760599926, "loss": 2.0926, "step": 97680 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019355085461309464, "loss": 2.1832, "step": 97685 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.00019355020158823137, "loss": 2.1875, "step": 97690 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019354954853140964, "loss": 1.897, "step": 97695 }, { "epoch": 0.23, "grad_norm": 1.734375, "learning_rate": 0.0001935488954426297, "loss": 2.1953, "step": 97700 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019354824232189173, "loss": 1.9773, "step": 97705 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.000193547589169196, "loss": 2.2537, "step": 97710 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019354693598454273, "loss": 2.0812, "step": 97715 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019354628276793215, "loss": 2.0328, "step": 97720 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019354562951936442, "loss": 2.119, "step": 97725 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.0001935449762388398, "loss": 2.1378, "step": 97730 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.0001935443229263586, "loss": 2.1026, "step": 97735 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019354366958192087, "loss": 2.1676, "step": 97740 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019354301620552695, "loss": 2.3394, "step": 97745 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019354236279717706, "loss": 2.0643, "step": 97750 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.0001935417093568714, "loss": 2.1738, "step": 97755 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.00019354105588461018, "loss": 2.136, "step": 97760 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019354040238039365, "loss": 1.9844, "step": 97765 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019353974884422202, "loss": 2.2456, "step": 97770 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.0001935390952760955, "loss": 2.168, "step": 97775 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019353844167601435, "loss": 2.0586, "step": 97780 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001935377880439788, "loss": 2.2682, "step": 97785 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.000193537134379989, "loss": 2.2076, "step": 97790 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.0001935364806840452, "loss": 2.1259, "step": 97795 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019353582695614768, "loss": 2.1322, "step": 97800 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.0001935351731962966, "loss": 2.1426, "step": 97805 }, { "epoch": 0.23, "grad_norm": 1.7578125, "learning_rate": 0.00019353451940449224, "loss": 2.0392, "step": 97810 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.0001935338655807348, "loss": 2.1017, "step": 97815 }, { "epoch": 0.23, "grad_norm": 1.6484375, "learning_rate": 0.00019353321172502447, "loss": 2.017, "step": 97820 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019353255783736148, "loss": 2.2132, "step": 97825 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.0001935319039177461, "loss": 2.0718, "step": 97830 }, { "epoch": 0.23, "grad_norm": 2.265625, "learning_rate": 0.0001935312499661785, "loss": 2.049, "step": 97835 }, { "epoch": 0.23, "grad_norm": 1.4375, "learning_rate": 0.00019353059598265895, "loss": 2.1928, "step": 97840 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019352994196718766, "loss": 2.1757, "step": 97845 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.0001935292879197648, "loss": 2.2682, "step": 97850 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.0001935286338403907, "loss": 2.1014, "step": 97855 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.0001935279797290655, "loss": 2.3432, "step": 97860 }, { "epoch": 0.23, "grad_norm": 1.703125, "learning_rate": 0.00019352732558578943, "loss": 1.989, "step": 97865 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019352667141056274, "loss": 2.1805, "step": 97870 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019352601720338565, "loss": 2.1371, "step": 97875 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.0001935253629642584, "loss": 2.11, "step": 97880 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.0001935247086931811, "loss": 2.1375, "step": 97885 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019352405439015415, "loss": 2.0842, "step": 97890 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.00019352340005517765, "loss": 1.9866, "step": 97895 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019352274568825188, "loss": 2.1718, "step": 97900 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019352209128937703, "loss": 2.1794, "step": 97905 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.00019352143685855334, "loss": 2.1865, "step": 97910 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.00019352078239578103, "loss": 2.0898, "step": 97915 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019352012790106032, "loss": 2.2126, "step": 97920 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019351947337439146, "loss": 2.0783, "step": 97925 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019351881881577465, "loss": 2.4302, "step": 97930 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.0001935181642252101, "loss": 2.1508, "step": 97935 }, { "epoch": 0.23, "grad_norm": 1.703125, "learning_rate": 0.00019351750960269802, "loss": 2.1471, "step": 97940 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019351685494823872, "loss": 2.1767, "step": 97945 }, { "epoch": 0.23, "grad_norm": 2.53125, "learning_rate": 0.00019351620026183234, "loss": 2.2151, "step": 97950 }, { "epoch": 0.23, "grad_norm": 2.546875, "learning_rate": 0.00019351554554347913, "loss": 2.0855, "step": 97955 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019351489079317934, "loss": 2.0765, "step": 97960 }, { "epoch": 0.23, "grad_norm": 3.0, "learning_rate": 0.00019351423601093312, "loss": 2.1735, "step": 97965 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019351358119674075, "loss": 2.1996, "step": 97970 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019351292635060244, "loss": 2.1715, "step": 97975 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019351227147251847, "loss": 2.178, "step": 97980 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019351161656248897, "loss": 2.1478, "step": 97985 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019351096162051424, "loss": 2.1855, "step": 97990 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.0001935103066465944, "loss": 2.0631, "step": 97995 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019350965164072981, "loss": 2.2856, "step": 98000 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.0001935089966029206, "loss": 2.1808, "step": 98005 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019350834153316701, "loss": 2.0369, "step": 98010 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001935076864314693, "loss": 2.067, "step": 98015 }, { "epoch": 0.23, "grad_norm": 1.7578125, "learning_rate": 0.00019350703129782768, "loss": 1.99, "step": 98020 }, { "epoch": 0.23, "grad_norm": 1.7265625, "learning_rate": 0.00019350637613224232, "loss": 2.1199, "step": 98025 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.0001935057209347135, "loss": 2.0318, "step": 98030 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019350506570524144, "loss": 2.1248, "step": 98035 }, { "epoch": 0.23, "grad_norm": 1.59375, "learning_rate": 0.00019350441044382636, "loss": 2.1769, "step": 98040 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019350375515046845, "loss": 2.2979, "step": 98045 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.000193503099825168, "loss": 2.1006, "step": 98050 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019350244446792516, "loss": 2.1665, "step": 98055 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.0001935017890787402, "loss": 2.1869, "step": 98060 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019350113365761335, "loss": 2.1158, "step": 98065 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.0001935004782045448, "loss": 2.0458, "step": 98070 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.0001934998227195348, "loss": 2.0983, "step": 98075 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.00019349916720258356, "loss": 2.3059, "step": 98080 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.0001934985116536913, "loss": 2.0888, "step": 98085 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019349785607285827, "loss": 2.1422, "step": 98090 }, { "epoch": 0.23, "grad_norm": 2.625, "learning_rate": 0.00019349720046008467, "loss": 2.3923, "step": 98095 }, { "epoch": 0.23, "grad_norm": 1.6953125, "learning_rate": 0.00019349654481537072, "loss": 1.951, "step": 98100 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.0001934958891387167, "loss": 2.054, "step": 98105 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019349523343012271, "loss": 2.3429, "step": 98110 }, { "epoch": 0.23, "grad_norm": 1.796875, "learning_rate": 0.0001934945776895891, "loss": 2.008, "step": 98115 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019349392191711604, "loss": 2.0651, "step": 98120 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019349326611270374, "loss": 2.2021, "step": 98125 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019349261027635248, "loss": 2.2039, "step": 98130 }, { "epoch": 0.23, "grad_norm": 2.59375, "learning_rate": 0.00019349195440806244, "loss": 2.1665, "step": 98135 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019349129850783386, "loss": 2.2253, "step": 98140 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019349064257566693, "loss": 2.0588, "step": 98145 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019348998661156193, "loss": 2.1874, "step": 98150 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.00019348933061551904, "loss": 2.0102, "step": 98155 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.0001934886745875385, "loss": 2.1858, "step": 98160 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019348801852762053, "loss": 2.1637, "step": 98165 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019348736243576537, "loss": 2.2714, "step": 98170 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019348670631197322, "loss": 2.0758, "step": 98175 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.0001934860501562443, "loss": 2.0084, "step": 98180 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019348539396857886, "loss": 2.0019, "step": 98185 }, { "epoch": 0.23, "grad_norm": 1.734375, "learning_rate": 0.00019348473774897715, "loss": 2.2523, "step": 98190 }, { "epoch": 0.23, "grad_norm": 1.5859375, "learning_rate": 0.00019348408149743934, "loss": 2.1451, "step": 98195 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.00019348342521396562, "loss": 2.099, "step": 98200 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019348276889855632, "loss": 2.1062, "step": 98205 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.0001934821125512116, "loss": 2.0672, "step": 98210 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001934814561719317, "loss": 2.1149, "step": 98215 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019348079976071682, "loss": 2.136, "step": 98220 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019348014331756723, "loss": 2.2255, "step": 98225 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.0001934794868424831, "loss": 2.0772, "step": 98230 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.0001934788303354647, "loss": 2.2283, "step": 98235 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019347817379651225, "loss": 2.2588, "step": 98240 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.0001934775172256259, "loss": 2.089, "step": 98245 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.000193476860622806, "loss": 2.0377, "step": 98250 }, { "epoch": 0.23, "grad_norm": 2.390625, "learning_rate": 0.0001934762039880527, "loss": 2.399, "step": 98255 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.0001934755473213662, "loss": 2.1725, "step": 98260 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019347489062274681, "loss": 2.2127, "step": 98265 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019347423389219466, "loss": 2.423, "step": 98270 }, { "epoch": 0.23, "grad_norm": 1.703125, "learning_rate": 0.00019347357712971004, "loss": 2.1493, "step": 98275 }, { "epoch": 0.23, "grad_norm": 1.7109375, "learning_rate": 0.00019347292033529314, "loss": 2.0459, "step": 98280 }, { "epoch": 0.23, "grad_norm": 2.28125, "learning_rate": 0.0001934722635089442, "loss": 2.1689, "step": 98285 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.0001934716066506634, "loss": 2.1983, "step": 98290 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019347094976045103, "loss": 2.0144, "step": 98295 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019347029283830732, "loss": 2.2137, "step": 98300 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.00019346963588423244, "loss": 2.297, "step": 98305 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019346897889822664, "loss": 2.1852, "step": 98310 }, { "epoch": 0.23, "grad_norm": 2.5, "learning_rate": 0.00019346832188029013, "loss": 2.3418, "step": 98315 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019346766483042315, "loss": 2.1452, "step": 98320 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019346700774862592, "loss": 2.032, "step": 98325 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019346635063489866, "loss": 2.2306, "step": 98330 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.00019346569348924162, "loss": 2.0245, "step": 98335 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019346503631165497, "loss": 2.2216, "step": 98340 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019346437910213902, "loss": 2.0636, "step": 98345 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019346372186069391, "loss": 2.1295, "step": 98350 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.0001934630645873199, "loss": 2.0373, "step": 98355 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.0001934624072820172, "loss": 2.2437, "step": 98360 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019346174994478606, "loss": 2.3346, "step": 98365 }, { "epoch": 0.23, "grad_norm": 1.8125, "learning_rate": 0.0001934610925756267, "loss": 2.1814, "step": 98370 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.00019346043517453932, "loss": 2.1705, "step": 98375 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019345977774152417, "loss": 2.2278, "step": 98380 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019345912027658147, "loss": 2.0922, "step": 98385 }, { "epoch": 0.23, "grad_norm": 1.6640625, "learning_rate": 0.00019345846277971142, "loss": 2.191, "step": 98390 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019345780525091428, "loss": 2.1627, "step": 98395 }, { "epoch": 0.23, "grad_norm": 2.421875, "learning_rate": 0.00019345714769019025, "loss": 2.1154, "step": 98400 }, { "epoch": 0.23, "grad_norm": 3.09375, "learning_rate": 0.00019345649009753956, "loss": 2.1481, "step": 98405 }, { "epoch": 0.23, "grad_norm": 2.921875, "learning_rate": 0.00019345583247296246, "loss": 2.0944, "step": 98410 }, { "epoch": 0.23, "grad_norm": 2.453125, "learning_rate": 0.00019345517481645912, "loss": 2.0886, "step": 98415 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019345451712802982, "loss": 2.077, "step": 98420 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019345385940767478, "loss": 2.1576, "step": 98425 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.00019345320165539418, "loss": 2.1077, "step": 98430 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019345254387118828, "loss": 2.1857, "step": 98435 }, { "epoch": 0.23, "grad_norm": 1.671875, "learning_rate": 0.0001934518860550573, "loss": 2.1009, "step": 98440 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.00019345122820700142, "loss": 2.1855, "step": 98445 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019345057032702095, "loss": 2.0416, "step": 98450 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.0001934499124151161, "loss": 2.3392, "step": 98455 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019344925447128698, "loss": 2.1424, "step": 98460 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019344859649553396, "loss": 2.2879, "step": 98465 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001934479384878572, "loss": 2.0533, "step": 98470 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.0001934472804482569, "loss": 2.0936, "step": 98475 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019344662237673333, "loss": 2.238, "step": 98480 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.0001934459642732867, "loss": 2.2419, "step": 98485 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019344530613791725, "loss": 2.2253, "step": 98490 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019344464797062515, "loss": 2.3034, "step": 98495 }, { "epoch": 0.23, "grad_norm": 1.59375, "learning_rate": 0.0001934439897714107, "loss": 2.0727, "step": 98500 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.0001934433315402741, "loss": 2.2269, "step": 98505 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019344267327721552, "loss": 2.098, "step": 98510 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019344201498223526, "loss": 2.2653, "step": 98515 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.00019344135665533347, "loss": 2.1282, "step": 98520 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019344069829651045, "loss": 2.3522, "step": 98525 }, { "epoch": 0.23, "grad_norm": 2.609375, "learning_rate": 0.0001934400399057664, "loss": 2.4204, "step": 98530 }, { "epoch": 0.23, "grad_norm": 1.640625, "learning_rate": 0.0001934393814831015, "loss": 2.0639, "step": 98535 }, { "epoch": 0.23, "grad_norm": 1.703125, "learning_rate": 0.0001934387230285161, "loss": 2.1936, "step": 98540 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019343806454201026, "loss": 2.1425, "step": 98545 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.0001934374060235843, "loss": 2.1814, "step": 98550 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.0001934367474732384, "loss": 2.2859, "step": 98555 }, { "epoch": 0.23, "grad_norm": 1.5859375, "learning_rate": 0.00019343608889097288, "loss": 2.1664, "step": 98560 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019343543027678783, "loss": 2.1298, "step": 98565 }, { "epoch": 0.23, "grad_norm": 1.625, "learning_rate": 0.0001934347716306836, "loss": 2.0637, "step": 98570 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.0001934341129526603, "loss": 2.1391, "step": 98575 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019343345424271827, "loss": 2.0639, "step": 98580 }, { "epoch": 0.23, "grad_norm": 1.875, "learning_rate": 0.00019343279550085765, "loss": 2.2864, "step": 98585 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.0001934321367270787, "loss": 2.0944, "step": 98590 }, { "epoch": 0.23, "grad_norm": 2.8125, "learning_rate": 0.0001934314779213816, "loss": 2.3405, "step": 98595 }, { "epoch": 0.23, "grad_norm": 2.546875, "learning_rate": 0.00019343081908376666, "loss": 2.1417, "step": 98600 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019343016021423405, "loss": 1.9613, "step": 98605 }, { "epoch": 0.23, "grad_norm": 2.984375, "learning_rate": 0.000193429501312784, "loss": 2.1829, "step": 98610 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001934288423794167, "loss": 2.0543, "step": 98615 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019342818341413245, "loss": 2.1265, "step": 98620 }, { "epoch": 0.23, "grad_norm": 1.71875, "learning_rate": 0.0001934275244169314, "loss": 2.1967, "step": 98625 }, { "epoch": 0.23, "grad_norm": 1.640625, "learning_rate": 0.00019342686538781386, "loss": 2.1591, "step": 98630 }, { "epoch": 0.23, "grad_norm": 1.6171875, "learning_rate": 0.00019342620632678, "loss": 2.2113, "step": 98635 }, { "epoch": 0.23, "grad_norm": 2.46875, "learning_rate": 0.00019342554723383003, "loss": 2.1483, "step": 98640 }, { "epoch": 0.23, "grad_norm": 1.7578125, "learning_rate": 0.00019342488810896423, "loss": 2.1232, "step": 98645 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.0001934242289521828, "loss": 2.4053, "step": 98650 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019342356976348592, "loss": 2.0035, "step": 98655 }, { "epoch": 0.23, "grad_norm": 1.671875, "learning_rate": 0.00019342291054287388, "loss": 2.0957, "step": 98660 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019342225129034688, "loss": 2.0323, "step": 98665 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019342159200590512, "loss": 2.215, "step": 98670 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019342093268954886, "loss": 2.2317, "step": 98675 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019342027334127835, "loss": 2.1316, "step": 98680 }, { "epoch": 0.23, "grad_norm": 2.921875, "learning_rate": 0.00019341961396109373, "loss": 1.9825, "step": 98685 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.00019341895454899532, "loss": 2.2941, "step": 98690 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019341829510498325, "loss": 2.2292, "step": 98695 }, { "epoch": 0.23, "grad_norm": 1.765625, "learning_rate": 0.00019341763562905783, "loss": 2.0728, "step": 98700 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019341697612121927, "loss": 2.03, "step": 98705 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019341631658146774, "loss": 2.0968, "step": 98710 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019341565700980353, "loss": 2.0742, "step": 98715 }, { "epoch": 0.23, "grad_norm": 1.765625, "learning_rate": 0.00019341499740622683, "loss": 2.0908, "step": 98720 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019341433777073785, "loss": 2.221, "step": 98725 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019341367810333683, "loss": 2.1334, "step": 98730 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019341301840402407, "loss": 2.0719, "step": 98735 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019341235867279964, "loss": 2.1487, "step": 98740 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019341169890966392, "loss": 2.1315, "step": 98745 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019341103911461706, "loss": 2.4327, "step": 98750 }, { "epoch": 0.23, "grad_norm": 1.765625, "learning_rate": 0.00019341037928765928, "loss": 2.1028, "step": 98755 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019340971942879084, "loss": 2.2346, "step": 98760 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.0001934090595380119, "loss": 1.9621, "step": 98765 }, { "epoch": 0.23, "grad_norm": 2.828125, "learning_rate": 0.00019340839961532277, "loss": 2.0897, "step": 98770 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019340773966072363, "loss": 2.1022, "step": 98775 }, { "epoch": 0.23, "grad_norm": 1.84375, "learning_rate": 0.00019340707967421472, "loss": 2.0037, "step": 98780 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019340641965579625, "loss": 2.1576, "step": 98785 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019340575960546846, "loss": 2.2801, "step": 98790 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019340509952323154, "loss": 1.9329, "step": 98795 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019340443940908577, "loss": 2.2352, "step": 98800 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.00019340377926303136, "loss": 1.9874, "step": 98805 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019340311908506847, "loss": 2.0728, "step": 98810 }, { "epoch": 0.23, "grad_norm": 2.28125, "learning_rate": 0.00019340245887519743, "loss": 2.1844, "step": 98815 }, { "epoch": 0.23, "grad_norm": 1.859375, "learning_rate": 0.0001934017986334184, "loss": 2.1697, "step": 98820 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019340113835973163, "loss": 2.2433, "step": 98825 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.00019340047805413733, "loss": 2.2576, "step": 98830 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019339981771663575, "loss": 2.2839, "step": 98835 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.0001933991573472271, "loss": 2.209, "step": 98840 }, { "epoch": 0.23, "grad_norm": 1.84375, "learning_rate": 0.00019339849694591157, "loss": 2.1822, "step": 98845 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019339783651268943, "loss": 2.0737, "step": 98850 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.0001933971760475609, "loss": 2.2698, "step": 98855 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019339651555052624, "loss": 2.1518, "step": 98860 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019339585502158556, "loss": 2.1615, "step": 98865 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019339519446073922, "loss": 2.0587, "step": 98870 }, { "epoch": 0.23, "grad_norm": 1.6171875, "learning_rate": 0.00019339453386798736, "loss": 2.1546, "step": 98875 }, { "epoch": 0.23, "grad_norm": 2.53125, "learning_rate": 0.00019339387324333025, "loss": 2.3721, "step": 98880 }, { "epoch": 0.23, "grad_norm": 2.375, "learning_rate": 0.00019339321258676808, "loss": 2.0333, "step": 98885 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.00019339255189830112, "loss": 2.0416, "step": 98890 }, { "epoch": 0.23, "grad_norm": 3.15625, "learning_rate": 0.00019339189117792955, "loss": 2.497, "step": 98895 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019339123042565363, "loss": 2.1987, "step": 98900 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019339056964147358, "loss": 2.1841, "step": 98905 }, { "epoch": 0.23, "grad_norm": 2.796875, "learning_rate": 0.00019338990882538962, "loss": 2.2767, "step": 98910 }, { "epoch": 0.23, "grad_norm": 2.875, "learning_rate": 0.00019338924797740193, "loss": 2.2828, "step": 98915 }, { "epoch": 0.23, "grad_norm": 2.265625, "learning_rate": 0.0001933885870975108, "loss": 2.0985, "step": 98920 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019338792618571647, "loss": 2.3196, "step": 98925 }, { "epoch": 0.23, "grad_norm": 1.7578125, "learning_rate": 0.0001933872652420191, "loss": 1.9381, "step": 98930 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019338660426641892, "loss": 2.0849, "step": 98935 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019338594325891623, "loss": 2.216, "step": 98940 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.0001933852822195112, "loss": 2.3189, "step": 98945 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019338462114820403, "loss": 2.0837, "step": 98950 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019338396004499501, "loss": 2.2016, "step": 98955 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019338329890988433, "loss": 2.352, "step": 98960 }, { "epoch": 0.23, "grad_norm": 2.609375, "learning_rate": 0.0001933826377428722, "loss": 2.172, "step": 98965 }, { "epoch": 0.23, "grad_norm": 1.6328125, "learning_rate": 0.0001933819765439589, "loss": 2.1524, "step": 98970 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.0001933813153131446, "loss": 2.2407, "step": 98975 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.0001933806540504296, "loss": 2.0621, "step": 98980 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019337999275581403, "loss": 1.9823, "step": 98985 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.00019337933142929816, "loss": 1.9881, "step": 98990 }, { "epoch": 0.23, "grad_norm": 2.78125, "learning_rate": 0.0001933786700708822, "loss": 2.2998, "step": 98995 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019337800868056643, "loss": 2.2149, "step": 99000 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.000193377347258351, "loss": 1.9597, "step": 99005 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019337668580423622, "loss": 2.006, "step": 99010 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019337602431822225, "loss": 1.8507, "step": 99015 }, { "epoch": 0.23, "grad_norm": 1.828125, "learning_rate": 0.00019337536280030935, "loss": 2.0749, "step": 99020 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.0001933747012504977, "loss": 2.3219, "step": 99025 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019337403966878758, "loss": 2.0966, "step": 99030 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.0001933733780551792, "loss": 2.1161, "step": 99035 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019337271640967277, "loss": 2.3373, "step": 99040 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019337205473226853, "loss": 2.2386, "step": 99045 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.00019337139302296673, "loss": 2.1744, "step": 99050 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.0001933707312817675, "loss": 2.2719, "step": 99055 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019337006950867117, "loss": 2.1848, "step": 99060 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019336940770367799, "loss": 2.135, "step": 99065 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019336874586678803, "loss": 2.188, "step": 99070 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019336808399800167, "loss": 2.1608, "step": 99075 }, { "epoch": 0.23, "grad_norm": 2.328125, "learning_rate": 0.00019336742209731907, "loss": 2.1119, "step": 99080 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.00019336676016474047, "loss": 2.29, "step": 99085 }, { "epoch": 0.23, "grad_norm": 1.7265625, "learning_rate": 0.00019336609820026608, "loss": 2.2422, "step": 99090 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019336543620389614, "loss": 2.2446, "step": 99095 }, { "epoch": 0.23, "grad_norm": 1.765625, "learning_rate": 0.00019336477417563087, "loss": 2.1352, "step": 99100 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019336411211547049, "loss": 2.2432, "step": 99105 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019336345002341526, "loss": 2.1371, "step": 99110 }, { "epoch": 0.23, "grad_norm": 2.0, "learning_rate": 0.00019336278789946535, "loss": 2.2837, "step": 99115 }, { "epoch": 0.23, "grad_norm": 1.84375, "learning_rate": 0.00019336212574362106, "loss": 2.3478, "step": 99120 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019336146355588256, "loss": 2.1069, "step": 99125 }, { "epoch": 0.23, "grad_norm": 2.390625, "learning_rate": 0.0001933608013362501, "loss": 1.9837, "step": 99130 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019336013908472386, "loss": 2.128, "step": 99135 }, { "epoch": 0.23, "grad_norm": 1.6875, "learning_rate": 0.00019335947680130413, "loss": 2.1532, "step": 99140 }, { "epoch": 0.23, "grad_norm": 1.9921875, "learning_rate": 0.0001933588144859911, "loss": 2.193, "step": 99145 }, { "epoch": 0.23, "grad_norm": 1.6171875, "learning_rate": 0.000193358152138785, "loss": 1.9964, "step": 99150 }, { "epoch": 0.23, "grad_norm": 1.9609375, "learning_rate": 0.0001933574897596861, "loss": 2.1475, "step": 99155 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019335682734869457, "loss": 2.0016, "step": 99160 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.00019335616490581063, "loss": 2.2814, "step": 99165 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019335550243103456, "loss": 2.1403, "step": 99170 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.00019335483992436654, "loss": 1.9661, "step": 99175 }, { "epoch": 0.23, "grad_norm": 2.390625, "learning_rate": 0.0001933541773858068, "loss": 2.18, "step": 99180 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.0001933535148153556, "loss": 2.2702, "step": 99185 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019335285221301315, "loss": 2.0993, "step": 99190 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019335218957877966, "loss": 2.043, "step": 99195 }, { "epoch": 0.23, "grad_norm": 1.703125, "learning_rate": 0.00019335152691265538, "loss": 2.2249, "step": 99200 }, { "epoch": 0.23, "grad_norm": 1.984375, "learning_rate": 0.00019335086421464053, "loss": 2.1794, "step": 99205 }, { "epoch": 0.23, "grad_norm": 1.8984375, "learning_rate": 0.00019335020148473532, "loss": 2.0119, "step": 99210 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019334953872294, "loss": 2.2087, "step": 99215 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019334887592925478, "loss": 2.0826, "step": 99220 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.0001933482131036799, "loss": 2.1456, "step": 99225 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019334755024621555, "loss": 2.2902, "step": 99230 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.000193346887356862, "loss": 2.1006, "step": 99235 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.00019334622443561945, "loss": 2.3246, "step": 99240 }, { "epoch": 0.23, "grad_norm": 3.234375, "learning_rate": 0.00019334556148248818, "loss": 2.1563, "step": 99245 }, { "epoch": 0.23, "grad_norm": 2.796875, "learning_rate": 0.00019334489849746832, "loss": 2.2487, "step": 99250 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019334423548056018, "loss": 2.0756, "step": 99255 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019334357243176392, "loss": 2.2743, "step": 99260 }, { "epoch": 0.23, "grad_norm": 1.953125, "learning_rate": 0.00019334290935107984, "loss": 2.3011, "step": 99265 }, { "epoch": 0.23, "grad_norm": 1.75, "learning_rate": 0.00019334224623850812, "loss": 1.9449, "step": 99270 }, { "epoch": 0.23, "grad_norm": 1.90625, "learning_rate": 0.000193341583094049, "loss": 2.162, "step": 99275 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019334091991770268, "loss": 2.1181, "step": 99280 }, { "epoch": 0.23, "grad_norm": 1.9453125, "learning_rate": 0.00019334025670946945, "loss": 2.1724, "step": 99285 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019333959346934946, "loss": 2.1863, "step": 99290 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019333893019734296, "loss": 2.0038, "step": 99295 }, { "epoch": 0.23, "grad_norm": 2.96875, "learning_rate": 0.00019333826689345022, "loss": 2.2493, "step": 99300 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019333760355767142, "loss": 2.1271, "step": 99305 }, { "epoch": 0.23, "grad_norm": 2.71875, "learning_rate": 0.00019333694019000683, "loss": 2.2362, "step": 99310 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.0001933362767904566, "loss": 2.1391, "step": 99315 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019333561335902104, "loss": 2.1136, "step": 99320 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019333494989570035, "loss": 2.3053, "step": 99325 }, { "epoch": 0.23, "grad_norm": 2.28125, "learning_rate": 0.00019333428640049474, "loss": 2.1319, "step": 99330 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019333362287340443, "loss": 2.0027, "step": 99335 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019333295931442965, "loss": 2.2, "step": 99340 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.00019333229572357068, "loss": 2.2591, "step": 99345 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019333163210082766, "loss": 2.1373, "step": 99350 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019333096844620089, "loss": 2.1677, "step": 99355 }, { "epoch": 0.23, "grad_norm": 2.546875, "learning_rate": 0.00019333030475969057, "loss": 2.2541, "step": 99360 }, { "epoch": 0.23, "grad_norm": 1.921875, "learning_rate": 0.00019332964104129693, "loss": 2.1865, "step": 99365 }, { "epoch": 0.23, "grad_norm": 1.78125, "learning_rate": 0.00019332897729102015, "loss": 2.193, "step": 99370 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019332831350886054, "loss": 1.941, "step": 99375 }, { "epoch": 0.23, "grad_norm": 1.7109375, "learning_rate": 0.0001933276496948183, "loss": 2.1093, "step": 99380 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019332698584889358, "loss": 2.0385, "step": 99385 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.0001933263219710867, "loss": 2.249, "step": 99390 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019332565806139788, "loss": 2.077, "step": 99395 }, { "epoch": 0.23, "grad_norm": 3.03125, "learning_rate": 0.0001933249941198273, "loss": 2.0895, "step": 99400 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.0001933243301463752, "loss": 2.3729, "step": 99405 }, { "epoch": 0.23, "grad_norm": 1.9296875, "learning_rate": 0.00019332366614104184, "loss": 2.2549, "step": 99410 }, { "epoch": 0.23, "grad_norm": 1.8671875, "learning_rate": 0.00019332300210382744, "loss": 2.1516, "step": 99415 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019332233803473215, "loss": 2.2145, "step": 99420 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.0001933216739337563, "loss": 2.1344, "step": 99425 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.00019332100980090007, "loss": 2.046, "step": 99430 }, { "epoch": 0.23, "grad_norm": 1.6328125, "learning_rate": 0.00019332034563616368, "loss": 2.1757, "step": 99435 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019331968143954735, "loss": 2.1323, "step": 99440 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019331901721105136, "loss": 2.219, "step": 99445 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.0001933183529506759, "loss": 2.2197, "step": 99450 }, { "epoch": 0.23, "grad_norm": 2.9375, "learning_rate": 0.0001933176886584212, "loss": 2.2049, "step": 99455 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019331702433428744, "loss": 1.9991, "step": 99460 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019331635997827493, "loss": 2.1316, "step": 99465 }, { "epoch": 0.23, "grad_norm": 1.671875, "learning_rate": 0.00019331569559038385, "loss": 2.2081, "step": 99470 }, { "epoch": 0.23, "grad_norm": 3.328125, "learning_rate": 0.00019331503117061446, "loss": 2.099, "step": 99475 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019331436671896692, "loss": 2.1526, "step": 99480 }, { "epoch": 0.23, "grad_norm": 2.28125, "learning_rate": 0.0001933137022354415, "loss": 2.1733, "step": 99485 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019331303772003846, "loss": 2.2503, "step": 99490 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.00019331237317275798, "loss": 2.0314, "step": 99495 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.0001933117085936003, "loss": 1.9845, "step": 99500 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019331104398256566, "loss": 1.9984, "step": 99505 }, { "epoch": 0.23, "grad_norm": 1.5625, "learning_rate": 0.00019331037933965428, "loss": 1.918, "step": 99510 }, { "epoch": 0.23, "grad_norm": 1.6796875, "learning_rate": 0.00019330971466486636, "loss": 2.1785, "step": 99515 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019330904995820217, "loss": 1.9372, "step": 99520 }, { "epoch": 0.23, "grad_norm": 1.8515625, "learning_rate": 0.0001933083852196619, "loss": 2.2351, "step": 99525 }, { "epoch": 0.23, "grad_norm": 2.53125, "learning_rate": 0.00019330772044924576, "loss": 2.2793, "step": 99530 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019330705564695407, "loss": 2.203, "step": 99535 }, { "epoch": 0.23, "grad_norm": 2.296875, "learning_rate": 0.00019330639081278697, "loss": 2.1934, "step": 99540 }, { "epoch": 0.23, "grad_norm": 2.171875, "learning_rate": 0.00019330572594674473, "loss": 2.2989, "step": 99545 }, { "epoch": 0.23, "grad_norm": 2.078125, "learning_rate": 0.00019330506104882754, "loss": 2.04, "step": 99550 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019330439611903565, "loss": 2.1957, "step": 99555 }, { "epoch": 0.23, "grad_norm": 1.890625, "learning_rate": 0.0001933037311573693, "loss": 2.1904, "step": 99560 }, { "epoch": 0.23, "grad_norm": 2.203125, "learning_rate": 0.0001933030661638287, "loss": 2.1733, "step": 99565 }, { "epoch": 0.23, "grad_norm": 1.640625, "learning_rate": 0.00019330240113841408, "loss": 2.1934, "step": 99570 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019330173608112568, "loss": 2.0734, "step": 99575 }, { "epoch": 0.23, "grad_norm": 1.7421875, "learning_rate": 0.0001933010709919637, "loss": 2.0491, "step": 99580 }, { "epoch": 0.23, "grad_norm": 1.8359375, "learning_rate": 0.00019330040587092837, "loss": 2.0914, "step": 99585 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.00019329974071801998, "loss": 2.1001, "step": 99590 }, { "epoch": 0.23, "grad_norm": 1.7890625, "learning_rate": 0.00019329907553323865, "loss": 2.0837, "step": 99595 }, { "epoch": 0.23, "grad_norm": 1.9140625, "learning_rate": 0.0001932984103165847, "loss": 2.2271, "step": 99600 }, { "epoch": 0.23, "grad_norm": 1.765625, "learning_rate": 0.0001932977450680583, "loss": 2.2048, "step": 99605 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.0001932970797876597, "loss": 2.1459, "step": 99610 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019329641447538912, "loss": 2.2642, "step": 99615 }, { "epoch": 0.23, "grad_norm": 1.640625, "learning_rate": 0.0001932957491312468, "loss": 2.0953, "step": 99620 }, { "epoch": 0.23, "grad_norm": 2.421875, "learning_rate": 0.00019329508375523299, "loss": 2.1437, "step": 99625 }, { "epoch": 0.23, "grad_norm": 2.546875, "learning_rate": 0.00019329441834734784, "loss": 2.3201, "step": 99630 }, { "epoch": 0.23, "grad_norm": 2.234375, "learning_rate": 0.00019329375290759164, "loss": 2.2351, "step": 99635 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.0001932930874359646, "loss": 2.1952, "step": 99640 }, { "epoch": 0.23, "grad_norm": 1.5234375, "learning_rate": 0.00019329242193246696, "loss": 2.1358, "step": 99645 }, { "epoch": 0.23, "grad_norm": 2.359375, "learning_rate": 0.00019329175639709893, "loss": 2.043, "step": 99650 }, { "epoch": 0.23, "grad_norm": 2.015625, "learning_rate": 0.00019329109082986075, "loss": 2.158, "step": 99655 }, { "epoch": 0.23, "grad_norm": 2.03125, "learning_rate": 0.00019329042523075262, "loss": 1.9691, "step": 99660 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019328975959977482, "loss": 2.0453, "step": 99665 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.00019328909393692754, "loss": 2.1305, "step": 99670 }, { "epoch": 0.23, "grad_norm": 2.21875, "learning_rate": 0.000193288428242211, "loss": 2.0611, "step": 99675 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019328776251562542, "loss": 2.1583, "step": 99680 }, { "epoch": 0.23, "grad_norm": 1.796875, "learning_rate": 0.0001932870967571711, "loss": 2.1037, "step": 99685 }, { "epoch": 0.23, "grad_norm": 2.09375, "learning_rate": 0.0001932864309668482, "loss": 2.097, "step": 99690 }, { "epoch": 0.23, "grad_norm": 2.796875, "learning_rate": 0.00019328576514465696, "loss": 2.0994, "step": 99695 }, { "epoch": 0.23, "grad_norm": 2.4375, "learning_rate": 0.0001932850992905976, "loss": 2.2107, "step": 99700 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019328443340467036, "loss": 2.2332, "step": 99705 }, { "epoch": 0.23, "grad_norm": 2.28125, "learning_rate": 0.00019328376748687547, "loss": 2.1678, "step": 99710 }, { "epoch": 0.23, "grad_norm": 2.15625, "learning_rate": 0.00019328310153721314, "loss": 2.1596, "step": 99715 }, { "epoch": 0.23, "grad_norm": 2.3125, "learning_rate": 0.0001932824355556836, "loss": 2.0717, "step": 99720 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019328176954228713, "loss": 2.1448, "step": 99725 }, { "epoch": 0.23, "grad_norm": 2.125, "learning_rate": 0.00019328110349702392, "loss": 2.1763, "step": 99730 }, { "epoch": 0.23, "grad_norm": 1.8203125, "learning_rate": 0.00019328043741989416, "loss": 2.0812, "step": 99735 }, { "epoch": 0.23, "grad_norm": 2.34375, "learning_rate": 0.0001932797713108981, "loss": 2.2012, "step": 99740 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019327910517003601, "loss": 2.2681, "step": 99745 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019327843899730808, "loss": 2.0343, "step": 99750 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.00019327777279271456, "loss": 2.3263, "step": 99755 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019327710655625563, "loss": 2.0506, "step": 99760 }, { "epoch": 0.23, "grad_norm": 2.109375, "learning_rate": 0.00019327644028793156, "loss": 2.2507, "step": 99765 }, { "epoch": 0.23, "grad_norm": 2.0625, "learning_rate": 0.00019327577398774257, "loss": 2.1121, "step": 99770 }, { "epoch": 0.23, "grad_norm": 1.8046875, "learning_rate": 0.00019327510765568886, "loss": 2.1052, "step": 99775 }, { "epoch": 0.23, "grad_norm": 2.484375, "learning_rate": 0.00019327444129177074, "loss": 2.2166, "step": 99780 }, { "epoch": 0.23, "grad_norm": 2.140625, "learning_rate": 0.0001932737748959883, "loss": 2.1465, "step": 99785 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.0001932731084683419, "loss": 1.9858, "step": 99790 }, { "epoch": 0.23, "grad_norm": 1.65625, "learning_rate": 0.0001932724420088317, "loss": 2.2174, "step": 99795 }, { "epoch": 0.23, "grad_norm": 1.9765625, "learning_rate": 0.00019327177551745796, "loss": 2.1996, "step": 99800 }, { "epoch": 0.23, "grad_norm": 1.734375, "learning_rate": 0.00019327110899422086, "loss": 1.9887, "step": 99805 }, { "epoch": 0.23, "grad_norm": 2.046875, "learning_rate": 0.00019327044243912068, "loss": 2.1821, "step": 99810 }, { "epoch": 0.23, "grad_norm": 1.8828125, "learning_rate": 0.0001932697758521576, "loss": 2.1764, "step": 99815 }, { "epoch": 0.23, "grad_norm": 1.9375, "learning_rate": 0.0001932691092333319, "loss": 2.1105, "step": 99820 }, { "epoch": 0.23, "grad_norm": 2.25, "learning_rate": 0.00019326844258264379, "loss": 2.1248, "step": 99825 }, { "epoch": 0.23, "grad_norm": 2.40625, "learning_rate": 0.00019326777590009347, "loss": 2.1494, "step": 99830 }, { "epoch": 0.23, "grad_norm": 1.7734375, "learning_rate": 0.00019326710918568117, "loss": 2.2228, "step": 99835 }, { "epoch": 0.23, "grad_norm": 1.609375, "learning_rate": 0.00019326644243940715, "loss": 2.3677, "step": 99840 }, { "epoch": 0.23, "grad_norm": 2.1875, "learning_rate": 0.00019326577566127162, "loss": 2.2241, "step": 99845 }, { "epoch": 0.23, "grad_norm": 1.6328125, "learning_rate": 0.00019326510885127484, "loss": 2.1814, "step": 99850 }, { "epoch": 0.23, "grad_norm": 1.96875, "learning_rate": 0.00019326444200941695, "loss": 2.21, "step": 99855 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019326377513569827, "loss": 2.1815, "step": 99860 }, { "epoch": 0.24, "grad_norm": 1.625, "learning_rate": 0.00019326310823011898, "loss": 2.2623, "step": 99865 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019326244129267935, "loss": 2.1673, "step": 99870 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019326177432337952, "loss": 2.1233, "step": 99875 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019326110732221981, "loss": 1.8625, "step": 99880 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019326044028920042, "loss": 2.0552, "step": 99885 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019325977322432158, "loss": 2.3368, "step": 99890 }, { "epoch": 0.24, "grad_norm": 1.75, "learning_rate": 0.00019325910612758352, "loss": 2.129, "step": 99895 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.0001932584389989864, "loss": 2.1103, "step": 99900 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019325777183853057, "loss": 2.1546, "step": 99905 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019325710464621615, "loss": 2.081, "step": 99910 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019325643742204346, "loss": 1.9977, "step": 99915 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019325577016601262, "loss": 2.3614, "step": 99920 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019325510287812394, "loss": 2.0915, "step": 99925 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019325443555837763, "loss": 2.0598, "step": 99930 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.0001932537682067739, "loss": 2.1776, "step": 99935 }, { "epoch": 0.24, "grad_norm": 2.578125, "learning_rate": 0.00019325310082331302, "loss": 2.0414, "step": 99940 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.0001932524334079952, "loss": 2.3331, "step": 99945 }, { "epoch": 0.24, "grad_norm": 2.375, "learning_rate": 0.00019325176596082062, "loss": 2.0302, "step": 99950 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.00019325109848178956, "loss": 2.1611, "step": 99955 }, { "epoch": 0.24, "grad_norm": 2.359375, "learning_rate": 0.00019325043097090223, "loss": 2.0078, "step": 99960 }, { "epoch": 0.24, "grad_norm": 1.7578125, "learning_rate": 0.00019324976342815887, "loss": 1.9403, "step": 99965 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.00019324909585355967, "loss": 2.2246, "step": 99970 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019324842824710494, "loss": 2.2914, "step": 99975 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019324776060879485, "loss": 2.1372, "step": 99980 }, { "epoch": 0.24, "grad_norm": 2.8125, "learning_rate": 0.0001932470929386296, "loss": 2.0617, "step": 99985 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.00019324642523660946, "loss": 2.203, "step": 99990 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019324575750273465, "loss": 2.104, "step": 99995 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019324508973700538, "loss": 2.2846, "step": 100000 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019324442193942193, "loss": 2.3891, "step": 100005 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019324375410998448, "loss": 2.3191, "step": 100010 }, { "epoch": 0.24, "grad_norm": 1.640625, "learning_rate": 0.00019324308624869328, "loss": 2.13, "step": 100015 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019324241835554855, "loss": 2.0622, "step": 100020 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.0001932417504305505, "loss": 2.1138, "step": 100025 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.0001932410824736994, "loss": 2.249, "step": 100030 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019324041448499544, "loss": 2.2503, "step": 100035 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019323974646443887, "loss": 2.0333, "step": 100040 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019323907841202993, "loss": 2.206, "step": 100045 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019323841032776881, "loss": 2.1318, "step": 100050 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019323774221165576, "loss": 2.1005, "step": 100055 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.000193237074063691, "loss": 2.1866, "step": 100060 }, { "epoch": 0.24, "grad_norm": 1.6015625, "learning_rate": 0.00019323640588387475, "loss": 1.8535, "step": 100065 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001932357376722073, "loss": 2.0873, "step": 100070 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019323506942868878, "loss": 2.1391, "step": 100075 }, { "epoch": 0.24, "grad_norm": 2.4375, "learning_rate": 0.0001932344011533195, "loss": 2.1133, "step": 100080 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019323373284609968, "loss": 2.0721, "step": 100085 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019323306450702948, "loss": 2.3896, "step": 100090 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.0001932323961361092, "loss": 2.2549, "step": 100095 }, { "epoch": 0.24, "grad_norm": 2.828125, "learning_rate": 0.000193231727733339, "loss": 1.935, "step": 100100 }, { "epoch": 0.24, "grad_norm": 2.4375, "learning_rate": 0.0001932310592987192, "loss": 2.1305, "step": 100105 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019323039083224996, "loss": 2.0592, "step": 100110 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.00019322972233393152, "loss": 2.2218, "step": 100115 }, { "epoch": 0.24, "grad_norm": 1.671875, "learning_rate": 0.00019322905380376415, "loss": 2.2302, "step": 100120 }, { "epoch": 0.24, "grad_norm": 1.78125, "learning_rate": 0.00019322838524174802, "loss": 2.0358, "step": 100125 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.0001932277166478834, "loss": 2.1623, "step": 100130 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019322704802217047, "loss": 2.2472, "step": 100135 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.0001932263793646095, "loss": 2.0703, "step": 100140 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.00019322571067520073, "loss": 2.187, "step": 100145 }, { "epoch": 0.24, "grad_norm": 1.53125, "learning_rate": 0.00019322504195394435, "loss": 2.0585, "step": 100150 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.0001932243732008406, "loss": 2.1752, "step": 100155 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.00019322370441588973, "loss": 2.0454, "step": 100160 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019322303559909191, "loss": 2.1828, "step": 100165 }, { "epoch": 0.24, "grad_norm": 2.53125, "learning_rate": 0.00019322236675044745, "loss": 2.1936, "step": 100170 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019322169786995655, "loss": 2.1699, "step": 100175 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001932210289576194, "loss": 2.2467, "step": 100180 }, { "epoch": 0.24, "grad_norm": 2.453125, "learning_rate": 0.00019322036001343627, "loss": 2.3515, "step": 100185 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019321969103740734, "loss": 2.0646, "step": 100190 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.0001932190220295329, "loss": 2.1412, "step": 100195 }, { "epoch": 0.24, "grad_norm": 2.484375, "learning_rate": 0.00019321835298981318, "loss": 2.2064, "step": 100200 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019321768391824832, "loss": 2.231, "step": 100205 }, { "epoch": 0.24, "grad_norm": 1.671875, "learning_rate": 0.00019321701481483868, "loss": 2.0893, "step": 100210 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019321634567958436, "loss": 2.1525, "step": 100215 }, { "epoch": 0.24, "grad_norm": 2.421875, "learning_rate": 0.00019321567651248568, "loss": 2.0918, "step": 100220 }, { "epoch": 0.24, "grad_norm": 3.25, "learning_rate": 0.0001932150073135428, "loss": 2.1445, "step": 100225 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.000193214338082756, "loss": 2.1003, "step": 100230 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001932136688201255, "loss": 2.1867, "step": 100235 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019321299952565152, "loss": 2.075, "step": 100240 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019321233019933427, "loss": 1.9863, "step": 100245 }, { "epoch": 0.24, "grad_norm": 1.8828125, "learning_rate": 0.00019321166084117398, "loss": 2.1499, "step": 100250 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.00019321099145117097, "loss": 2.2121, "step": 100255 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.00019321032202932533, "loss": 2.1004, "step": 100260 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019320965257563737, "loss": 2.1365, "step": 100265 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.0001932089830901073, "loss": 1.9719, "step": 100270 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.00019320831357273537, "loss": 2.0581, "step": 100275 }, { "epoch": 0.24, "grad_norm": 2.46875, "learning_rate": 0.00019320764402352175, "loss": 2.2559, "step": 100280 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019320697444246676, "loss": 2.0323, "step": 100285 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019320630482957057, "loss": 2.2694, "step": 100290 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.0001932056351848334, "loss": 2.0388, "step": 100295 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019320496550825547, "loss": 2.3072, "step": 100300 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019320429579983706, "loss": 2.2517, "step": 100305 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001932036260595784, "loss": 2.156, "step": 100310 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001932029562874796, "loss": 2.291, "step": 100315 }, { "epoch": 0.24, "grad_norm": 2.78125, "learning_rate": 0.00019320228648354106, "loss": 2.1096, "step": 100320 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.0001932016166477629, "loss": 2.1261, "step": 100325 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.0001932009467801454, "loss": 2.15, "step": 100330 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019320027688068874, "loss": 2.1031, "step": 100335 }, { "epoch": 0.24, "grad_norm": 2.515625, "learning_rate": 0.0001931996069493932, "loss": 2.229, "step": 100340 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019319893698625898, "loss": 2.0701, "step": 100345 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019319826699128626, "loss": 1.994, "step": 100350 }, { "epoch": 0.24, "grad_norm": 2.453125, "learning_rate": 0.0001931975969644754, "loss": 2.1028, "step": 100355 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.0001931969269058265, "loss": 2.1809, "step": 100360 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019319625681533985, "loss": 2.0629, "step": 100365 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019319558669301569, "loss": 1.974, "step": 100370 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.0001931949165388542, "loss": 2.0207, "step": 100375 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019319424635285564, "loss": 2.215, "step": 100380 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019319357613502026, "loss": 2.155, "step": 100385 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019319290588534825, "loss": 2.0299, "step": 100390 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.00019319223560383987, "loss": 2.0994, "step": 100395 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001931915652904953, "loss": 2.2081, "step": 100400 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019319089494531478, "loss": 2.2737, "step": 100405 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001931902245682986, "loss": 2.0189, "step": 100410 }, { "epoch": 0.24, "grad_norm": 2.453125, "learning_rate": 0.00019318955415944695, "loss": 2.1544, "step": 100415 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019318888371876004, "loss": 2.1872, "step": 100420 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019318821324623814, "loss": 2.3692, "step": 100425 }, { "epoch": 0.24, "grad_norm": 1.90625, "learning_rate": 0.00019318754274188143, "loss": 2.1318, "step": 100430 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019318687220569022, "loss": 1.9741, "step": 100435 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001931862016376646, "loss": 2.2009, "step": 100440 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019318553103780494, "loss": 2.1418, "step": 100445 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.0001931848604061114, "loss": 2.1434, "step": 100450 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019318418974258422, "loss": 2.3607, "step": 100455 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019318351904722364, "loss": 2.1085, "step": 100460 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019318284832002989, "loss": 1.9538, "step": 100465 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.00019318217756100317, "loss": 2.3242, "step": 100470 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019318150677014373, "loss": 2.2213, "step": 100475 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001931808359474518, "loss": 2.1446, "step": 100480 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.0001931801650929276, "loss": 2.0512, "step": 100485 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019317949420657137, "loss": 2.2035, "step": 100490 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019317882328838333, "loss": 2.1208, "step": 100495 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019317815233836372, "loss": 1.9751, "step": 100500 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.00019317748135651278, "loss": 2.2399, "step": 100505 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001931768103428307, "loss": 2.2398, "step": 100510 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.0001931761392973177, "loss": 2.2145, "step": 100515 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.0001931754682199741, "loss": 2.1607, "step": 100520 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019317479711080003, "loss": 2.2157, "step": 100525 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.00019317412596979577, "loss": 2.0164, "step": 100530 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.00019317345479696155, "loss": 2.1583, "step": 100535 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.0001931727835922976, "loss": 2.147, "step": 100540 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019317211235580413, "loss": 2.0278, "step": 100545 }, { "epoch": 0.24, "grad_norm": 1.8984375, "learning_rate": 0.00019317144108748136, "loss": 2.1946, "step": 100550 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.00019317076978732955, "loss": 2.0679, "step": 100555 }, { "epoch": 0.24, "grad_norm": 1.75, "learning_rate": 0.0001931700984553489, "loss": 2.0966, "step": 100560 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019316942709153966, "loss": 2.1746, "step": 100565 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.00019316875569590206, "loss": 2.1308, "step": 100570 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.00019316808426843632, "loss": 2.1882, "step": 100575 }, { "epoch": 0.24, "grad_norm": 2.578125, "learning_rate": 0.00019316741280914267, "loss": 2.0575, "step": 100580 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019316674131802136, "loss": 2.1181, "step": 100585 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019316606979507257, "loss": 2.2514, "step": 100590 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.0001931653982402966, "loss": 2.2094, "step": 100595 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.0001931647266536936, "loss": 2.2914, "step": 100600 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019316405503526384, "loss": 2.0575, "step": 100605 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.0001931633833850076, "loss": 2.3479, "step": 100610 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.000193162711702925, "loss": 2.1089, "step": 100615 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.0001931620399890164, "loss": 2.337, "step": 100620 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019316136824328189, "loss": 2.2939, "step": 100625 }, { "epoch": 0.24, "grad_norm": 2.453125, "learning_rate": 0.00019316069646572176, "loss": 2.1163, "step": 100630 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.0001931600246563363, "loss": 1.9668, "step": 100635 }, { "epoch": 0.24, "grad_norm": 2.625, "learning_rate": 0.00019315935281512565, "loss": 1.9884, "step": 100640 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.0001931586809420901, "loss": 2.2876, "step": 100645 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019315800903722984, "loss": 2.029, "step": 100650 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001931573371005451, "loss": 2.1537, "step": 100655 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019315666513203614, "loss": 2.1849, "step": 100660 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019315599313170318, "loss": 2.1482, "step": 100665 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019315532109954645, "loss": 2.0312, "step": 100670 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019315464903556614, "loss": 2.0887, "step": 100675 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.00019315397693976257, "loss": 2.2228, "step": 100680 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019315330481213584, "loss": 2.092, "step": 100685 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019315263265268627, "loss": 2.0029, "step": 100690 }, { "epoch": 0.24, "grad_norm": 2.390625, "learning_rate": 0.00019315196046141412, "loss": 2.0645, "step": 100695 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.00019315128823831955, "loss": 2.1052, "step": 100700 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019315061598340277, "loss": 2.1699, "step": 100705 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019314994369666407, "loss": 2.2836, "step": 100710 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.0001931492713781037, "loss": 2.1748, "step": 100715 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.0001931485990277218, "loss": 2.2899, "step": 100720 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019314792664551869, "loss": 2.3091, "step": 100725 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.00019314725423149454, "loss": 2.3267, "step": 100730 }, { "epoch": 0.24, "grad_norm": 2.765625, "learning_rate": 0.00019314658178564957, "loss": 2.1261, "step": 100735 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019314590930798406, "loss": 2.0219, "step": 100740 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019314523679849822, "loss": 2.1163, "step": 100745 }, { "epoch": 0.24, "grad_norm": 1.7578125, "learning_rate": 0.0001931445642571923, "loss": 2.2175, "step": 100750 }, { "epoch": 0.24, "grad_norm": 1.90625, "learning_rate": 0.00019314389168406649, "loss": 2.0938, "step": 100755 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.00019314321907912103, "loss": 2.1406, "step": 100760 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019314254644235617, "loss": 2.0476, "step": 100765 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.0001931418737737721, "loss": 2.1972, "step": 100770 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.0001931412010733691, "loss": 2.3782, "step": 100775 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019314052834114737, "loss": 2.0842, "step": 100780 }, { "epoch": 0.24, "grad_norm": 2.4375, "learning_rate": 0.00019313985557710718, "loss": 2.1882, "step": 100785 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019313918278124868, "loss": 2.1534, "step": 100790 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019313850995357216, "loss": 2.3438, "step": 100795 }, { "epoch": 0.24, "grad_norm": 2.359375, "learning_rate": 0.00019313783709407783, "loss": 2.0941, "step": 100800 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019313716420276593, "loss": 2.1602, "step": 100805 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019313649127963668, "loss": 2.2483, "step": 100810 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.00019313581832469033, "loss": 1.9269, "step": 100815 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001931351453379271, "loss": 2.0945, "step": 100820 }, { "epoch": 0.24, "grad_norm": 2.359375, "learning_rate": 0.0001931344723193472, "loss": 2.1983, "step": 100825 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019313379926895086, "loss": 2.1954, "step": 100830 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019313312618673837, "loss": 2.1614, "step": 100835 }, { "epoch": 0.24, "grad_norm": 1.765625, "learning_rate": 0.00019313245307270988, "loss": 1.9325, "step": 100840 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.00019313177992686566, "loss": 2.16, "step": 100845 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019313110674920594, "loss": 1.9922, "step": 100850 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019313043353973096, "loss": 2.0964, "step": 100855 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.0001931297602984409, "loss": 2.1576, "step": 100860 }, { "epoch": 0.24, "grad_norm": 1.703125, "learning_rate": 0.00019312908702533602, "loss": 2.119, "step": 100865 }, { "epoch": 0.24, "grad_norm": 1.5546875, "learning_rate": 0.0001931284137204166, "loss": 1.9906, "step": 100870 }, { "epoch": 0.24, "grad_norm": 2.8125, "learning_rate": 0.00019312774038368283, "loss": 2.1525, "step": 100875 }, { "epoch": 0.24, "grad_norm": 1.796875, "learning_rate": 0.00019312706701513487, "loss": 2.1212, "step": 100880 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019312639361477308, "loss": 2.1376, "step": 100885 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001931257201825976, "loss": 2.183, "step": 100890 }, { "epoch": 0.24, "grad_norm": 2.4375, "learning_rate": 0.00019312504671860868, "loss": 2.2446, "step": 100895 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.00019312437322280656, "loss": 2.1168, "step": 100900 }, { "epoch": 0.24, "grad_norm": 1.671875, "learning_rate": 0.00019312369969519145, "loss": 2.1713, "step": 100905 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019312302613576363, "loss": 2.2449, "step": 100910 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019312235254452325, "loss": 2.2331, "step": 100915 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019312167892147062, "loss": 2.0417, "step": 100920 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019312100526660593, "loss": 2.1976, "step": 100925 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.0001931203315799294, "loss": 2.2053, "step": 100930 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019311965786144127, "loss": 2.0786, "step": 100935 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019311898411114182, "loss": 2.1287, "step": 100940 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.0001931183103290312, "loss": 2.2853, "step": 100945 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.0001931176365151097, "loss": 2.1742, "step": 100950 }, { "epoch": 0.24, "grad_norm": 2.375, "learning_rate": 0.0001931169626693775, "loss": 2.2262, "step": 100955 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.0001931162887918349, "loss": 2.0754, "step": 100960 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019311561488248203, "loss": 2.0763, "step": 100965 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.0001931149409413192, "loss": 2.2752, "step": 100970 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019311426696834663, "loss": 2.2039, "step": 100975 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019311359296356453, "loss": 1.9969, "step": 100980 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019311291892697315, "loss": 1.9786, "step": 100985 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019311224485857266, "loss": 2.0108, "step": 100990 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.00019311157075836338, "loss": 2.1039, "step": 100995 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.0001931108966263455, "loss": 2.1527, "step": 101000 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.00019311022246251924, "loss": 2.0733, "step": 101005 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019310954826688487, "loss": 2.1674, "step": 101010 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019310887403944254, "loss": 2.1197, "step": 101015 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019310819978019255, "loss": 2.145, "step": 101020 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019310752548913512, "loss": 2.1853, "step": 101025 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.00019310685116627046, "loss": 2.1725, "step": 101030 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001931061768115988, "loss": 2.2251, "step": 101035 }, { "epoch": 0.24, "grad_norm": 1.75, "learning_rate": 0.0001931055024251204, "loss": 2.0893, "step": 101040 }, { "epoch": 0.24, "grad_norm": 2.515625, "learning_rate": 0.00019310482800683545, "loss": 2.1354, "step": 101045 }, { "epoch": 0.24, "grad_norm": 1.6875, "learning_rate": 0.00019310415355674424, "loss": 2.0401, "step": 101050 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019310347907484693, "loss": 2.242, "step": 101055 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001931028045611438, "loss": 2.1298, "step": 101060 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019310213001563504, "loss": 2.1317, "step": 101065 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019310145543832092, "loss": 2.1058, "step": 101070 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019310078082920168, "loss": 2.1536, "step": 101075 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019310010618827747, "loss": 2.2466, "step": 101080 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019309943151554862, "loss": 2.2835, "step": 101085 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019309875681101528, "loss": 2.104, "step": 101090 }, { "epoch": 0.24, "grad_norm": 2.5, "learning_rate": 0.00019309808207467776, "loss": 2.1563, "step": 101095 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019309740730653622, "loss": 2.2483, "step": 101100 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019309673250659092, "loss": 2.1614, "step": 101105 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019309605767484208, "loss": 2.1394, "step": 101110 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019309538281128994, "loss": 2.0412, "step": 101115 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019309470791593474, "loss": 2.272, "step": 101120 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019309403298877671, "loss": 2.1355, "step": 101125 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019309335802981606, "loss": 2.1363, "step": 101130 }, { "epoch": 0.24, "grad_norm": 2.421875, "learning_rate": 0.000193092683039053, "loss": 2.0146, "step": 101135 }, { "epoch": 0.24, "grad_norm": 1.6640625, "learning_rate": 0.00019309200801648782, "loss": 2.1056, "step": 101140 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001930913329621207, "loss": 2.2792, "step": 101145 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019309065787595194, "loss": 2.1552, "step": 101150 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.0001930899827579817, "loss": 2.2164, "step": 101155 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019308930760821022, "loss": 2.2865, "step": 101160 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019308863242663776, "loss": 2.1376, "step": 101165 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.00019308795721326453, "loss": 2.1189, "step": 101170 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019308728196809077, "loss": 2.055, "step": 101175 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.0001930866066911167, "loss": 2.1624, "step": 101180 }, { "epoch": 0.24, "grad_norm": 1.6796875, "learning_rate": 0.00019308593138234255, "loss": 2.1269, "step": 101185 }, { "epoch": 0.24, "grad_norm": 1.765625, "learning_rate": 0.00019308525604176856, "loss": 2.0952, "step": 101190 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.00019308458066939497, "loss": 2.1564, "step": 101195 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019308390526522198, "loss": 2.2816, "step": 101200 }, { "epoch": 0.24, "grad_norm": 1.59375, "learning_rate": 0.00019308322982924985, "loss": 2.2646, "step": 101205 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001930825543614788, "loss": 1.995, "step": 101210 }, { "epoch": 0.24, "grad_norm": 1.6640625, "learning_rate": 0.00019308187886190906, "loss": 2.1502, "step": 101215 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019308120333054088, "loss": 2.1603, "step": 101220 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019308052776737444, "loss": 2.1998, "step": 101225 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019307985217241003, "loss": 2.3457, "step": 101230 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019307917654564784, "loss": 2.1475, "step": 101235 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001930785008870881, "loss": 2.4011, "step": 101240 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.0001930778251967311, "loss": 2.2696, "step": 101245 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.000193077149474577, "loss": 2.2524, "step": 101250 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019307647372062605, "loss": 2.0858, "step": 101255 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.0001930757979348785, "loss": 2.0678, "step": 101260 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019307512211733457, "loss": 2.1031, "step": 101265 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019307444626799448, "loss": 2.181, "step": 101270 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019307377038685849, "loss": 2.0387, "step": 101275 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.0001930730944739268, "loss": 2.0043, "step": 101280 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019307241852919966, "loss": 2.1454, "step": 101285 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.00019307174255267731, "loss": 2.4123, "step": 101290 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019307106654435993, "loss": 2.0296, "step": 101295 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.0001930703905042478, "loss": 2.1096, "step": 101300 }, { "epoch": 0.24, "grad_norm": 1.6953125, "learning_rate": 0.00019306971443234114, "loss": 2.0798, "step": 101305 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019306903832864015, "loss": 2.1375, "step": 101310 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019306836219314515, "loss": 1.9905, "step": 101315 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019306768602585627, "loss": 2.0017, "step": 101320 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.00019306700982677378, "loss": 1.9746, "step": 101325 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001930663335958979, "loss": 2.3768, "step": 101330 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.0001930656573332289, "loss": 2.1752, "step": 101335 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019306498103876697, "loss": 2.1473, "step": 101340 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019306430471251234, "loss": 2.2275, "step": 101345 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019306362835446527, "loss": 2.1117, "step": 101350 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019306295196462597, "loss": 2.2342, "step": 101355 }, { "epoch": 0.24, "grad_norm": 4.03125, "learning_rate": 0.0001930622755429947, "loss": 2.2087, "step": 101360 }, { "epoch": 0.24, "grad_norm": 1.71875, "learning_rate": 0.00019306159908957167, "loss": 2.1604, "step": 101365 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001930609226043571, "loss": 2.1204, "step": 101370 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.0001930602460873512, "loss": 2.0947, "step": 101375 }, { "epoch": 0.24, "grad_norm": 1.5859375, "learning_rate": 0.00019305956953855427, "loss": 2.2218, "step": 101380 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.0001930588929579665, "loss": 2.1206, "step": 101385 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.0001930582163455881, "loss": 2.1311, "step": 101390 }, { "epoch": 0.24, "grad_norm": 2.90625, "learning_rate": 0.00019305753970141933, "loss": 2.3086, "step": 101395 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.00019305686302546044, "loss": 2.1707, "step": 101400 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019305618631771163, "loss": 2.2962, "step": 101405 }, { "epoch": 0.24, "grad_norm": 1.6796875, "learning_rate": 0.00019305550957817314, "loss": 2.0165, "step": 101410 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001930548328068452, "loss": 2.1978, "step": 101415 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019305415600372801, "loss": 2.2048, "step": 101420 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019305347916882189, "loss": 2.1392, "step": 101425 }, { "epoch": 0.24, "grad_norm": 3.15625, "learning_rate": 0.00019305280230212698, "loss": 2.1497, "step": 101430 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019305212540364353, "loss": 2.2221, "step": 101435 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019305144847337182, "loss": 2.2678, "step": 101440 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019305077151131203, "loss": 2.2707, "step": 101445 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019305009451746442, "loss": 2.0502, "step": 101450 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019304941749182918, "loss": 2.5153, "step": 101455 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.0001930487404344066, "loss": 2.1216, "step": 101460 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019304806334519687, "loss": 2.0657, "step": 101465 }, { "epoch": 0.24, "grad_norm": 1.6796875, "learning_rate": 0.00019304738622420026, "loss": 2.0867, "step": 101470 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019304670907141692, "loss": 2.0423, "step": 101475 }, { "epoch": 0.24, "grad_norm": 1.6953125, "learning_rate": 0.0001930460318868472, "loss": 2.1814, "step": 101480 }, { "epoch": 0.24, "grad_norm": 1.671875, "learning_rate": 0.00019304535467049122, "loss": 2.0662, "step": 101485 }, { "epoch": 0.24, "grad_norm": 1.8828125, "learning_rate": 0.0001930446774223493, "loss": 2.166, "step": 101490 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001930440001424216, "loss": 2.2136, "step": 101495 }, { "epoch": 0.24, "grad_norm": 1.6796875, "learning_rate": 0.00019304332283070837, "loss": 2.2953, "step": 101500 }, { "epoch": 0.24, "grad_norm": 1.5859375, "learning_rate": 0.0001930426454872099, "loss": 2.2085, "step": 101505 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019304196811192634, "loss": 2.2089, "step": 101510 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.000193041290704858, "loss": 2.2333, "step": 101515 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019304061326600502, "loss": 2.2772, "step": 101520 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.0001930399357953677, "loss": 2.1695, "step": 101525 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019303925829294624, "loss": 1.976, "step": 101530 }, { "epoch": 0.24, "grad_norm": 1.765625, "learning_rate": 0.00019303858075874088, "loss": 2.1034, "step": 101535 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019303790319275186, "loss": 2.2499, "step": 101540 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.0001930372255949794, "loss": 2.2612, "step": 101545 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019303654796542374, "loss": 2.285, "step": 101550 }, { "epoch": 0.24, "grad_norm": 1.625, "learning_rate": 0.0001930358703040851, "loss": 2.2019, "step": 101555 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019303519261096376, "loss": 2.21, "step": 101560 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019303451488605986, "loss": 2.2196, "step": 101565 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001930338371293737, "loss": 2.0065, "step": 101570 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.0001930331593409055, "loss": 2.079, "step": 101575 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019303248152065548, "loss": 2.2198, "step": 101580 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019303180366862387, "loss": 2.1103, "step": 101585 }, { "epoch": 0.24, "grad_norm": 1.6796875, "learning_rate": 0.00019303112578481093, "loss": 2.1128, "step": 101590 }, { "epoch": 0.24, "grad_norm": 1.71875, "learning_rate": 0.00019303044786921685, "loss": 1.9455, "step": 101595 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019302976992184188, "loss": 2.3567, "step": 101600 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019302909194268626, "loss": 2.2467, "step": 101605 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019302841393175024, "loss": 2.1542, "step": 101610 }, { "epoch": 0.24, "grad_norm": 1.8984375, "learning_rate": 0.000193027735889034, "loss": 2.2334, "step": 101615 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.0001930270578145378, "loss": 2.055, "step": 101620 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019302637970826188, "loss": 2.3398, "step": 101625 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.00019302570157020643, "loss": 2.1431, "step": 101630 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019302502340037174, "loss": 2.0133, "step": 101635 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019302434519875803, "loss": 2.1132, "step": 101640 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.0001930236669653655, "loss": 2.2106, "step": 101645 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.00019302298870019438, "loss": 2.1589, "step": 101650 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.00019302231040324493, "loss": 2.1346, "step": 101655 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019302163207451738, "loss": 2.1885, "step": 101660 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019302095371401197, "loss": 2.156, "step": 101665 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.0001930202753217289, "loss": 2.3783, "step": 101670 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.0001930195968976684, "loss": 2.0538, "step": 101675 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019301891844183074, "loss": 2.2253, "step": 101680 }, { "epoch": 0.24, "grad_norm": 1.5234375, "learning_rate": 0.0001930182399542161, "loss": 2.247, "step": 101685 }, { "epoch": 0.24, "grad_norm": 1.6171875, "learning_rate": 0.00019301756143482476, "loss": 2.0664, "step": 101690 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.00019301688288365695, "loss": 2.1563, "step": 101695 }, { "epoch": 0.24, "grad_norm": 2.484375, "learning_rate": 0.00019301620430071289, "loss": 2.1555, "step": 101700 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.0001930155256859928, "loss": 2.0799, "step": 101705 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.0001930148470394969, "loss": 2.3515, "step": 101710 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019301416836122547, "loss": 1.9341, "step": 101715 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.0001930134896511787, "loss": 2.048, "step": 101720 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.0001930128109093568, "loss": 2.1161, "step": 101725 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019301213213576007, "loss": 2.1269, "step": 101730 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019301145333038873, "loss": 2.2148, "step": 101735 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019301077449324297, "loss": 2.3223, "step": 101740 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019301009562432305, "loss": 2.0424, "step": 101745 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.00019300941672362916, "loss": 2.0957, "step": 101750 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.0001930087377911616, "loss": 2.1582, "step": 101755 }, { "epoch": 0.24, "grad_norm": 1.703125, "learning_rate": 0.00019300805882692057, "loss": 2.234, "step": 101760 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019300737983090626, "loss": 2.214, "step": 101765 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019300670080311897, "loss": 2.3055, "step": 101770 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019300602174355893, "loss": 2.0737, "step": 101775 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019300534265222634, "loss": 2.1418, "step": 101780 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019300466352912142, "loss": 2.038, "step": 101785 }, { "epoch": 0.24, "grad_norm": 1.8828125, "learning_rate": 0.0001930039843742444, "loss": 2.1938, "step": 101790 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019300330518759554, "loss": 2.0527, "step": 101795 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.0001930026259691751, "loss": 2.1556, "step": 101800 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019300194671898323, "loss": 2.1557, "step": 101805 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019300126743702025, "loss": 2.0516, "step": 101810 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019300058812328635, "loss": 2.0434, "step": 101815 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019299990877778175, "loss": 2.0123, "step": 101820 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.0001929992294005067, "loss": 2.3001, "step": 101825 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019299854999146142, "loss": 2.0275, "step": 101830 }, { "epoch": 0.24, "grad_norm": 2.484375, "learning_rate": 0.00019299787055064612, "loss": 2.0976, "step": 101835 }, { "epoch": 0.24, "grad_norm": 1.640625, "learning_rate": 0.0001929971910780611, "loss": 2.1831, "step": 101840 }, { "epoch": 0.24, "grad_norm": 2.453125, "learning_rate": 0.00019299651157370654, "loss": 2.2236, "step": 101845 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019299583203758266, "loss": 2.154, "step": 101850 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019299515246968976, "loss": 1.9825, "step": 101855 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.000192994472870028, "loss": 2.0587, "step": 101860 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019299379323859766, "loss": 2.124, "step": 101865 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019299311357539892, "loss": 1.9838, "step": 101870 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019299243388043208, "loss": 2.2574, "step": 101875 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019299175415369736, "loss": 2.1827, "step": 101880 }, { "epoch": 0.24, "grad_norm": 1.734375, "learning_rate": 0.00019299107439519492, "loss": 2.078, "step": 101885 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019299039460492505, "loss": 2.1646, "step": 101890 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.000192989714782888, "loss": 2.0326, "step": 101895 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019298903492908398, "loss": 1.9163, "step": 101900 }, { "epoch": 0.24, "grad_norm": 1.8046875, "learning_rate": 0.0001929883550435132, "loss": 2.2313, "step": 101905 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001929876751261759, "loss": 2.2284, "step": 101910 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019298699517707234, "loss": 2.2032, "step": 101915 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019298631519620274, "loss": 2.0806, "step": 101920 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019298563518356732, "loss": 2.2952, "step": 101925 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019298495513916635, "loss": 2.1575, "step": 101930 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019298427506299999, "loss": 2.1821, "step": 101935 }, { "epoch": 0.24, "grad_norm": 1.8828125, "learning_rate": 0.00019298359495506853, "loss": 2.097, "step": 101940 }, { "epoch": 0.24, "grad_norm": 2.578125, "learning_rate": 0.00019298291481537218, "loss": 2.183, "step": 101945 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.0001929822346439112, "loss": 2.1822, "step": 101950 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.0001929815544406858, "loss": 2.2632, "step": 101955 }, { "epoch": 0.24, "grad_norm": 1.78125, "learning_rate": 0.00019298087420569618, "loss": 2.2145, "step": 101960 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019298019393894266, "loss": 2.0591, "step": 101965 }, { "epoch": 0.24, "grad_norm": 1.625, "learning_rate": 0.00019297951364042538, "loss": 2.2085, "step": 101970 }, { "epoch": 0.24, "grad_norm": 1.8984375, "learning_rate": 0.00019297883331014464, "loss": 2.2746, "step": 101975 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.00019297815294810062, "loss": 2.0278, "step": 101980 }, { "epoch": 0.24, "grad_norm": 1.6640625, "learning_rate": 0.0001929774725542936, "loss": 2.1294, "step": 101985 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.00019297679212872378, "loss": 2.1202, "step": 101990 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019297611167139138, "loss": 2.2335, "step": 101995 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019297543118229667, "loss": 2.1169, "step": 102000 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019297475066143985, "loss": 2.024, "step": 102005 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.0001929740701088212, "loss": 2.1355, "step": 102010 }, { "epoch": 0.24, "grad_norm": 1.71875, "learning_rate": 0.0001929733895244409, "loss": 2.1633, "step": 102015 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.00019297270890829925, "loss": 2.1985, "step": 102020 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.00019297202826039638, "loss": 2.1469, "step": 102025 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.0001929713475807326, "loss": 2.2637, "step": 102030 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.0001929706668693081, "loss": 2.0521, "step": 102035 }, { "epoch": 0.24, "grad_norm": 1.53125, "learning_rate": 0.00019296998612612317, "loss": 2.1809, "step": 102040 }, { "epoch": 0.24, "grad_norm": 1.59375, "learning_rate": 0.000192969305351178, "loss": 2.1109, "step": 102045 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.0001929686245444728, "loss": 2.1958, "step": 102050 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019296794370600787, "loss": 2.0568, "step": 102055 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019296726283578338, "loss": 2.2315, "step": 102060 }, { "epoch": 0.24, "grad_norm": 2.359375, "learning_rate": 0.00019296658193379958, "loss": 2.1114, "step": 102065 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001929659010000567, "loss": 2.1771, "step": 102070 }, { "epoch": 0.24, "grad_norm": 1.6640625, "learning_rate": 0.00019296522003455505, "loss": 2.2515, "step": 102075 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019296453903729473, "loss": 2.2646, "step": 102080 }, { "epoch": 0.24, "grad_norm": 2.359375, "learning_rate": 0.00019296385800827604, "loss": 2.228, "step": 102085 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019296317694749923, "loss": 2.125, "step": 102090 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.0001929624958549645, "loss": 2.2589, "step": 102095 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019296181473067213, "loss": 2.2822, "step": 102100 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.0001929611335746223, "loss": 2.1982, "step": 102105 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.00019296045238681523, "loss": 2.1695, "step": 102110 }, { "epoch": 0.24, "grad_norm": 1.59375, "learning_rate": 0.0001929597711672512, "loss": 2.1515, "step": 102115 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019295908991593045, "loss": 2.2405, "step": 102120 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019295840863285316, "loss": 2.0935, "step": 102125 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.0001929577273180196, "loss": 2.1435, "step": 102130 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019295704597143, "loss": 2.1182, "step": 102135 }, { "epoch": 0.24, "grad_norm": 1.8828125, "learning_rate": 0.0001929563645930846, "loss": 2.0143, "step": 102140 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.0001929556831829836, "loss": 2.214, "step": 102145 }, { "epoch": 0.24, "grad_norm": 2.515625, "learning_rate": 0.00019295500174112726, "loss": 2.099, "step": 102150 }, { "epoch": 0.24, "grad_norm": 1.7578125, "learning_rate": 0.0001929543202675158, "loss": 2.0701, "step": 102155 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019295363876214948, "loss": 2.1105, "step": 102160 }, { "epoch": 0.24, "grad_norm": 2.53125, "learning_rate": 0.0001929529572250285, "loss": 2.0703, "step": 102165 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019295227565615307, "loss": 2.2545, "step": 102170 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001929515940555235, "loss": 2.3783, "step": 102175 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019295091242313997, "loss": 2.0071, "step": 102180 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019295023075900273, "loss": 2.0299, "step": 102185 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019294954906311196, "loss": 2.1397, "step": 102190 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019294886733546798, "loss": 2.101, "step": 102195 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019294818557607097, "loss": 2.2238, "step": 102200 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.0001929475037849212, "loss": 2.2607, "step": 102205 }, { "epoch": 0.24, "grad_norm": 2.421875, "learning_rate": 0.00019294682196201884, "loss": 1.991, "step": 102210 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019294614010736415, "loss": 2.0696, "step": 102215 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.0001929454582209574, "loss": 2.0568, "step": 102220 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.0001929447763027988, "loss": 1.979, "step": 102225 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019294409435288858, "loss": 1.9689, "step": 102230 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019294341237122695, "loss": 2.2618, "step": 102235 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.0001929427303578142, "loss": 2.2144, "step": 102240 }, { "epoch": 0.24, "grad_norm": 3.46875, "learning_rate": 0.00019294204831265047, "loss": 2.1808, "step": 102245 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.0001929413662357361, "loss": 2.0239, "step": 102250 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.00019294068412707124, "loss": 2.1065, "step": 102255 }, { "epoch": 0.24, "grad_norm": 2.359375, "learning_rate": 0.0001929400019866562, "loss": 2.2521, "step": 102260 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019293931981449115, "loss": 2.3949, "step": 102265 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019293863761057632, "loss": 2.0466, "step": 102270 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.000192937955374912, "loss": 2.1233, "step": 102275 }, { "epoch": 0.24, "grad_norm": 1.703125, "learning_rate": 0.00019293727310749835, "loss": 2.3042, "step": 102280 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019293659080833568, "loss": 2.2646, "step": 102285 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019293590847742418, "loss": 2.1547, "step": 102290 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019293522611476407, "loss": 2.1963, "step": 102295 }, { "epoch": 0.24, "grad_norm": 5.09375, "learning_rate": 0.0001929345437203556, "loss": 2.2549, "step": 102300 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019293386129419902, "loss": 2.0992, "step": 102305 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019293317883629455, "loss": 2.1604, "step": 102310 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019293249634664238, "loss": 2.1701, "step": 102315 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019293181382524285, "loss": 2.0526, "step": 102320 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019293113127209608, "loss": 2.2313, "step": 102325 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.0001929304486872024, "loss": 1.8999, "step": 102330 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019292976607056193, "loss": 2.2371, "step": 102335 }, { "epoch": 0.24, "grad_norm": 1.6796875, "learning_rate": 0.000192929083422175, "loss": 1.9584, "step": 102340 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019292840074204177, "loss": 2.158, "step": 102345 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019292771803016255, "loss": 2.0948, "step": 102350 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019292703528653754, "loss": 2.353, "step": 102355 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019292635251116696, "loss": 2.192, "step": 102360 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019292566970405107, "loss": 2.1885, "step": 102365 }, { "epoch": 0.24, "grad_norm": 1.734375, "learning_rate": 0.00019292498686519007, "loss": 2.1879, "step": 102370 }, { "epoch": 0.24, "grad_norm": 1.6875, "learning_rate": 0.0001929243039945842, "loss": 2.0063, "step": 102375 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.0001929236210922337, "loss": 2.1547, "step": 102380 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019292293815813881, "loss": 2.2462, "step": 102385 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.0001929222551922998, "loss": 2.339, "step": 102390 }, { "epoch": 0.24, "grad_norm": 1.765625, "learning_rate": 0.0001929215721947168, "loss": 2.128, "step": 102395 }, { "epoch": 0.24, "grad_norm": 1.90625, "learning_rate": 0.00019292088916539015, "loss": 1.9948, "step": 102400 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019292020610432001, "loss": 2.0999, "step": 102405 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019291952301150666, "loss": 2.2426, "step": 102410 }, { "epoch": 0.24, "grad_norm": 1.90625, "learning_rate": 0.0001929188398869503, "loss": 2.1925, "step": 102415 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.0001929181567306512, "loss": 2.1176, "step": 102420 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019291747354260958, "loss": 2.2134, "step": 102425 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019291679032282564, "loss": 2.178, "step": 102430 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.00019291610707129967, "loss": 2.2464, "step": 102435 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001929154237880318, "loss": 2.0312, "step": 102440 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.0001929147404730224, "loss": 1.9174, "step": 102445 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019291405712627165, "loss": 2.1404, "step": 102450 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019291337374777973, "loss": 2.0389, "step": 102455 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019291269033754695, "loss": 2.1728, "step": 102460 }, { "epoch": 0.24, "grad_norm": 1.796875, "learning_rate": 0.0001929120068955735, "loss": 2.2128, "step": 102465 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019291132342185962, "loss": 2.0158, "step": 102470 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019291063991640553, "loss": 1.9804, "step": 102475 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001929099563792115, "loss": 2.1541, "step": 102480 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019290927281027776, "loss": 2.2326, "step": 102485 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.0001929085892096045, "loss": 2.2844, "step": 102490 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019290790557719198, "loss": 2.1312, "step": 102495 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019290722191304044, "loss": 2.3738, "step": 102500 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019290653821715011, "loss": 2.1293, "step": 102505 }, { "epoch": 0.24, "grad_norm": 2.421875, "learning_rate": 0.00019290585448952124, "loss": 2.1059, "step": 102510 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019290517073015405, "loss": 1.9907, "step": 102515 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019290448693904872, "loss": 2.0015, "step": 102520 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019290380311620556, "loss": 2.0389, "step": 102525 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.00019290311926162477, "loss": 2.0556, "step": 102530 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.0001929024353753066, "loss": 2.0756, "step": 102535 }, { "epoch": 0.24, "grad_norm": 2.46875, "learning_rate": 0.00019290175145725126, "loss": 2.1422, "step": 102540 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.000192901067507459, "loss": 2.3108, "step": 102545 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019290038352593006, "loss": 2.2873, "step": 102550 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019289969951266465, "loss": 2.1666, "step": 102555 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019289901546766305, "loss": 2.0322, "step": 102560 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019289833139092544, "loss": 2.2332, "step": 102565 }, { "epoch": 0.24, "grad_norm": 1.65625, "learning_rate": 0.00019289764728245206, "loss": 2.0369, "step": 102570 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019289696314224317, "loss": 2.1849, "step": 102575 }, { "epoch": 0.24, "grad_norm": 1.796875, "learning_rate": 0.000192896278970299, "loss": 2.001, "step": 102580 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019289559476661977, "loss": 2.0794, "step": 102585 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.00019289491053120573, "loss": 1.9965, "step": 102590 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.0001928942262640571, "loss": 2.2108, "step": 102595 }, { "epoch": 0.24, "grad_norm": 1.90625, "learning_rate": 0.0001928935419651741, "loss": 2.2829, "step": 102600 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.000192892857634557, "loss": 2.1289, "step": 102605 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.00019289217327220603, "loss": 2.006, "step": 102610 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019289148887812138, "loss": 2.1174, "step": 102615 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019289080445230333, "loss": 2.1582, "step": 102620 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.0001928901199947521, "loss": 2.2355, "step": 102625 }, { "epoch": 0.24, "grad_norm": 1.75, "learning_rate": 0.00019288943550546789, "loss": 1.856, "step": 102630 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019288875098445098, "loss": 1.9304, "step": 102635 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019288806643170158, "loss": 2.1694, "step": 102640 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019288738184721995, "loss": 2.2652, "step": 102645 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001928866972310063, "loss": 2.0318, "step": 102650 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.00019288601258306086, "loss": 2.3078, "step": 102655 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.0001928853279033839, "loss": 1.9367, "step": 102660 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.0001928846431919756, "loss": 2.3428, "step": 102665 }, { "epoch": 0.24, "grad_norm": 1.6953125, "learning_rate": 0.00019288395844883622, "loss": 2.0224, "step": 102670 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.000192883273673966, "loss": 2.3986, "step": 102675 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019288258886736518, "loss": 2.2325, "step": 102680 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.00019288190402903398, "loss": 2.088, "step": 102685 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019288121915897265, "loss": 2.1954, "step": 102690 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019288053425718137, "loss": 2.2088, "step": 102695 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019287984932366047, "loss": 2.3067, "step": 102700 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.0001928791643584101, "loss": 2.255, "step": 102705 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019287847936143052, "loss": 2.0525, "step": 102710 }, { "epoch": 0.24, "grad_norm": 2.484375, "learning_rate": 0.00019287779433272198, "loss": 2.2331, "step": 102715 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019287710927228465, "loss": 2.0624, "step": 102720 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019287642418011887, "loss": 2.1068, "step": 102725 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019287573905622481, "loss": 2.0342, "step": 102730 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019287505390060272, "loss": 2.1789, "step": 102735 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019287436871325282, "loss": 2.0477, "step": 102740 }, { "epoch": 0.24, "grad_norm": 1.671875, "learning_rate": 0.00019287368349417533, "loss": 2.0977, "step": 102745 }, { "epoch": 0.24, "grad_norm": 2.484375, "learning_rate": 0.00019287299824337053, "loss": 2.2323, "step": 102750 }, { "epoch": 0.24, "grad_norm": 1.75, "learning_rate": 0.0001928723129608386, "loss": 2.0525, "step": 102755 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019287162764657984, "loss": 2.1043, "step": 102760 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019287094230059445, "loss": 2.1618, "step": 102765 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019287025692288264, "loss": 2.3771, "step": 102770 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019286957151344467, "loss": 2.1985, "step": 102775 }, { "epoch": 0.24, "grad_norm": 2.9375, "learning_rate": 0.00019286888607228074, "loss": 2.2656, "step": 102780 }, { "epoch": 0.24, "grad_norm": 1.6953125, "learning_rate": 0.00019286820059939117, "loss": 2.0996, "step": 102785 }, { "epoch": 0.24, "grad_norm": 2.53125, "learning_rate": 0.00019286751509477613, "loss": 2.2937, "step": 102790 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019286682955843584, "loss": 2.1587, "step": 102795 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019286614399037055, "loss": 2.1715, "step": 102800 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019286545839058053, "loss": 2.1691, "step": 102805 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019286477275906597, "loss": 1.9786, "step": 102810 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.0001928640870958271, "loss": 2.1243, "step": 102815 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019286340140086422, "loss": 2.1319, "step": 102820 }, { "epoch": 0.24, "grad_norm": 2.640625, "learning_rate": 0.0001928627156741775, "loss": 2.0959, "step": 102825 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019286202991576715, "loss": 2.1657, "step": 102830 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.0001928613441256335, "loss": 2.1455, "step": 102835 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.0001928606583037767, "loss": 2.2292, "step": 102840 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019285997245019703, "loss": 2.2238, "step": 102845 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.0001928592865648947, "loss": 2.2988, "step": 102850 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019285860064786995, "loss": 2.242, "step": 102855 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019285791469912303, "loss": 2.1274, "step": 102860 }, { "epoch": 0.24, "grad_norm": 2.609375, "learning_rate": 0.00019285722871865415, "loss": 2.2417, "step": 102865 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019285654270646355, "loss": 2.3066, "step": 102870 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.0001928558566625515, "loss": 2.1273, "step": 102875 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.0001928551705869182, "loss": 2.1704, "step": 102880 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019285448447956387, "loss": 2.2528, "step": 102885 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.0001928537983404888, "loss": 1.967, "step": 102890 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.00019285311216969312, "loss": 2.2646, "step": 102895 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.0001928524259671772, "loss": 2.4914, "step": 102900 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019285173973294118, "loss": 2.2583, "step": 102905 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001928510534669853, "loss": 2.112, "step": 102910 }, { "epoch": 0.24, "grad_norm": 1.6640625, "learning_rate": 0.00019285036716930987, "loss": 2.3345, "step": 102915 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.00019284968083991503, "loss": 2.0717, "step": 102920 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019284899447880106, "loss": 2.0893, "step": 102925 }, { "epoch": 0.24, "grad_norm": 1.71875, "learning_rate": 0.00019284830808596822, "loss": 2.1781, "step": 102930 }, { "epoch": 0.24, "grad_norm": 1.78125, "learning_rate": 0.00019284762166141669, "loss": 2.2032, "step": 102935 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.0001928469352051467, "loss": 2.0897, "step": 102940 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019284624871715854, "loss": 2.1043, "step": 102945 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019284556219745244, "loss": 2.2511, "step": 102950 }, { "epoch": 0.24, "grad_norm": 1.625, "learning_rate": 0.0001928448756460286, "loss": 2.2375, "step": 102955 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.00019284418906288724, "loss": 2.1747, "step": 102960 }, { "epoch": 0.24, "grad_norm": 1.8984375, "learning_rate": 0.00019284350244802865, "loss": 2.0062, "step": 102965 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019284281580145304, "loss": 1.8587, "step": 102970 }, { "epoch": 0.24, "grad_norm": 1.8984375, "learning_rate": 0.00019284212912316063, "loss": 2.4293, "step": 102975 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.00019284144241315162, "loss": 2.1676, "step": 102980 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019284075567142634, "loss": 2.2544, "step": 102985 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019284006889798495, "loss": 2.1484, "step": 102990 }, { "epoch": 0.24, "grad_norm": 1.8515625, "learning_rate": 0.00019283938209282774, "loss": 2.1296, "step": 102995 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.0001928386952559549, "loss": 2.0504, "step": 103000 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019283800838736667, "loss": 2.266, "step": 103005 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.0001928373214870633, "loss": 2.109, "step": 103010 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.000192836634555045, "loss": 2.1884, "step": 103015 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019283594759131204, "loss": 2.1441, "step": 103020 }, { "epoch": 0.24, "grad_norm": 1.734375, "learning_rate": 0.00019283526059586464, "loss": 2.0556, "step": 103025 }, { "epoch": 0.24, "grad_norm": 1.796875, "learning_rate": 0.00019283457356870302, "loss": 2.1511, "step": 103030 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.00019283388650982744, "loss": 2.1684, "step": 103035 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.0001928331994192381, "loss": 2.2398, "step": 103040 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019283251229693527, "loss": 2.3904, "step": 103045 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019283182514291919, "loss": 2.1423, "step": 103050 }, { "epoch": 0.24, "grad_norm": 2.59375, "learning_rate": 0.00019283113795719002, "loss": 2.0147, "step": 103055 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019283045073974808, "loss": 2.1657, "step": 103060 }, { "epoch": 0.24, "grad_norm": 1.6875, "learning_rate": 0.00019282976349059358, "loss": 2.2027, "step": 103065 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019282907620972675, "loss": 2.0407, "step": 103070 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019282838889714783, "loss": 2.1917, "step": 103075 }, { "epoch": 0.24, "grad_norm": 2.828125, "learning_rate": 0.00019282770155285703, "loss": 2.1754, "step": 103080 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019282701417685464, "loss": 1.9187, "step": 103085 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.0001928263267691408, "loss": 1.9338, "step": 103090 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019282563932971583, "loss": 2.0768, "step": 103095 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019282495185857993, "loss": 2.0916, "step": 103100 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.00019282426435573337, "loss": 2.2469, "step": 103105 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019282357682117636, "loss": 2.4218, "step": 103110 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.0001928228892549091, "loss": 2.3211, "step": 103115 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019282220165693187, "loss": 2.0634, "step": 103120 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.0001928215140272449, "loss": 2.178, "step": 103125 }, { "epoch": 0.24, "grad_norm": 1.59375, "learning_rate": 0.0001928208263658484, "loss": 1.9836, "step": 103130 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019282013867274263, "loss": 2.2345, "step": 103135 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.00019281945094792784, "loss": 2.2145, "step": 103140 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.0001928187631914042, "loss": 2.1904, "step": 103145 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019281807540317202, "loss": 2.2547, "step": 103150 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019281738758323148, "loss": 2.2037, "step": 103155 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.00019281669973158288, "loss": 2.0889, "step": 103160 }, { "epoch": 0.24, "grad_norm": 1.7890625, "learning_rate": 0.00019281601184822637, "loss": 2.2145, "step": 103165 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019281532393316224, "loss": 2.1412, "step": 103170 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019281463598639072, "loss": 2.1479, "step": 103175 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.000192813948007912, "loss": 2.2209, "step": 103180 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.0001928132599977264, "loss": 2.0985, "step": 103185 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.0001928125719558341, "loss": 2.0747, "step": 103190 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019281188388223531, "loss": 2.2515, "step": 103195 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019281119577693032, "loss": 2.3015, "step": 103200 }, { "epoch": 0.24, "grad_norm": 1.78125, "learning_rate": 0.00019281050763991935, "loss": 2.3459, "step": 103205 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.0001928098194712026, "loss": 2.1648, "step": 103210 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.00019280913127078035, "loss": 2.2309, "step": 103215 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019280844303865284, "loss": 2.3763, "step": 103220 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019280775477482025, "loss": 2.2457, "step": 103225 }, { "epoch": 0.24, "grad_norm": 1.5703125, "learning_rate": 0.00019280706647928283, "loss": 2.0493, "step": 103230 }, { "epoch": 0.24, "grad_norm": 1.734375, "learning_rate": 0.0001928063781520409, "loss": 2.0632, "step": 103235 }, { "epoch": 0.24, "grad_norm": 1.9140625, "learning_rate": 0.00019280568979309457, "loss": 2.1139, "step": 103240 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019280500140244415, "loss": 2.1753, "step": 103245 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019280431298008988, "loss": 2.1528, "step": 103250 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019280362452603193, "loss": 2.0834, "step": 103255 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.0001928029360402706, "loss": 2.113, "step": 103260 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.0001928022475228061, "loss": 2.1514, "step": 103265 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019280155897363868, "loss": 2.2555, "step": 103270 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019280087039276857, "loss": 2.2129, "step": 103275 }, { "epoch": 0.24, "grad_norm": 2.453125, "learning_rate": 0.00019280018178019598, "loss": 2.1546, "step": 103280 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019279949313592119, "loss": 2.0837, "step": 103285 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019279880445994437, "loss": 2.1605, "step": 103290 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019279811575226584, "loss": 2.1198, "step": 103295 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019279742701288578, "loss": 2.2431, "step": 103300 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019279673824180443, "loss": 2.0532, "step": 103305 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.000192796049439022, "loss": 2.0552, "step": 103310 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001927953606045388, "loss": 2.3609, "step": 103315 }, { "epoch": 0.24, "grad_norm": 1.78125, "learning_rate": 0.000192794671738355, "loss": 2.2267, "step": 103320 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019279398284047087, "loss": 2.2335, "step": 103325 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.00019279329391088662, "loss": 1.9759, "step": 103330 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.0001927926049496025, "loss": 2.1112, "step": 103335 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019279191595661876, "loss": 1.9936, "step": 103340 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.00019279122693193558, "loss": 1.9693, "step": 103345 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.0001927905378755533, "loss": 2.0694, "step": 103350 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019278984878747202, "loss": 2.0887, "step": 103355 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019278915966769209, "loss": 2.1659, "step": 103360 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.0001927884705162137, "loss": 1.9954, "step": 103365 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019278778133303704, "loss": 2.2449, "step": 103370 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019278709211816245, "loss": 2.0218, "step": 103375 }, { "epoch": 0.24, "grad_norm": 2.46875, "learning_rate": 0.00019278640287159003, "loss": 1.9684, "step": 103380 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.00019278571359332015, "loss": 2.0654, "step": 103385 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.000192785024283353, "loss": 2.1317, "step": 103390 }, { "epoch": 0.24, "grad_norm": 2.140625, "learning_rate": 0.00019278433494168877, "loss": 1.89, "step": 103395 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.00019278364556832775, "loss": 2.1254, "step": 103400 }, { "epoch": 0.24, "grad_norm": 2.375, "learning_rate": 0.00019278295616327013, "loss": 2.0476, "step": 103405 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019278226672651618, "loss": 2.2012, "step": 103410 }, { "epoch": 0.24, "grad_norm": 1.6875, "learning_rate": 0.00019278157725806613, "loss": 2.1009, "step": 103415 }, { "epoch": 0.24, "grad_norm": 2.234375, "learning_rate": 0.00019278088775792022, "loss": 2.2867, "step": 103420 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019278019822607868, "loss": 2.0549, "step": 103425 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019277950866254169, "loss": 2.1618, "step": 103430 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019277881906730956, "loss": 1.9997, "step": 103435 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019277812944038254, "loss": 2.1965, "step": 103440 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.00019277743978176083, "loss": 2.0996, "step": 103445 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.0001927767500914446, "loss": 2.0585, "step": 103450 }, { "epoch": 0.24, "grad_norm": 2.125, "learning_rate": 0.0001927760603694342, "loss": 2.1596, "step": 103455 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019277537061572982, "loss": 2.2366, "step": 103460 }, { "epoch": 0.24, "grad_norm": 1.8984375, "learning_rate": 0.00019277468083033167, "loss": 2.1974, "step": 103465 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019277399101323998, "loss": 2.204, "step": 103470 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019277330116445506, "loss": 2.015, "step": 103475 }, { "epoch": 0.24, "grad_norm": 1.546875, "learning_rate": 0.00019277261128397707, "loss": 2.1589, "step": 103480 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.0001927719213718063, "loss": 1.9371, "step": 103485 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.00019277123142794295, "loss": 2.2607, "step": 103490 }, { "epoch": 0.24, "grad_norm": 2.4375, "learning_rate": 0.00019277054145238722, "loss": 2.0576, "step": 103495 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019276985144513943, "loss": 1.9692, "step": 103500 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.0001927691614061998, "loss": 2.0912, "step": 103505 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.0001927684713355685, "loss": 2.2753, "step": 103510 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.00019276778123324584, "loss": 2.0732, "step": 103515 }, { "epoch": 0.24, "grad_norm": 1.765625, "learning_rate": 0.000192767091099232, "loss": 2.1229, "step": 103520 }, { "epoch": 0.24, "grad_norm": 2.265625, "learning_rate": 0.0001927664009335272, "loss": 1.9069, "step": 103525 }, { "epoch": 0.24, "grad_norm": 1.7109375, "learning_rate": 0.0001927657107361318, "loss": 2.0061, "step": 103530 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019276502050704592, "loss": 2.1247, "step": 103535 }, { "epoch": 0.24, "grad_norm": 1.78125, "learning_rate": 0.0001927643302462698, "loss": 2.2338, "step": 103540 }, { "epoch": 0.24, "grad_norm": 2.296875, "learning_rate": 0.00019276363995380372, "loss": 2.1251, "step": 103545 }, { "epoch": 0.24, "grad_norm": 2.8125, "learning_rate": 0.00019276294962964792, "loss": 2.2244, "step": 103550 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.0001927622592738026, "loss": 2.0711, "step": 103555 }, { "epoch": 0.24, "grad_norm": 2.0625, "learning_rate": 0.00019276156888626796, "loss": 2.1449, "step": 103560 }, { "epoch": 0.24, "grad_norm": 2.46875, "learning_rate": 0.00019276087846704435, "loss": 2.2143, "step": 103565 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019276018801613192, "loss": 2.1981, "step": 103570 }, { "epoch": 0.24, "grad_norm": 1.7265625, "learning_rate": 0.00019275949753353092, "loss": 2.0397, "step": 103575 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.0001927588070192416, "loss": 2.16, "step": 103580 }, { "epoch": 0.24, "grad_norm": 2.640625, "learning_rate": 0.0001927581164732642, "loss": 2.1827, "step": 103585 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019275742589559895, "loss": 2.0186, "step": 103590 }, { "epoch": 0.24, "grad_norm": 2.40625, "learning_rate": 0.00019275673528624604, "loss": 2.1494, "step": 103595 }, { "epoch": 0.24, "grad_norm": 1.625, "learning_rate": 0.00019275604464520578, "loss": 2.187, "step": 103600 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019275535397247836, "loss": 2.1612, "step": 103605 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019275466326806404, "loss": 2.2534, "step": 103610 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019275397253196305, "loss": 2.2808, "step": 103615 }, { "epoch": 0.24, "grad_norm": 1.90625, "learning_rate": 0.00019275328176417562, "loss": 2.1269, "step": 103620 }, { "epoch": 0.24, "grad_norm": 2.5, "learning_rate": 0.00019275259096470198, "loss": 2.3148, "step": 103625 }, { "epoch": 0.24, "grad_norm": 2.390625, "learning_rate": 0.00019275190013354237, "loss": 2.3144, "step": 103630 }, { "epoch": 0.24, "grad_norm": 1.59375, "learning_rate": 0.00019275120927069703, "loss": 1.9388, "step": 103635 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.0001927505183761662, "loss": 2.1191, "step": 103640 }, { "epoch": 0.24, "grad_norm": 1.71875, "learning_rate": 0.0001927498274499501, "loss": 2.0781, "step": 103645 }, { "epoch": 0.24, "grad_norm": 1.640625, "learning_rate": 0.00019274913649204898, "loss": 2.2225, "step": 103650 }, { "epoch": 0.24, "grad_norm": 1.921875, "learning_rate": 0.00019274844550246306, "loss": 2.1799, "step": 103655 }, { "epoch": 0.24, "grad_norm": 2.328125, "learning_rate": 0.0001927477544811926, "loss": 2.1352, "step": 103660 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.00019274706342823784, "loss": 2.2171, "step": 103665 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.000192746372343599, "loss": 2.1094, "step": 103670 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.00019274568122727633, "loss": 2.1013, "step": 103675 }, { "epoch": 0.24, "grad_norm": 1.8359375, "learning_rate": 0.00019274499007927002, "loss": 2.266, "step": 103680 }, { "epoch": 0.24, "grad_norm": 1.8203125, "learning_rate": 0.00019274429889958034, "loss": 2.1974, "step": 103685 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019274360768820753, "loss": 2.1669, "step": 103690 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019274291644515184, "loss": 2.2963, "step": 103695 }, { "epoch": 0.24, "grad_norm": 2.515625, "learning_rate": 0.00019274222517041346, "loss": 2.1134, "step": 103700 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019274153386399266, "loss": 2.0321, "step": 103705 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.0001927408425258897, "loss": 2.0189, "step": 103710 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.00019274015115610475, "loss": 2.2731, "step": 103715 }, { "epoch": 0.24, "grad_norm": 1.859375, "learning_rate": 0.0001927394597546381, "loss": 2.0772, "step": 103720 }, { "epoch": 0.24, "grad_norm": 2.3125, "learning_rate": 0.00019273876832148998, "loss": 1.9492, "step": 103725 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.0001927380768566606, "loss": 2.0962, "step": 103730 }, { "epoch": 0.24, "grad_norm": 1.96875, "learning_rate": 0.0001927373853601502, "loss": 2.2307, "step": 103735 }, { "epoch": 0.24, "grad_norm": 1.515625, "learning_rate": 0.00019273669383195903, "loss": 2.0848, "step": 103740 }, { "epoch": 0.24, "grad_norm": 2.25, "learning_rate": 0.00019273600227208735, "loss": 2.1812, "step": 103745 }, { "epoch": 0.24, "grad_norm": 2.375, "learning_rate": 0.00019273531068053534, "loss": 2.0992, "step": 103750 }, { "epoch": 0.24, "grad_norm": 3.046875, "learning_rate": 0.0001927346190573033, "loss": 2.1477, "step": 103755 }, { "epoch": 0.24, "grad_norm": 2.21875, "learning_rate": 0.00019273392740239137, "loss": 2.1526, "step": 103760 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.0001927332357157999, "loss": 2.0865, "step": 103765 }, { "epoch": 0.24, "grad_norm": 1.890625, "learning_rate": 0.00019273254399752908, "loss": 2.1903, "step": 103770 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019273185224757912, "loss": 2.1584, "step": 103775 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001927311604659503, "loss": 2.0445, "step": 103780 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019273046865264282, "loss": 1.9188, "step": 103785 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.00019272977680765693, "loss": 1.98, "step": 103790 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019272908493099284, "loss": 2.1979, "step": 103795 }, { "epoch": 0.24, "grad_norm": 2.546875, "learning_rate": 0.00019272839302265086, "loss": 2.207, "step": 103800 }, { "epoch": 0.24, "grad_norm": 2.765625, "learning_rate": 0.00019272770108263112, "loss": 2.3235, "step": 103805 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019272700911093397, "loss": 2.3451, "step": 103810 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019272631710755956, "loss": 2.2078, "step": 103815 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019272562507250817, "loss": 2.2563, "step": 103820 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019272493300578005, "loss": 2.1722, "step": 103825 }, { "epoch": 0.24, "grad_norm": 1.875, "learning_rate": 0.00019272424090737537, "loss": 2.027, "step": 103830 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019272354877729444, "loss": 2.1532, "step": 103835 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.00019272285661553744, "loss": 2.0521, "step": 103840 }, { "epoch": 0.24, "grad_norm": 2.46875, "learning_rate": 0.0001927221644221046, "loss": 2.1211, "step": 103845 }, { "epoch": 0.24, "grad_norm": 2.671875, "learning_rate": 0.00019272147219699627, "loss": 2.327, "step": 103850 }, { "epoch": 0.24, "grad_norm": 2.421875, "learning_rate": 0.00019272077994021255, "loss": 2.0611, "step": 103855 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019272008765175373, "loss": 2.2186, "step": 103860 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019271939533162006, "loss": 2.0281, "step": 103865 }, { "epoch": 0.24, "grad_norm": 1.734375, "learning_rate": 0.0001927187029798118, "loss": 2.1267, "step": 103870 }, { "epoch": 0.24, "grad_norm": 2.0, "learning_rate": 0.00019271801059632908, "loss": 2.0934, "step": 103875 }, { "epoch": 0.24, "grad_norm": 2.34375, "learning_rate": 0.00019271731818117224, "loss": 2.1743, "step": 103880 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.0001927166257343415, "loss": 2.4387, "step": 103885 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.00019271593325583704, "loss": 2.1354, "step": 103890 }, { "epoch": 0.24, "grad_norm": 2.078125, "learning_rate": 0.00019271524074565915, "loss": 2.2735, "step": 103895 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019271454820380806, "loss": 2.0181, "step": 103900 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019271385563028397, "loss": 2.0604, "step": 103905 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019271316302508717, "loss": 1.9912, "step": 103910 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.0001927124703882179, "loss": 2.1832, "step": 103915 }, { "epoch": 0.24, "grad_norm": 1.7421875, "learning_rate": 0.00019271177771967632, "loss": 2.047, "step": 103920 }, { "epoch": 0.24, "grad_norm": 1.953125, "learning_rate": 0.00019271108501946272, "loss": 2.2567, "step": 103925 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019271039228757736, "loss": 2.1645, "step": 103930 }, { "epoch": 0.24, "grad_norm": 2.171875, "learning_rate": 0.00019270969952402042, "loss": 2.0812, "step": 103935 }, { "epoch": 0.24, "grad_norm": 1.6484375, "learning_rate": 0.00019270900672879222, "loss": 2.1286, "step": 103940 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.0001927083139018929, "loss": 2.0618, "step": 103945 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019270762104332275, "loss": 2.1793, "step": 103950 }, { "epoch": 0.24, "grad_norm": 1.984375, "learning_rate": 0.00019270692815308196, "loss": 2.1609, "step": 103955 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019270623523117084, "loss": 1.9448, "step": 103960 }, { "epoch": 0.24, "grad_norm": 2.03125, "learning_rate": 0.00019270554227758958, "loss": 2.0681, "step": 103965 }, { "epoch": 0.24, "grad_norm": 2.15625, "learning_rate": 0.00019270484929233842, "loss": 2.2408, "step": 103970 }, { "epoch": 0.24, "grad_norm": 1.7734375, "learning_rate": 0.0001927041562754176, "loss": 1.9147, "step": 103975 }, { "epoch": 0.24, "grad_norm": 1.8671875, "learning_rate": 0.00019270346322682738, "loss": 2.1534, "step": 103980 }, { "epoch": 0.24, "grad_norm": 2.1875, "learning_rate": 0.00019270277014656794, "loss": 2.1829, "step": 103985 }, { "epoch": 0.24, "grad_norm": 1.9609375, "learning_rate": 0.00019270207703463957, "loss": 2.2595, "step": 103990 }, { "epoch": 0.24, "grad_norm": 1.828125, "learning_rate": 0.00019270138389104247, "loss": 2.2492, "step": 103995 }, { "epoch": 0.24, "grad_norm": 1.7578125, "learning_rate": 0.00019270069071577694, "loss": 2.0561, "step": 104000 }, { "epoch": 0.24, "grad_norm": 1.6953125, "learning_rate": 0.00019269999750884316, "loss": 2.1169, "step": 104005 }, { "epoch": 0.24, "grad_norm": 2.09375, "learning_rate": 0.00019269930427024136, "loss": 2.3023, "step": 104010 }, { "epoch": 0.24, "grad_norm": 2.578125, "learning_rate": 0.00019269861099997183, "loss": 2.1131, "step": 104015 }, { "epoch": 0.24, "grad_norm": 2.015625, "learning_rate": 0.00019269791769803473, "loss": 2.1259, "step": 104020 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019269722436443036, "loss": 2.004, "step": 104025 }, { "epoch": 0.24, "grad_norm": 2.28125, "learning_rate": 0.00019269653099915894, "loss": 2.2126, "step": 104030 }, { "epoch": 0.24, "grad_norm": 1.6875, "learning_rate": 0.0001926958376022207, "loss": 2.0636, "step": 104035 }, { "epoch": 0.24, "grad_norm": 2.046875, "learning_rate": 0.00019269514417361586, "loss": 2.0501, "step": 104040 }, { "epoch": 0.24, "grad_norm": 2.203125, "learning_rate": 0.00019269445071334472, "loss": 2.121, "step": 104045 }, { "epoch": 0.24, "grad_norm": 2.390625, "learning_rate": 0.00019269375722140745, "loss": 2.2603, "step": 104050 }, { "epoch": 0.24, "grad_norm": 1.9453125, "learning_rate": 0.00019269306369780432, "loss": 2.1554, "step": 104055 }, { "epoch": 0.24, "grad_norm": 2.546875, "learning_rate": 0.00019269237014253555, "loss": 2.1437, "step": 104060 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.0001926916765556014, "loss": 1.9412, "step": 104065 }, { "epoch": 0.24, "grad_norm": 2.109375, "learning_rate": 0.00019269098293700208, "loss": 2.0989, "step": 104070 }, { "epoch": 0.24, "grad_norm": 1.8125, "learning_rate": 0.00019269028928673785, "loss": 2.0044, "step": 104075 }, { "epoch": 0.24, "grad_norm": 1.9921875, "learning_rate": 0.00019268959560480892, "loss": 2.2128, "step": 104080 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019268890189121557, "loss": 2.213, "step": 104085 }, { "epoch": 0.24, "grad_norm": 1.84375, "learning_rate": 0.00019268820814595796, "loss": 2.2734, "step": 104090 }, { "epoch": 0.24, "grad_norm": 1.9375, "learning_rate": 0.0001926875143690364, "loss": 1.9325, "step": 104095 }, { "epoch": 0.24, "grad_norm": 1.9765625, "learning_rate": 0.00019268682056045112, "loss": 2.2717, "step": 104100 }, { "epoch": 0.24, "grad_norm": 1.9296875, "learning_rate": 0.00019268612672020233, "loss": 2.1728, "step": 104105 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019268543284829028, "loss": 2.268, "step": 104110 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019268473894471522, "loss": 2.1951, "step": 104115 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.00019268404500947733, "loss": 2.1923, "step": 104120 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019268335104257695, "loss": 2.1278, "step": 104125 }, { "epoch": 0.25, "grad_norm": 1.609375, "learning_rate": 0.0001926826570440142, "loss": 2.0847, "step": 104130 }, { "epoch": 0.25, "grad_norm": 1.8671875, "learning_rate": 0.0001926819630137894, "loss": 2.1351, "step": 104135 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019268126895190276, "loss": 2.2783, "step": 104140 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019268057485835453, "loss": 1.8486, "step": 104145 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.0001926798807331449, "loss": 2.3967, "step": 104150 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019267918657627417, "loss": 2.0465, "step": 104155 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019267849238774253, "loss": 2.1124, "step": 104160 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.00019267779816755027, "loss": 2.1301, "step": 104165 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019267710391569757, "loss": 2.235, "step": 104170 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.0001926764096321847, "loss": 2.1606, "step": 104175 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019267571531701188, "loss": 2.2969, "step": 104180 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019267502097017935, "loss": 2.2634, "step": 104185 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019267432659168736, "loss": 2.2465, "step": 104190 }, { "epoch": 0.25, "grad_norm": 1.7265625, "learning_rate": 0.0001926736321815361, "loss": 2.1871, "step": 104195 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.0001926729377397259, "loss": 2.2243, "step": 104200 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019267224326625693, "loss": 2.0775, "step": 104205 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019267154876112942, "loss": 2.0681, "step": 104210 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019267085422434367, "loss": 2.1865, "step": 104215 }, { "epoch": 0.25, "grad_norm": 1.6796875, "learning_rate": 0.00019267015965589984, "loss": 2.1822, "step": 104220 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019266946505579823, "loss": 2.3149, "step": 104225 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019266877042403902, "loss": 2.1094, "step": 104230 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.0001926680757606225, "loss": 2.1792, "step": 104235 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019266738106554887, "loss": 2.0802, "step": 104240 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.00019266668633881838, "loss": 2.1807, "step": 104245 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019266599158043128, "loss": 2.0846, "step": 104250 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019266529679038777, "loss": 2.1341, "step": 104255 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019266460196868814, "loss": 2.2177, "step": 104260 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019266390711533262, "loss": 2.1523, "step": 104265 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.0001926632122303214, "loss": 2.1046, "step": 104270 }, { "epoch": 0.25, "grad_norm": 1.6953125, "learning_rate": 0.00019266251731365474, "loss": 2.1365, "step": 104275 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019266182236533293, "loss": 1.9894, "step": 104280 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.00019266112738535612, "loss": 1.969, "step": 104285 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019266043237372457, "loss": 2.1246, "step": 104290 }, { "epoch": 0.25, "grad_norm": 1.6953125, "learning_rate": 0.0001926597373304386, "loss": 2.0757, "step": 104295 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.0001926590422554983, "loss": 2.2342, "step": 104300 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019265834714890406, "loss": 2.1986, "step": 104305 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019265765201065605, "loss": 2.1693, "step": 104310 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019265695684075445, "loss": 1.9746, "step": 104315 }, { "epoch": 0.25, "grad_norm": 1.84375, "learning_rate": 0.0001926562616391996, "loss": 2.0766, "step": 104320 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019265556640599165, "loss": 2.1508, "step": 104325 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019265487114113089, "loss": 2.0631, "step": 104330 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019265417584461757, "loss": 2.1122, "step": 104335 }, { "epoch": 0.25, "grad_norm": 1.59375, "learning_rate": 0.0001926534805164519, "loss": 2.2942, "step": 104340 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019265278515663412, "loss": 2.1283, "step": 104345 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019265208976516446, "loss": 2.118, "step": 104350 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019265139434204316, "loss": 2.185, "step": 104355 }, { "epoch": 0.25, "grad_norm": 1.7109375, "learning_rate": 0.00019265069888727045, "loss": 2.2589, "step": 104360 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019265000340084663, "loss": 2.1656, "step": 104365 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019264930788277185, "loss": 2.1373, "step": 104370 }, { "epoch": 0.25, "grad_norm": 1.7109375, "learning_rate": 0.00019264861233304637, "loss": 2.3405, "step": 104375 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019264791675167048, "loss": 1.9693, "step": 104380 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019264722113864437, "loss": 2.267, "step": 104385 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.0001926465254939683, "loss": 2.0874, "step": 104390 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019264582981764247, "loss": 2.208, "step": 104395 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019264513410966716, "loss": 2.0696, "step": 104400 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.0001926444383700426, "loss": 2.2001, "step": 104405 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.000192643742598769, "loss": 2.1851, "step": 104410 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019264304679584665, "loss": 2.2657, "step": 104415 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019264235096127572, "loss": 2.0476, "step": 104420 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.0001926416550950565, "loss": 2.2174, "step": 104425 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001926409591971892, "loss": 2.2289, "step": 104430 }, { "epoch": 0.25, "grad_norm": 1.859375, "learning_rate": 0.00019264026326767407, "loss": 2.2422, "step": 104435 }, { "epoch": 0.25, "grad_norm": 2.375, "learning_rate": 0.00019263956730651138, "loss": 2.3215, "step": 104440 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019263887131370128, "loss": 2.3134, "step": 104445 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.0001926381752892441, "loss": 2.034, "step": 104450 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019263747923314002, "loss": 2.0992, "step": 104455 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.0001926367831453893, "loss": 2.4013, "step": 104460 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.0001926360870259922, "loss": 2.2763, "step": 104465 }, { "epoch": 0.25, "grad_norm": 1.8671875, "learning_rate": 0.0001926353908749489, "loss": 2.2532, "step": 104470 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.00019263469469225968, "loss": 2.0831, "step": 104475 }, { "epoch": 0.25, "grad_norm": 1.8671875, "learning_rate": 0.00019263399847792477, "loss": 2.0987, "step": 104480 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019263330223194438, "loss": 2.0538, "step": 104485 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019263260595431882, "loss": 2.1825, "step": 104490 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019263190964504824, "loss": 2.0128, "step": 104495 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019263121330413295, "loss": 2.0642, "step": 104500 }, { "epoch": 0.25, "grad_norm": 1.9609375, "learning_rate": 0.00019263051693157315, "loss": 2.025, "step": 104505 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019262982052736907, "loss": 2.1973, "step": 104510 }, { "epoch": 0.25, "grad_norm": 2.53125, "learning_rate": 0.000192629124091521, "loss": 2.1729, "step": 104515 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.0001926284276240291, "loss": 2.2501, "step": 104520 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019262773112489368, "loss": 2.1873, "step": 104525 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019262703459411494, "loss": 2.1204, "step": 104530 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.0001926263380316931, "loss": 2.0513, "step": 104535 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019262564143762844, "loss": 2.0202, "step": 104540 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019262494481192118, "loss": 2.1043, "step": 104545 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019262424815457156, "loss": 2.1934, "step": 104550 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019262355146557983, "loss": 2.1834, "step": 104555 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019262285474494622, "loss": 2.1023, "step": 104560 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019262215799267093, "loss": 2.1722, "step": 104565 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019262146120875426, "loss": 1.9796, "step": 104570 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.0001926207643931964, "loss": 2.1867, "step": 104575 }, { "epoch": 0.25, "grad_norm": 1.7109375, "learning_rate": 0.0001926200675459976, "loss": 2.1626, "step": 104580 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019261937066715815, "loss": 2.1909, "step": 104585 }, { "epoch": 0.25, "grad_norm": 1.6328125, "learning_rate": 0.0001926186737566782, "loss": 2.0325, "step": 104590 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.00019261797681455805, "loss": 2.1635, "step": 104595 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.0001926172798407979, "loss": 2.0259, "step": 104600 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019261658283539803, "loss": 2.2261, "step": 104605 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.00019261588579835867, "loss": 1.9843, "step": 104610 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019261518872968, "loss": 2.2026, "step": 104615 }, { "epoch": 0.25, "grad_norm": 1.703125, "learning_rate": 0.00019261449162936232, "loss": 2.0328, "step": 104620 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019261379449740583, "loss": 2.2576, "step": 104625 }, { "epoch": 0.25, "grad_norm": 2.4375, "learning_rate": 0.00019261309733381083, "loss": 2.0779, "step": 104630 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.00019261240013857748, "loss": 2.0251, "step": 104635 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019261170291170608, "loss": 2.1675, "step": 104640 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019261100565319685, "loss": 2.273, "step": 104645 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019261030836305, "loss": 2.4668, "step": 104650 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019260961104126582, "loss": 2.1773, "step": 104655 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.0001926089136878445, "loss": 2.148, "step": 104660 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019260821630278624, "loss": 2.3563, "step": 104665 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.0001926075188860914, "loss": 2.1527, "step": 104670 }, { "epoch": 0.25, "grad_norm": 1.5703125, "learning_rate": 0.00019260682143776013, "loss": 2.2229, "step": 104675 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019260612395779268, "loss": 2.1779, "step": 104680 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019260542644618933, "loss": 1.9539, "step": 104685 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019260472890295026, "loss": 2.1474, "step": 104690 }, { "epoch": 0.25, "grad_norm": 2.609375, "learning_rate": 0.00019260403132807574, "loss": 2.1662, "step": 104695 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019260333372156602, "loss": 2.1291, "step": 104700 }, { "epoch": 0.25, "grad_norm": 1.6953125, "learning_rate": 0.0001926026360834213, "loss": 2.3822, "step": 104705 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019260193841364186, "loss": 2.1994, "step": 104710 }, { "epoch": 0.25, "grad_norm": 1.4609375, "learning_rate": 0.00019260124071222788, "loss": 2.1765, "step": 104715 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019260054297917968, "loss": 2.1207, "step": 104720 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019259984521449743, "loss": 2.0717, "step": 104725 }, { "epoch": 0.25, "grad_norm": 1.734375, "learning_rate": 0.00019259914741818143, "loss": 2.2008, "step": 104730 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019259844959023184, "loss": 2.1824, "step": 104735 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019259775173064895, "loss": 2.3399, "step": 104740 }, { "epoch": 0.25, "grad_norm": 1.84375, "learning_rate": 0.000192597053839433, "loss": 2.1456, "step": 104745 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.0001925963559165842, "loss": 2.1915, "step": 104750 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019259565796210285, "loss": 2.1426, "step": 104755 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.0001925949599759891, "loss": 2.1437, "step": 104760 }, { "epoch": 0.25, "grad_norm": 1.9609375, "learning_rate": 0.00019259426195824322, "loss": 2.1344, "step": 104765 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001925935639088655, "loss": 2.2429, "step": 104770 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019259286582785608, "loss": 2.3575, "step": 104775 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019259216771521533, "loss": 2.1144, "step": 104780 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.0001925914695709434, "loss": 2.0196, "step": 104785 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.0001925907713950405, "loss": 2.3729, "step": 104790 }, { "epoch": 0.25, "grad_norm": 2.40625, "learning_rate": 0.00019259007318750694, "loss": 2.3836, "step": 104795 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019258937494834294, "loss": 2.2331, "step": 104800 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019258867667754873, "loss": 2.1707, "step": 104805 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019258797837512455, "loss": 2.0874, "step": 104810 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019258728004107066, "loss": 2.1776, "step": 104815 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019258658167538723, "loss": 2.0206, "step": 104820 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019258588327807457, "loss": 2.2141, "step": 104825 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019258518484913288, "loss": 2.1335, "step": 104830 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.0001925844863885624, "loss": 2.1253, "step": 104835 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.0001925837878963634, "loss": 2.187, "step": 104840 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019258308937253614, "loss": 1.9244, "step": 104845 }, { "epoch": 0.25, "grad_norm": 2.578125, "learning_rate": 0.00019258239081708074, "loss": 2.0842, "step": 104850 }, { "epoch": 0.25, "grad_norm": 1.703125, "learning_rate": 0.00019258169222999757, "loss": 2.2417, "step": 104855 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019258099361128678, "loss": 2.1509, "step": 104860 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019258029496094868, "loss": 2.063, "step": 104865 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.00019257959627898345, "loss": 2.1531, "step": 104870 }, { "epoch": 0.25, "grad_norm": 1.6328125, "learning_rate": 0.00019257889756539137, "loss": 2.1032, "step": 104875 }, { "epoch": 0.25, "grad_norm": 2.484375, "learning_rate": 0.00019257819882017262, "loss": 1.8997, "step": 104880 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.0001925775000433275, "loss": 2.1569, "step": 104885 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019257680123485623, "loss": 2.2529, "step": 104890 }, { "epoch": 0.25, "grad_norm": 1.5859375, "learning_rate": 0.00019257610239475903, "loss": 2.2649, "step": 104895 }, { "epoch": 0.25, "grad_norm": 2.453125, "learning_rate": 0.00019257540352303617, "loss": 2.0563, "step": 104900 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019257470461968786, "loss": 2.1197, "step": 104905 }, { "epoch": 0.25, "grad_norm": 2.625, "learning_rate": 0.00019257400568471438, "loss": 2.1123, "step": 104910 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.0001925733067181159, "loss": 2.224, "step": 104915 }, { "epoch": 0.25, "grad_norm": 2.40625, "learning_rate": 0.00019257260771989273, "loss": 2.2178, "step": 104920 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019257190869004508, "loss": 2.3615, "step": 104925 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019257120962857318, "loss": 2.2694, "step": 104930 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019257051053547726, "loss": 2.0865, "step": 104935 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019256981141075761, "loss": 2.2075, "step": 104940 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.00019256911225441444, "loss": 2.3113, "step": 104945 }, { "epoch": 0.25, "grad_norm": 1.796875, "learning_rate": 0.00019256841306644794, "loss": 2.1462, "step": 104950 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.0001925677138468584, "loss": 2.2195, "step": 104955 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019256701459564605, "loss": 2.1369, "step": 104960 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019256631531281112, "loss": 2.1104, "step": 104965 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.0001925656159983539, "loss": 2.0211, "step": 104970 }, { "epoch": 0.25, "grad_norm": 2.53125, "learning_rate": 0.00019256491665227453, "loss": 2.058, "step": 104975 }, { "epoch": 0.25, "grad_norm": 2.390625, "learning_rate": 0.00019256421727457335, "loss": 2.0855, "step": 104980 }, { "epoch": 0.25, "grad_norm": 2.640625, "learning_rate": 0.00019256351786525055, "loss": 2.1598, "step": 104985 }, { "epoch": 0.25, "grad_norm": 2.578125, "learning_rate": 0.00019256281842430637, "loss": 2.1669, "step": 104990 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019256211895174103, "loss": 2.1629, "step": 104995 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019256141944755482, "loss": 2.1986, "step": 105000 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019256071991174793, "loss": 2.0933, "step": 105005 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019256002034432064, "loss": 2.1417, "step": 105010 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019255932074527317, "loss": 2.0714, "step": 105015 }, { "epoch": 0.25, "grad_norm": 1.65625, "learning_rate": 0.00019255862111460572, "loss": 2.0217, "step": 105020 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.0001925579214523186, "loss": 2.3644, "step": 105025 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.000192557221758412, "loss": 2.2182, "step": 105030 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.0001925565220328862, "loss": 2.115, "step": 105035 }, { "epoch": 0.25, "grad_norm": 2.390625, "learning_rate": 0.00019255582227574138, "loss": 2.2135, "step": 105040 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019255512248697782, "loss": 2.2033, "step": 105045 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019255442266659576, "loss": 2.1213, "step": 105050 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019255372281459546, "loss": 1.95, "step": 105055 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.0001925530229309771, "loss": 1.8995, "step": 105060 }, { "epoch": 0.25, "grad_norm": 1.6484375, "learning_rate": 0.00019255232301574094, "loss": 2.0134, "step": 105065 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.0001925516230688872, "loss": 2.011, "step": 105070 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.0001925509230904162, "loss": 2.1849, "step": 105075 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.0001925502230803281, "loss": 1.985, "step": 105080 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019254952303862316, "loss": 2.2153, "step": 105085 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.00019254882296530166, "loss": 2.2578, "step": 105090 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.0001925481228603638, "loss": 2.1954, "step": 105095 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019254742272380978, "loss": 2.1021, "step": 105100 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019254672255563992, "loss": 2.0323, "step": 105105 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.0001925460223558544, "loss": 2.1531, "step": 105110 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.0001925453221244535, "loss": 2.2431, "step": 105115 }, { "epoch": 0.25, "grad_norm": 2.390625, "learning_rate": 0.00019254462186143743, "loss": 2.0003, "step": 105120 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019254392156680642, "loss": 2.0124, "step": 105125 }, { "epoch": 0.25, "grad_norm": 1.9609375, "learning_rate": 0.00019254322124056075, "loss": 2.2314, "step": 105130 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.0001925425208827006, "loss": 2.3026, "step": 105135 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.0001925418204932263, "loss": 2.0667, "step": 105140 }, { "epoch": 0.25, "grad_norm": 1.609375, "learning_rate": 0.000192541120072138, "loss": 2.2982, "step": 105145 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.000192540419619436, "loss": 2.1946, "step": 105150 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.0001925397191351205, "loss": 2.128, "step": 105155 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019253901861919178, "loss": 2.1437, "step": 105160 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019253831807165, "loss": 2.2364, "step": 105165 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.0001925376174924955, "loss": 2.1166, "step": 105170 }, { "epoch": 0.25, "grad_norm": 1.84375, "learning_rate": 0.00019253691688172846, "loss": 2.1729, "step": 105175 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019253621623934911, "loss": 2.2186, "step": 105180 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019253551556535774, "loss": 2.2445, "step": 105185 }, { "epoch": 0.25, "grad_norm": 2.6875, "learning_rate": 0.00019253481485975455, "loss": 2.0503, "step": 105190 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019253411412253977, "loss": 2.1134, "step": 105195 }, { "epoch": 0.25, "grad_norm": 2.5, "learning_rate": 0.0001925334133537137, "loss": 2.0711, "step": 105200 }, { "epoch": 0.25, "grad_norm": 2.4375, "learning_rate": 0.0001925327125532765, "loss": 2.2273, "step": 105205 }, { "epoch": 0.25, "grad_norm": 1.609375, "learning_rate": 0.00019253201172122846, "loss": 2.1717, "step": 105210 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.0001925313108575698, "loss": 2.1873, "step": 105215 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019253060996230078, "loss": 2.196, "step": 105220 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.00019252990903542158, "loss": 2.036, "step": 105225 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019252920807693254, "loss": 2.1472, "step": 105230 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.0001925285070868338, "loss": 2.0274, "step": 105235 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019252780606512568, "loss": 2.2008, "step": 105240 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019252710501180837, "loss": 2.1664, "step": 105245 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019252640392688211, "loss": 2.1074, "step": 105250 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.0001925257028103472, "loss": 2.3165, "step": 105255 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019252500166220377, "loss": 2.2324, "step": 105260 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.00019252430048245217, "loss": 2.06, "step": 105265 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019252359927109257, "loss": 2.162, "step": 105270 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.00019252289802812524, "loss": 2.1084, "step": 105275 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.0001925221967535504, "loss": 2.0938, "step": 105280 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.0001925214954473683, "loss": 2.2512, "step": 105285 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.00019252079410957918, "loss": 2.1566, "step": 105290 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019252009274018327, "loss": 2.2031, "step": 105295 }, { "epoch": 0.25, "grad_norm": 2.640625, "learning_rate": 0.00019251939133918083, "loss": 2.1647, "step": 105300 }, { "epoch": 0.25, "grad_norm": 2.40625, "learning_rate": 0.00019251868990657206, "loss": 2.1075, "step": 105305 }, { "epoch": 0.25, "grad_norm": 2.59375, "learning_rate": 0.00019251798844235727, "loss": 2.0603, "step": 105310 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019251728694653667, "loss": 2.1984, "step": 105315 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019251658541911043, "loss": 2.0755, "step": 105320 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019251588386007889, "loss": 1.9788, "step": 105325 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.0001925151822694422, "loss": 2.3217, "step": 105330 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001925144806472007, "loss": 1.9152, "step": 105335 }, { "epoch": 0.25, "grad_norm": 2.375, "learning_rate": 0.00019251377899335455, "loss": 1.9547, "step": 105340 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.00019251307730790403, "loss": 1.9439, "step": 105345 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019251237559084935, "loss": 2.089, "step": 105350 }, { "epoch": 0.25, "grad_norm": 2.515625, "learning_rate": 0.00019251167384219076, "loss": 2.2604, "step": 105355 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019251097206192852, "loss": 1.9477, "step": 105360 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019251027025006282, "loss": 2.1384, "step": 105365 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019250956840659398, "loss": 2.1068, "step": 105370 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.0001925088665315222, "loss": 2.2524, "step": 105375 }, { "epoch": 0.25, "grad_norm": 1.84375, "learning_rate": 0.00019250816462484767, "loss": 2.0404, "step": 105380 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019250746268657066, "loss": 2.132, "step": 105385 }, { "epoch": 0.25, "grad_norm": 1.4453125, "learning_rate": 0.00019250676071669147, "loss": 2.1987, "step": 105390 }, { "epoch": 0.25, "grad_norm": 1.71875, "learning_rate": 0.00019250605871521027, "loss": 2.0042, "step": 105395 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.00019250535668212732, "loss": 2.0525, "step": 105400 }, { "epoch": 0.25, "grad_norm": 1.859375, "learning_rate": 0.00019250465461744288, "loss": 1.9571, "step": 105405 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019250395252115715, "loss": 2.1046, "step": 105410 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019250325039327042, "loss": 2.1809, "step": 105415 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.00019250254823378288, "loss": 2.2185, "step": 105420 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019250184604269478, "loss": 2.2248, "step": 105425 }, { "epoch": 0.25, "grad_norm": 2.875, "learning_rate": 0.00019250114382000642, "loss": 2.0442, "step": 105430 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019250044156571795, "loss": 2.2226, "step": 105435 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019249973927982968, "loss": 2.2166, "step": 105440 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.0001924990369623418, "loss": 2.064, "step": 105445 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.00019249833461325457, "loss": 2.0756, "step": 105450 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.00019249763223256822, "loss": 2.1942, "step": 105455 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019249692982028303, "loss": 2.1512, "step": 105460 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001924962273763992, "loss": 2.19, "step": 105465 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.000192495524900917, "loss": 2.2499, "step": 105470 }, { "epoch": 0.25, "grad_norm": 2.703125, "learning_rate": 0.00019249482239383663, "loss": 1.9834, "step": 105475 }, { "epoch": 0.25, "grad_norm": 1.75, "learning_rate": 0.00019249411985515835, "loss": 2.2638, "step": 105480 }, { "epoch": 0.25, "grad_norm": 1.4140625, "learning_rate": 0.00019249341728488242, "loss": 2.0829, "step": 105485 }, { "epoch": 0.25, "grad_norm": 1.734375, "learning_rate": 0.00019249271468300903, "loss": 2.1252, "step": 105490 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019249201204953845, "loss": 2.1625, "step": 105495 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019249130938447096, "loss": 2.2649, "step": 105500 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019249060668780673, "loss": 2.0718, "step": 105505 }, { "epoch": 0.25, "grad_norm": 1.6484375, "learning_rate": 0.00019248990395954606, "loss": 2.1028, "step": 105510 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019248920119968913, "loss": 2.0289, "step": 105515 }, { "epoch": 0.25, "grad_norm": 1.859375, "learning_rate": 0.00019248849840823625, "loss": 2.086, "step": 105520 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.0001924877955851876, "loss": 2.1626, "step": 105525 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.0001924870927305434, "loss": 2.2156, "step": 105530 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.000192486389844304, "loss": 2.0497, "step": 105535 }, { "epoch": 0.25, "grad_norm": 2.515625, "learning_rate": 0.00019248568692646953, "loss": 2.1674, "step": 105540 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.0001924849839770403, "loss": 2.0894, "step": 105545 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019248428099601648, "loss": 2.0989, "step": 105550 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.0001924835779833984, "loss": 2.1058, "step": 105555 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.0001924828749391862, "loss": 2.1384, "step": 105560 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019248217186338022, "loss": 2.1357, "step": 105565 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019248146875598066, "loss": 2.2226, "step": 105570 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019248076561698772, "loss": 2.2743, "step": 105575 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.0001924800624464017, "loss": 2.3551, "step": 105580 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019247935924422276, "loss": 2.201, "step": 105585 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019247865601045122, "loss": 2.066, "step": 105590 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019247795274508734, "loss": 2.2904, "step": 105595 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019247724944813127, "loss": 2.1773, "step": 105600 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.0001924765461195833, "loss": 1.9998, "step": 105605 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.00019247584275944367, "loss": 2.1561, "step": 105610 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.0001924751393677126, "loss": 2.1534, "step": 105615 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019247443594439036, "loss": 2.1019, "step": 105620 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019247373248947717, "loss": 2.0903, "step": 105625 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019247302900297328, "loss": 2.0501, "step": 105630 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019247232548487891, "loss": 2.1834, "step": 105635 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.00019247162193519434, "loss": 1.9064, "step": 105640 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019247091835391977, "loss": 2.2851, "step": 105645 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019247021474105548, "loss": 2.0924, "step": 105650 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019246951109660166, "loss": 2.0699, "step": 105655 }, { "epoch": 0.25, "grad_norm": 2.515625, "learning_rate": 0.00019246880742055858, "loss": 2.0455, "step": 105660 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.0001924681037129265, "loss": 2.1096, "step": 105665 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019246739997370564, "loss": 2.3252, "step": 105670 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019246669620289622, "loss": 2.0864, "step": 105675 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.0001924659924004985, "loss": 2.1812, "step": 105680 }, { "epoch": 0.25, "grad_norm": 1.796875, "learning_rate": 0.00019246528856651276, "loss": 2.2117, "step": 105685 }, { "epoch": 0.25, "grad_norm": 2.59375, "learning_rate": 0.00019246458470093916, "loss": 2.1676, "step": 105690 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019246388080377798, "loss": 1.9679, "step": 105695 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.0001924631768750295, "loss": 2.2115, "step": 105700 }, { "epoch": 0.25, "grad_norm": 2.5, "learning_rate": 0.00019246247291469386, "loss": 2.1978, "step": 105705 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.0001924617689227714, "loss": 2.0725, "step": 105710 }, { "epoch": 0.25, "grad_norm": 1.5859375, "learning_rate": 0.00019246106489926231, "loss": 2.1257, "step": 105715 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019246036084416684, "loss": 2.0833, "step": 105720 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019245965675748528, "loss": 2.112, "step": 105725 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019245895263921776, "loss": 2.2707, "step": 105730 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019245824848936462, "loss": 1.9403, "step": 105735 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019245754430792606, "loss": 2.1651, "step": 105740 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019245684009490234, "loss": 2.19, "step": 105745 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019245613585029366, "loss": 2.2785, "step": 105750 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019245543157410028, "loss": 2.1754, "step": 105755 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019245472726632247, "loss": 2.2414, "step": 105760 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019245402292696046, "loss": 2.2485, "step": 105765 }, { "epoch": 0.25, "grad_norm": 1.4921875, "learning_rate": 0.00019245331855601445, "loss": 1.905, "step": 105770 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019245261415348473, "loss": 2.2837, "step": 105775 }, { "epoch": 0.25, "grad_norm": 2.375, "learning_rate": 0.0001924519097193715, "loss": 2.0781, "step": 105780 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019245120525367506, "loss": 2.2361, "step": 105785 }, { "epoch": 0.25, "grad_norm": 1.640625, "learning_rate": 0.00019245050075639557, "loss": 2.1998, "step": 105790 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019244979622753334, "loss": 2.0656, "step": 105795 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.00019244909166708855, "loss": 2.2702, "step": 105800 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019244838707506152, "loss": 2.1998, "step": 105805 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019244768245145241, "loss": 2.3001, "step": 105810 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.0001924469777962615, "loss": 2.1975, "step": 105815 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019244627310948905, "loss": 2.2728, "step": 105820 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019244556839113526, "loss": 2.0635, "step": 105825 }, { "epoch": 0.25, "grad_norm": 1.859375, "learning_rate": 0.0001924448636412004, "loss": 1.9432, "step": 105830 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019244415885968466, "loss": 2.1853, "step": 105835 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.00019244345404658835, "loss": 2.0234, "step": 105840 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019244274920191166, "loss": 1.9498, "step": 105845 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019244204432565485, "loss": 2.3762, "step": 105850 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019244133941781816, "loss": 2.1518, "step": 105855 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019244063447840186, "loss": 2.0759, "step": 105860 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019243992950740614, "loss": 2.161, "step": 105865 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.0001924392245048313, "loss": 2.1373, "step": 105870 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.0001924385194706775, "loss": 2.3422, "step": 105875 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019243781440494504, "loss": 2.0918, "step": 105880 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019243710930763415, "loss": 2.1767, "step": 105885 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019243640417874506, "loss": 2.0659, "step": 105890 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019243569901827802, "loss": 2.1317, "step": 105895 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019243499382623326, "loss": 2.2324, "step": 105900 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019243428860261106, "loss": 2.2065, "step": 105905 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019243358334741163, "loss": 2.0469, "step": 105910 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019243287806063516, "loss": 2.0362, "step": 105915 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.000192432172742282, "loss": 2.3073, "step": 105920 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019243146739235227, "loss": 2.195, "step": 105925 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019243076201084632, "loss": 2.1218, "step": 105930 }, { "epoch": 0.25, "grad_norm": 2.640625, "learning_rate": 0.00019243005659776434, "loss": 2.2057, "step": 105935 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019242935115310658, "loss": 2.0964, "step": 105940 }, { "epoch": 0.25, "grad_norm": 1.625, "learning_rate": 0.00019242864567687326, "loss": 2.2134, "step": 105945 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019242794016906465, "loss": 2.1852, "step": 105950 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019242723462968098, "loss": 2.1085, "step": 105955 }, { "epoch": 0.25, "grad_norm": 2.4375, "learning_rate": 0.00019242652905872252, "loss": 2.0895, "step": 105960 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019242582345618941, "loss": 2.1872, "step": 105965 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019242511782208203, "loss": 2.1606, "step": 105970 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019242441215640052, "loss": 2.1227, "step": 105975 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.00019242370645914517, "loss": 2.1394, "step": 105980 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.0001924230007303162, "loss": 2.1043, "step": 105985 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019242229496991385, "loss": 2.0635, "step": 105990 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019242158917793836, "loss": 2.067, "step": 105995 }, { "epoch": 0.25, "grad_norm": 2.96875, "learning_rate": 0.00019242088335439, "loss": 1.9653, "step": 106000 }, { "epoch": 0.25, "grad_norm": 1.6484375, "learning_rate": 0.00019242017749926898, "loss": 2.0213, "step": 106005 }, { "epoch": 0.25, "grad_norm": 3.828125, "learning_rate": 0.00019241947161257556, "loss": 2.0237, "step": 106010 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019241876569430993, "loss": 2.2213, "step": 106015 }, { "epoch": 0.25, "grad_norm": 2.375, "learning_rate": 0.00019241805974447242, "loss": 2.1519, "step": 106020 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019241735376306323, "loss": 2.1609, "step": 106025 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019241664775008257, "loss": 2.1669, "step": 106030 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019241594170553073, "loss": 2.2474, "step": 106035 }, { "epoch": 0.25, "grad_norm": 1.609375, "learning_rate": 0.00019241523562940792, "loss": 1.9736, "step": 106040 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019241452952171436, "loss": 2.2635, "step": 106045 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019241382338245036, "loss": 2.1324, "step": 106050 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001924131172116161, "loss": 2.2362, "step": 106055 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.00019241241100921187, "loss": 2.1536, "step": 106060 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019241170477523784, "loss": 2.255, "step": 106065 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.00019241099850969432, "loss": 2.1646, "step": 106070 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019241029221258153, "loss": 2.1256, "step": 106075 }, { "epoch": 0.25, "grad_norm": 2.484375, "learning_rate": 0.0001924095858838997, "loss": 2.3122, "step": 106080 }, { "epoch": 0.25, "grad_norm": 1.9609375, "learning_rate": 0.00019240887952364907, "loss": 2.0361, "step": 106085 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.0001924081731318299, "loss": 2.2075, "step": 106090 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019240746670844242, "loss": 2.0875, "step": 106095 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019240676025348693, "loss": 2.221, "step": 106100 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019240605376696354, "loss": 2.1063, "step": 106105 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019240534724887257, "loss": 2.1867, "step": 106110 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.0001924046406992143, "loss": 2.1272, "step": 106115 }, { "epoch": 0.25, "grad_norm": 2.53125, "learning_rate": 0.00019240393411798891, "loss": 2.0989, "step": 106120 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019240322750519666, "loss": 2.0755, "step": 106125 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.0001924025208608378, "loss": 2.3044, "step": 106130 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019240181418491255, "loss": 2.2237, "step": 106135 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019240110747742114, "loss": 2.3, "step": 106140 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019240040073836387, "loss": 2.2407, "step": 106145 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019239969396774096, "loss": 2.1635, "step": 106150 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019239898716555263, "loss": 2.1016, "step": 106155 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.0001923982803317991, "loss": 2.0731, "step": 106160 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019239757346648065, "loss": 2.0299, "step": 106165 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019239686656959755, "loss": 2.3655, "step": 106170 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019239615964114997, "loss": 2.3892, "step": 106175 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.0001923954526811382, "loss": 2.1499, "step": 106180 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019239474568956245, "loss": 2.155, "step": 106185 }, { "epoch": 0.25, "grad_norm": 1.6953125, "learning_rate": 0.000192394038666423, "loss": 2.1879, "step": 106190 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019239333161172006, "loss": 2.1932, "step": 106195 }, { "epoch": 0.25, "grad_norm": 2.703125, "learning_rate": 0.0001923926245254539, "loss": 2.2755, "step": 106200 }, { "epoch": 0.25, "grad_norm": 1.6015625, "learning_rate": 0.00019239191740762473, "loss": 1.9518, "step": 106205 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.0001923912102582328, "loss": 2.2586, "step": 106210 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019239050307727834, "loss": 2.1975, "step": 106215 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019238979586476166, "loss": 2.3445, "step": 106220 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.0001923890886206829, "loss": 2.2506, "step": 106225 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.0001923883813450424, "loss": 2.0787, "step": 106230 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.0001923876740378403, "loss": 2.2907, "step": 106235 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019238696669907692, "loss": 2.1732, "step": 106240 }, { "epoch": 0.25, "grad_norm": 1.625, "learning_rate": 0.00019238625932875247, "loss": 2.1982, "step": 106245 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019238555192686721, "loss": 2.1836, "step": 106250 }, { "epoch": 0.25, "grad_norm": 1.4609375, "learning_rate": 0.00019238484449342135, "loss": 1.9421, "step": 106255 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019238413702841518, "loss": 2.1784, "step": 106260 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.0001923834295318489, "loss": 2.1731, "step": 106265 }, { "epoch": 0.25, "grad_norm": 1.75, "learning_rate": 0.0001923827220037228, "loss": 2.106, "step": 106270 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.000192382014444037, "loss": 2.126, "step": 106275 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.0001923813068527919, "loss": 2.2424, "step": 106280 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019238059922998765, "loss": 2.1292, "step": 106285 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019237989157562454, "loss": 2.1268, "step": 106290 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019237918388970274, "loss": 2.05, "step": 106295 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019237847617222252, "loss": 2.0826, "step": 106300 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.0001923777684231842, "loss": 2.2581, "step": 106305 }, { "epoch": 0.25, "grad_norm": 2.375, "learning_rate": 0.0001923770606425879, "loss": 2.0676, "step": 106310 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.00019237635283043397, "loss": 2.203, "step": 106315 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001923756449867226, "loss": 2.2741, "step": 106320 }, { "epoch": 0.25, "grad_norm": 1.6640625, "learning_rate": 0.000192374937111454, "loss": 2.1975, "step": 106325 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019237422920462843, "loss": 2.2181, "step": 106330 }, { "epoch": 0.25, "grad_norm": 1.671875, "learning_rate": 0.00019237352126624623, "loss": 2.0588, "step": 106335 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.0001923728132963075, "loss": 2.1015, "step": 106340 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019237210529481254, "loss": 2.1991, "step": 106345 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019237139726176162, "loss": 2.1952, "step": 106350 }, { "epoch": 0.25, "grad_norm": 1.71875, "learning_rate": 0.00019237068919715494, "loss": 2.1799, "step": 106355 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019236998110099273, "loss": 2.105, "step": 106360 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.0001923692729732753, "loss": 2.2124, "step": 106365 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019236856481400283, "loss": 2.1123, "step": 106370 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019236785662317562, "loss": 2.2326, "step": 106375 }, { "epoch": 0.25, "grad_norm": 1.8671875, "learning_rate": 0.0001923671484007938, "loss": 2.1511, "step": 106380 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.00019236644014685776, "loss": 2.1488, "step": 106385 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019236573186136763, "loss": 2.2939, "step": 106390 }, { "epoch": 0.25, "grad_norm": 2.578125, "learning_rate": 0.0001923650235443237, "loss": 2.0587, "step": 106395 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.00019236431519572623, "loss": 1.99, "step": 106400 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019236360681557538, "loss": 2.0597, "step": 106405 }, { "epoch": 0.25, "grad_norm": 1.640625, "learning_rate": 0.0001923628984038715, "loss": 2.3362, "step": 106410 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019236218996061474, "loss": 2.2931, "step": 106415 }, { "epoch": 0.25, "grad_norm": 2.53125, "learning_rate": 0.0001923614814858054, "loss": 2.2676, "step": 106420 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001923607729794437, "loss": 2.1277, "step": 106425 }, { "epoch": 0.25, "grad_norm": 2.6875, "learning_rate": 0.0001923600644415299, "loss": 2.0162, "step": 106430 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.0001923593558720642, "loss": 2.1774, "step": 106435 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019235864727104688, "loss": 2.1886, "step": 106440 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019235793863847818, "loss": 2.1806, "step": 106445 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.0001923572299743583, "loss": 2.02, "step": 106450 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019235652127868756, "loss": 2.3059, "step": 106455 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019235581255146612, "loss": 2.292, "step": 106460 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.00019235510379269429, "loss": 2.0671, "step": 106465 }, { "epoch": 0.25, "grad_norm": 1.6171875, "learning_rate": 0.00019235439500237228, "loss": 2.033, "step": 106470 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019235368618050028, "loss": 2.2609, "step": 106475 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.00019235297732707863, "loss": 2.0792, "step": 106480 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019235226844210756, "loss": 2.0596, "step": 106485 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019235155952558724, "loss": 2.3538, "step": 106490 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019235085057751796, "loss": 2.1515, "step": 106495 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019235014159789995, "loss": 2.3973, "step": 106500 }, { "epoch": 0.25, "grad_norm": 1.859375, "learning_rate": 0.00019234943258673347, "loss": 2.3, "step": 106505 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.0001923487235440187, "loss": 2.2057, "step": 106510 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.000192348014469756, "loss": 2.0544, "step": 106515 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019234730536394552, "loss": 2.3505, "step": 106520 }, { "epoch": 0.25, "grad_norm": 2.609375, "learning_rate": 0.0001923465962265875, "loss": 2.2422, "step": 106525 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019234588705768223, "loss": 2.0306, "step": 106530 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.0001923451778572299, "loss": 2.175, "step": 106535 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019234446862523082, "loss": 2.119, "step": 106540 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.0001923437593616852, "loss": 2.0625, "step": 106545 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019234305006659324, "loss": 2.1327, "step": 106550 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019234234073995526, "loss": 2.2272, "step": 106555 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019234163138177147, "loss": 2.1476, "step": 106560 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019234092199204206, "loss": 2.0546, "step": 106565 }, { "epoch": 0.25, "grad_norm": 2.390625, "learning_rate": 0.00019234021257076732, "loss": 2.2212, "step": 106570 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001923395031179475, "loss": 2.3237, "step": 106575 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019233879363358286, "loss": 1.9491, "step": 106580 }, { "epoch": 0.25, "grad_norm": 1.5234375, "learning_rate": 0.0001923380841176736, "loss": 2.0112, "step": 106585 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019233737457021995, "loss": 2.2669, "step": 106590 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019233666499122217, "loss": 2.0431, "step": 106595 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019233595538068054, "loss": 2.1136, "step": 106600 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019233524573859527, "loss": 2.1927, "step": 106605 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.0001923345360649666, "loss": 2.2646, "step": 106610 }, { "epoch": 0.25, "grad_norm": 1.6171875, "learning_rate": 0.0001923338263597948, "loss": 2.0343, "step": 106615 }, { "epoch": 0.25, "grad_norm": 2.28125, "learning_rate": 0.00019233311662308005, "loss": 2.09, "step": 106620 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019233240685482265, "loss": 2.118, "step": 106625 }, { "epoch": 0.25, "grad_norm": 2.75, "learning_rate": 0.00019233169705502282, "loss": 1.9446, "step": 106630 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019233098722368078, "loss": 2.1601, "step": 106635 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019233027736079684, "loss": 2.0861, "step": 106640 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001923295674663712, "loss": 2.1291, "step": 106645 }, { "epoch": 0.25, "grad_norm": 1.859375, "learning_rate": 0.0001923288575404041, "loss": 2.2505, "step": 106650 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.0001923281475828958, "loss": 2.1904, "step": 106655 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019232743759384652, "loss": 2.0921, "step": 106660 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.0001923267275732565, "loss": 2.1856, "step": 106665 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019232601752112603, "loss": 2.1934, "step": 106670 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019232530743745527, "loss": 2.1985, "step": 106675 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019232459732224455, "loss": 2.0494, "step": 106680 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.0001923238871754941, "loss": 2.0343, "step": 106685 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019232317699720405, "loss": 2.1348, "step": 106690 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019232246678737476, "loss": 2.0926, "step": 106695 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019232175654600647, "loss": 2.2855, "step": 106700 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019232104627309938, "loss": 2.2664, "step": 106705 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019232033596865375, "loss": 2.1471, "step": 106710 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019231962563266982, "loss": 2.0569, "step": 106715 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.0001923189152651478, "loss": 2.4177, "step": 106720 }, { "epoch": 0.25, "grad_norm": 2.609375, "learning_rate": 0.000192318204866088, "loss": 2.0592, "step": 106725 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019231749443549063, "loss": 2.3052, "step": 106730 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.0001923167839733559, "loss": 2.1162, "step": 106735 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.0001923160734796841, "loss": 2.1182, "step": 106740 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019231536295447548, "loss": 2.3101, "step": 106745 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.0001923146523977302, "loss": 2.3933, "step": 106750 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019231394180944863, "loss": 2.1321, "step": 106755 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019231323118963086, "loss": 2.1126, "step": 106760 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.0001923125205382773, "loss": 2.1786, "step": 106765 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019231180985538804, "loss": 2.0883, "step": 106770 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019231109914096345, "loss": 2.154, "step": 106775 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019231038839500367, "loss": 2.1957, "step": 106780 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.000192309677617509, "loss": 2.0587, "step": 106785 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019230896680847969, "loss": 2.0938, "step": 106790 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019230825596791592, "loss": 2.0843, "step": 106795 }, { "epoch": 0.25, "grad_norm": 1.421875, "learning_rate": 0.000192307545095818, "loss": 2.2001, "step": 106800 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.0001923068341921862, "loss": 2.1976, "step": 106805 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.0001923061232570206, "loss": 2.1043, "step": 106810 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019230541229032163, "loss": 2.2173, "step": 106815 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019230470129208947, "loss": 2.1372, "step": 106820 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001923039902623243, "loss": 2.0853, "step": 106825 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019230327920102645, "loss": 1.9838, "step": 106830 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.0001923025681081961, "loss": 1.9807, "step": 106835 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019230185698383353, "loss": 2.27, "step": 106840 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.000192301145827939, "loss": 2.1565, "step": 106845 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019230043464051267, "loss": 2.1397, "step": 106850 }, { "epoch": 0.25, "grad_norm": 2.546875, "learning_rate": 0.00019229972342155486, "loss": 2.17, "step": 106855 }, { "epoch": 0.25, "grad_norm": 1.65625, "learning_rate": 0.0001922990121710658, "loss": 2.0261, "step": 106860 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019229830088904573, "loss": 2.1718, "step": 106865 }, { "epoch": 0.25, "grad_norm": 1.5625, "learning_rate": 0.00019229758957549486, "loss": 2.2799, "step": 106870 }, { "epoch": 0.25, "grad_norm": 1.703125, "learning_rate": 0.0001922968782304135, "loss": 2.1145, "step": 106875 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.0001922961668538018, "loss": 2.0861, "step": 106880 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.0001922954554456601, "loss": 2.0704, "step": 106885 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019229474400598857, "loss": 2.0158, "step": 106890 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.0001922940325347875, "loss": 2.1964, "step": 106895 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.0001922933210320571, "loss": 2.1189, "step": 106900 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019229260949779763, "loss": 2.0464, "step": 106905 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019229189793200933, "loss": 2.1889, "step": 106910 }, { "epoch": 0.25, "grad_norm": 3.0, "learning_rate": 0.00019229118633469245, "loss": 2.3017, "step": 106915 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019229047470584724, "loss": 2.1674, "step": 106920 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.0001922897630454739, "loss": 2.1053, "step": 106925 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019228905135357274, "loss": 2.3256, "step": 106930 }, { "epoch": 0.25, "grad_norm": 1.84375, "learning_rate": 0.00019228833963014395, "loss": 2.1197, "step": 106935 }, { "epoch": 0.25, "grad_norm": 1.6171875, "learning_rate": 0.00019228762787518777, "loss": 2.3796, "step": 106940 }, { "epoch": 0.25, "grad_norm": 1.578125, "learning_rate": 0.0001922869160887045, "loss": 1.9044, "step": 106945 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.0001922862042706943, "loss": 2.0659, "step": 106950 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.0001922854924211575, "loss": 2.0061, "step": 106955 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019228478054009426, "loss": 1.9194, "step": 106960 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.0001922840686275049, "loss": 2.0342, "step": 106965 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.00019228335668338963, "loss": 2.1075, "step": 106970 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.0001922826447077487, "loss": 2.1594, "step": 106975 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.0001922819327005823, "loss": 1.8602, "step": 106980 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.00019228122066189073, "loss": 2.1413, "step": 106985 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019228050859167425, "loss": 2.0957, "step": 106990 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019227979648993304, "loss": 1.9004, "step": 106995 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.00019227908435666738, "loss": 2.1232, "step": 107000 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019227837219187757, "loss": 2.0827, "step": 107005 }, { "epoch": 0.25, "grad_norm": 1.671875, "learning_rate": 0.00019227765999556376, "loss": 2.1279, "step": 107010 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.0001922769477677262, "loss": 2.0694, "step": 107015 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.0001922762355083652, "loss": 2.3889, "step": 107020 }, { "epoch": 0.25, "grad_norm": 1.703125, "learning_rate": 0.00019227552321748092, "loss": 2.1064, "step": 107025 }, { "epoch": 0.25, "grad_norm": 2.609375, "learning_rate": 0.0001922748108950737, "loss": 1.9987, "step": 107030 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019227409854114368, "loss": 2.2683, "step": 107035 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019227338615569118, "loss": 1.8929, "step": 107040 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019227267373871643, "loss": 2.2602, "step": 107045 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019227196129021964, "loss": 2.2686, "step": 107050 }, { "epoch": 0.25, "grad_norm": 2.59375, "learning_rate": 0.00019227124881020108, "loss": 2.3033, "step": 107055 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.000192270536298661, "loss": 2.1493, "step": 107060 }, { "epoch": 0.25, "grad_norm": 1.6953125, "learning_rate": 0.00019226982375559963, "loss": 2.1374, "step": 107065 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.0001922691111810172, "loss": 2.1682, "step": 107070 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019226839857491398, "loss": 2.3332, "step": 107075 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019226768593729018, "loss": 2.0606, "step": 107080 }, { "epoch": 0.25, "grad_norm": 2.453125, "learning_rate": 0.00019226697326814608, "loss": 2.1996, "step": 107085 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019226626056748192, "loss": 2.1502, "step": 107090 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019226554783529794, "loss": 2.0865, "step": 107095 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019226483507159435, "loss": 2.1975, "step": 107100 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019226412227637142, "loss": 2.2661, "step": 107105 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.0001922634094496294, "loss": 2.1072, "step": 107110 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019226269659136853, "loss": 2.2765, "step": 107115 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019226198370158907, "loss": 2.0463, "step": 107120 }, { "epoch": 0.25, "grad_norm": 1.640625, "learning_rate": 0.0001922612707802912, "loss": 2.1508, "step": 107125 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019226055782747522, "loss": 2.2586, "step": 107130 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019225984484314137, "loss": 2.1083, "step": 107135 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019225913182728988, "loss": 2.1456, "step": 107140 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.00019225841877992098, "loss": 2.3299, "step": 107145 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.00019225770570103497, "loss": 2.1857, "step": 107150 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019225699259063203, "loss": 2.2823, "step": 107155 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.00019225627944871243, "loss": 2.2533, "step": 107160 }, { "epoch": 0.25, "grad_norm": 2.59375, "learning_rate": 0.0001922555662752764, "loss": 2.1071, "step": 107165 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019225485307032423, "loss": 2.0528, "step": 107170 }, { "epoch": 0.25, "grad_norm": 2.46875, "learning_rate": 0.0001922541398338561, "loss": 2.1246, "step": 107175 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019225342656587228, "loss": 2.1518, "step": 107180 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019225271326637304, "loss": 2.1885, "step": 107185 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019225199993535855, "loss": 2.1231, "step": 107190 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019225128657282916, "loss": 2.3929, "step": 107195 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019225057317878504, "loss": 2.0802, "step": 107200 }, { "epoch": 0.25, "grad_norm": 1.6484375, "learning_rate": 0.00019224985975322645, "loss": 2.0892, "step": 107205 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019224914629615363, "loss": 2.0956, "step": 107210 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019224843280756684, "loss": 2.3193, "step": 107215 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.0001922477192874663, "loss": 2.1883, "step": 107220 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019224700573585227, "loss": 2.1702, "step": 107225 }, { "epoch": 0.25, "grad_norm": 2.625, "learning_rate": 0.000192246292152725, "loss": 2.0763, "step": 107230 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.0001922455785380847, "loss": 2.2268, "step": 107235 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019224486489193164, "loss": 2.1196, "step": 107240 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.0001922441512142661, "loss": 2.3583, "step": 107245 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019224343750508824, "loss": 2.1781, "step": 107250 }, { "epoch": 0.25, "grad_norm": 1.75, "learning_rate": 0.00019224272376439837, "loss": 2.0696, "step": 107255 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.0001922420099921967, "loss": 2.1788, "step": 107260 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.0001922412961884835, "loss": 2.0342, "step": 107265 }, { "epoch": 0.25, "grad_norm": 2.28125, "learning_rate": 0.000192240582353259, "loss": 2.2617, "step": 107270 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019223986848652345, "loss": 2.133, "step": 107275 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019223915458827708, "loss": 2.0005, "step": 107280 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019223844065852013, "loss": 2.3029, "step": 107285 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019223772669725285, "loss": 2.2093, "step": 107290 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019223701270447552, "loss": 2.415, "step": 107295 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019223629868018836, "loss": 2.0501, "step": 107300 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019223558462439156, "loss": 2.2135, "step": 107305 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019223487053708545, "loss": 2.1542, "step": 107310 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019223415641827024, "loss": 2.2789, "step": 107315 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019223344226794613, "loss": 2.3146, "step": 107320 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.00019223272808611343, "loss": 2.132, "step": 107325 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.00019223201387277237, "loss": 2.0078, "step": 107330 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019223129962792315, "loss": 2.1045, "step": 107335 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019223058535156607, "loss": 2.3083, "step": 107340 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019222987104370134, "loss": 2.0317, "step": 107345 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.0001922291567043292, "loss": 2.1493, "step": 107350 }, { "epoch": 0.25, "grad_norm": 1.6328125, "learning_rate": 0.00019222844233344991, "loss": 2.1634, "step": 107355 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.0001922277279310637, "loss": 2.3562, "step": 107360 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019222701349717088, "loss": 2.2443, "step": 107365 }, { "epoch": 0.25, "grad_norm": 1.6171875, "learning_rate": 0.0001922262990317716, "loss": 2.2286, "step": 107370 }, { "epoch": 0.25, "grad_norm": 1.6875, "learning_rate": 0.00019222558453486617, "loss": 2.147, "step": 107375 }, { "epoch": 0.25, "grad_norm": 1.75, "learning_rate": 0.00019222487000645477, "loss": 2.2223, "step": 107380 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.0001922241554465377, "loss": 2.1542, "step": 107385 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019222344085511518, "loss": 2.1642, "step": 107390 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019222272623218748, "loss": 2.1824, "step": 107395 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.0001922220115777548, "loss": 2.233, "step": 107400 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019222129689181743, "loss": 2.1495, "step": 107405 }, { "epoch": 0.25, "grad_norm": 1.9609375, "learning_rate": 0.00019222058217437557, "loss": 2.1947, "step": 107410 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.0001922198674254295, "loss": 2.0062, "step": 107415 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019221915264497946, "loss": 2.1014, "step": 107420 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.00019221843783302566, "loss": 2.002, "step": 107425 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.0001922177229895684, "loss": 2.1565, "step": 107430 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019221700811460787, "loss": 2.2165, "step": 107435 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019221629320814433, "loss": 2.2175, "step": 107440 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019221557827017805, "loss": 2.2245, "step": 107445 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.00019221486330070925, "loss": 2.0939, "step": 107450 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019221414829973817, "loss": 2.3757, "step": 107455 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.0001922134332672651, "loss": 2.1458, "step": 107460 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019221271820329022, "loss": 2.2034, "step": 107465 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.0001922120031078138, "loss": 2.05, "step": 107470 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.0001922112879808361, "loss": 2.1183, "step": 107475 }, { "epoch": 0.25, "grad_norm": 1.6640625, "learning_rate": 0.00019221057282235734, "loss": 2.042, "step": 107480 }, { "epoch": 0.25, "grad_norm": 1.7890625, "learning_rate": 0.0001922098576323778, "loss": 2.2264, "step": 107485 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019220914241089768, "loss": 2.2846, "step": 107490 }, { "epoch": 0.25, "grad_norm": 1.6328125, "learning_rate": 0.00019220842715791725, "loss": 2.0187, "step": 107495 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019220771187343676, "loss": 2.1569, "step": 107500 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.0001922069965574564, "loss": 2.0998, "step": 107505 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.0001922062812099765, "loss": 2.3394, "step": 107510 }, { "epoch": 0.25, "grad_norm": 1.890625, "learning_rate": 0.00019220556583099727, "loss": 2.1996, "step": 107515 }, { "epoch": 0.25, "grad_norm": 3.5625, "learning_rate": 0.0001922048504205189, "loss": 2.1479, "step": 107520 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.00019220413497854174, "loss": 1.9749, "step": 107525 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019220341950506594, "loss": 2.1666, "step": 107530 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019220270400009177, "loss": 2.1575, "step": 107535 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019220198846361947, "loss": 2.1646, "step": 107540 }, { "epoch": 0.25, "grad_norm": 1.5703125, "learning_rate": 0.00019220127289564935, "loss": 2.2282, "step": 107545 }, { "epoch": 0.25, "grad_norm": 2.65625, "learning_rate": 0.00019220055729618157, "loss": 2.0382, "step": 107550 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019219984166521644, "loss": 2.1768, "step": 107555 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.00019219912600275412, "loss": 2.055, "step": 107560 }, { "epoch": 0.25, "grad_norm": 1.9453125, "learning_rate": 0.00019219841030879495, "loss": 2.1678, "step": 107565 }, { "epoch": 0.25, "grad_norm": 2.625, "learning_rate": 0.0001921976945833391, "loss": 2.1232, "step": 107570 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.0001921969788263869, "loss": 2.2403, "step": 107575 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019219626303793846, "loss": 2.1803, "step": 107580 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019219554721799416, "loss": 2.1696, "step": 107585 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.00019219483136655415, "loss": 2.1468, "step": 107590 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.00019219411548361876, "loss": 2.3605, "step": 107595 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019219339956918816, "loss": 2.2412, "step": 107600 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019219268362326265, "loss": 2.1151, "step": 107605 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.0001921919676458424, "loss": 2.1729, "step": 107610 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019219125163692774, "loss": 2.1845, "step": 107615 }, { "epoch": 0.25, "grad_norm": 2.71875, "learning_rate": 0.00019219053559651887, "loss": 2.1429, "step": 107620 }, { "epoch": 0.25, "grad_norm": 2.40625, "learning_rate": 0.00019218981952461603, "loss": 2.0645, "step": 107625 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019218910342121948, "loss": 2.1259, "step": 107630 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019218838728632945, "loss": 2.1005, "step": 107635 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019218767111994623, "loss": 2.0421, "step": 107640 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.00019218695492206997, "loss": 2.0946, "step": 107645 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019218623869270103, "loss": 2.0544, "step": 107650 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.0001921855224318396, "loss": 2.381, "step": 107655 }, { "epoch": 0.25, "grad_norm": 1.5859375, "learning_rate": 0.0001921848061394859, "loss": 2.123, "step": 107660 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019218408981564017, "loss": 2.2228, "step": 107665 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.00019218337346030273, "loss": 2.1395, "step": 107670 }, { "epoch": 0.25, "grad_norm": 1.734375, "learning_rate": 0.00019218265707347375, "loss": 2.2552, "step": 107675 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019218194065515352, "loss": 2.0294, "step": 107680 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001921812242053423, "loss": 2.2551, "step": 107685 }, { "epoch": 0.25, "grad_norm": 2.40625, "learning_rate": 0.0001921805077240402, "loss": 2.1332, "step": 107690 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019217979121124765, "loss": 2.1403, "step": 107695 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.0001921790746669648, "loss": 2.1501, "step": 107700 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019217835809119191, "loss": 2.1052, "step": 107705 }, { "epoch": 0.25, "grad_norm": 1.6875, "learning_rate": 0.0001921776414839292, "loss": 2.0758, "step": 107710 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019217692484517693, "loss": 2.3443, "step": 107715 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.0001921762081749354, "loss": 2.2822, "step": 107720 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.00019217549147320474, "loss": 2.1832, "step": 107725 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.0001921747747399853, "loss": 2.1393, "step": 107730 }, { "epoch": 0.25, "grad_norm": 2.3125, "learning_rate": 0.00019217405797527728, "loss": 2.196, "step": 107735 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019217334117908094, "loss": 2.1732, "step": 107740 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.0001921726243513965, "loss": 2.3285, "step": 107745 }, { "epoch": 0.25, "grad_norm": 2.09375, "learning_rate": 0.00019217190749222423, "loss": 2.3432, "step": 107750 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019217119060156433, "loss": 2.3227, "step": 107755 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.0001921704736794171, "loss": 2.1699, "step": 107760 }, { "epoch": 0.25, "grad_norm": 2.234375, "learning_rate": 0.0001921697567257828, "loss": 2.0796, "step": 107765 }, { "epoch": 0.25, "grad_norm": 1.8203125, "learning_rate": 0.00019216903974066162, "loss": 2.1471, "step": 107770 }, { "epoch": 0.25, "grad_norm": 2.359375, "learning_rate": 0.00019216832272405377, "loss": 2.1285, "step": 107775 }, { "epoch": 0.25, "grad_norm": 2.109375, "learning_rate": 0.00019216760567595964, "loss": 2.1815, "step": 107780 }, { "epoch": 0.25, "grad_norm": 1.7734375, "learning_rate": 0.00019216688859637933, "loss": 2.1682, "step": 107785 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019216617148531314, "loss": 2.1497, "step": 107790 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019216545434276133, "loss": 2.2353, "step": 107795 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.0001921647371687241, "loss": 2.1712, "step": 107800 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019216401996320176, "loss": 2.1594, "step": 107805 }, { "epoch": 0.25, "grad_norm": 2.453125, "learning_rate": 0.0001921633027261945, "loss": 2.1945, "step": 107810 }, { "epoch": 0.25, "grad_norm": 1.640625, "learning_rate": 0.00019216258545770262, "loss": 2.2205, "step": 107815 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.00019216186815772628, "loss": 2.1914, "step": 107820 }, { "epoch": 0.25, "grad_norm": 2.171875, "learning_rate": 0.0001921611508262658, "loss": 2.1606, "step": 107825 }, { "epoch": 0.25, "grad_norm": 2.390625, "learning_rate": 0.0001921604334633214, "loss": 2.0888, "step": 107830 }, { "epoch": 0.25, "grad_norm": 1.984375, "learning_rate": 0.00019215971606889332, "loss": 1.9546, "step": 107835 }, { "epoch": 0.25, "grad_norm": 2.5625, "learning_rate": 0.00019215899864298178, "loss": 2.2056, "step": 107840 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001921582811855871, "loss": 2.2138, "step": 107845 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019215756369670946, "loss": 2.0456, "step": 107850 }, { "epoch": 0.25, "grad_norm": 2.734375, "learning_rate": 0.00019215684617634912, "loss": 2.2279, "step": 107855 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019215612862450633, "loss": 2.1154, "step": 107860 }, { "epoch": 0.25, "grad_norm": 2.703125, "learning_rate": 0.00019215541104118135, "loss": 2.2458, "step": 107865 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.0001921546934263744, "loss": 2.1578, "step": 107870 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019215397578008575, "loss": 2.1106, "step": 107875 }, { "epoch": 0.25, "grad_norm": 1.8125, "learning_rate": 0.00019215325810231562, "loss": 2.1322, "step": 107880 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019215254039306424, "loss": 2.2278, "step": 107885 }, { "epoch": 0.25, "grad_norm": 2.8125, "learning_rate": 0.0001921518226523319, "loss": 2.1905, "step": 107890 }, { "epoch": 0.25, "grad_norm": 1.90625, "learning_rate": 0.00019215110488011886, "loss": 2.0619, "step": 107895 }, { "epoch": 0.25, "grad_norm": 2.28125, "learning_rate": 0.0001921503870764253, "loss": 2.1471, "step": 107900 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.0001921496692412515, "loss": 2.3333, "step": 107905 }, { "epoch": 0.25, "grad_norm": 1.921875, "learning_rate": 0.0001921489513745977, "loss": 2.0714, "step": 107910 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019214823347646416, "loss": 2.2761, "step": 107915 }, { "epoch": 0.25, "grad_norm": 2.4375, "learning_rate": 0.00019214751554685107, "loss": 2.0953, "step": 107920 }, { "epoch": 0.25, "grad_norm": 1.7421875, "learning_rate": 0.00019214679758575875, "loss": 2.0007, "step": 107925 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019214607959318743, "loss": 2.1823, "step": 107930 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019214536156913733, "loss": 2.1674, "step": 107935 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.0001921446435136087, "loss": 2.1073, "step": 107940 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019214392542660176, "loss": 2.1742, "step": 107945 }, { "epoch": 0.25, "grad_norm": 1.671875, "learning_rate": 0.00019214320730811678, "loss": 2.0865, "step": 107950 }, { "epoch": 0.25, "grad_norm": 1.828125, "learning_rate": 0.00019214248915815405, "loss": 2.1581, "step": 107955 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019214177097671376, "loss": 2.0181, "step": 107960 }, { "epoch": 0.25, "grad_norm": 1.9765625, "learning_rate": 0.00019214105276379618, "loss": 2.2275, "step": 107965 }, { "epoch": 0.25, "grad_norm": 1.640625, "learning_rate": 0.00019214033451940153, "loss": 2.2029, "step": 107970 }, { "epoch": 0.25, "grad_norm": 2.21875, "learning_rate": 0.00019213961624353005, "loss": 1.9758, "step": 107975 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019213889793618204, "loss": 2.1352, "step": 107980 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.00019213817959735772, "loss": 2.1768, "step": 107985 }, { "epoch": 0.25, "grad_norm": 2.375, "learning_rate": 0.0001921374612270573, "loss": 2.0492, "step": 107990 }, { "epoch": 0.25, "grad_norm": 2.28125, "learning_rate": 0.00019213674282528105, "loss": 2.2531, "step": 107995 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019213602439202923, "loss": 2.2231, "step": 108000 }, { "epoch": 0.25, "grad_norm": 2.265625, "learning_rate": 0.00019213530592730206, "loss": 2.2755, "step": 108005 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019213458743109983, "loss": 2.2042, "step": 108010 }, { "epoch": 0.25, "grad_norm": 2.34375, "learning_rate": 0.00019213386890342272, "loss": 2.1542, "step": 108015 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.000192133150344271, "loss": 2.1512, "step": 108020 }, { "epoch": 0.25, "grad_norm": 1.7578125, "learning_rate": 0.00019213243175364496, "loss": 2.2291, "step": 108025 }, { "epoch": 0.25, "grad_norm": 1.96875, "learning_rate": 0.00019213171313154482, "loss": 2.1178, "step": 108030 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019213099447797078, "loss": 2.0296, "step": 108035 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019213027579292313, "loss": 2.1394, "step": 108040 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.00019212955707640213, "loss": 2.1718, "step": 108045 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019212883832840798, "loss": 2.1162, "step": 108050 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.00019212811954894097, "loss": 2.0918, "step": 108055 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.0001921274007380013, "loss": 2.0979, "step": 108060 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.0001921266818955892, "loss": 1.8976, "step": 108065 }, { "epoch": 0.25, "grad_norm": 2.25, "learning_rate": 0.00019212596302170502, "loss": 2.1076, "step": 108070 }, { "epoch": 0.25, "grad_norm": 1.8671875, "learning_rate": 0.00019212524411634893, "loss": 2.0634, "step": 108075 }, { "epoch": 0.25, "grad_norm": 2.5625, "learning_rate": 0.0001921245251795212, "loss": 2.0427, "step": 108080 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.000192123806211222, "loss": 2.1655, "step": 108085 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.0001921230872114517, "loss": 2.103, "step": 108090 }, { "epoch": 0.25, "grad_norm": 1.9140625, "learning_rate": 0.00019212236818021044, "loss": 2.1529, "step": 108095 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019212164911749853, "loss": 2.1484, "step": 108100 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019212093002331622, "loss": 2.0343, "step": 108105 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019212021089766367, "loss": 2.1953, "step": 108110 }, { "epoch": 0.25, "grad_norm": 1.8359375, "learning_rate": 0.00019211949174054123, "loss": 2.3037, "step": 108115 }, { "epoch": 0.25, "grad_norm": 1.59375, "learning_rate": 0.00019211877255194908, "loss": 2.167, "step": 108120 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.0001921180533318875, "loss": 1.9841, "step": 108125 }, { "epoch": 0.25, "grad_norm": 1.609375, "learning_rate": 0.0001921173340803567, "loss": 2.1623, "step": 108130 }, { "epoch": 0.25, "grad_norm": 1.875, "learning_rate": 0.00019211661479735697, "loss": 2.0381, "step": 108135 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.00019211589548288852, "loss": 2.1283, "step": 108140 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019211517613695162, "loss": 2.248, "step": 108145 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019211445675954648, "loss": 2.1458, "step": 108150 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.0001921137373506734, "loss": 2.0687, "step": 108155 }, { "epoch": 0.25, "grad_norm": 1.8515625, "learning_rate": 0.0001921130179103326, "loss": 2.1802, "step": 108160 }, { "epoch": 0.25, "grad_norm": 1.9296875, "learning_rate": 0.0001921122984385243, "loss": 2.169, "step": 108165 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019211157893524877, "loss": 2.26, "step": 108170 }, { "epoch": 0.25, "grad_norm": 2.421875, "learning_rate": 0.00019211085940050627, "loss": 2.0869, "step": 108175 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019211013983429703, "loss": 2.2074, "step": 108180 }, { "epoch": 0.25, "grad_norm": 1.9609375, "learning_rate": 0.0001921094202366213, "loss": 2.0716, "step": 108185 }, { "epoch": 0.25, "grad_norm": 2.1875, "learning_rate": 0.0001921087006074793, "loss": 2.0798, "step": 108190 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019210798094687132, "loss": 1.9879, "step": 108195 }, { "epoch": 0.25, "grad_norm": 1.8828125, "learning_rate": 0.00019210726125479756, "loss": 1.9853, "step": 108200 }, { "epoch": 0.25, "grad_norm": 1.9375, "learning_rate": 0.0001921065415312583, "loss": 1.9982, "step": 108205 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.0001921058217762538, "loss": 1.9881, "step": 108210 }, { "epoch": 0.25, "grad_norm": 1.8984375, "learning_rate": 0.00019210510198978424, "loss": 1.9757, "step": 108215 }, { "epoch": 0.25, "grad_norm": 2.125, "learning_rate": 0.00019210438217184993, "loss": 2.3182, "step": 108220 }, { "epoch": 0.25, "grad_norm": 2.046875, "learning_rate": 0.00019210366232245106, "loss": 2.1571, "step": 108225 }, { "epoch": 0.25, "grad_norm": 2.515625, "learning_rate": 0.00019210294244158797, "loss": 2.2181, "step": 108230 }, { "epoch": 0.25, "grad_norm": 1.8046875, "learning_rate": 0.0001921022225292608, "loss": 2.1714, "step": 108235 }, { "epoch": 0.25, "grad_norm": 2.390625, "learning_rate": 0.00019210150258546987, "loss": 2.1648, "step": 108240 }, { "epoch": 0.25, "grad_norm": 2.15625, "learning_rate": 0.00019210078261021536, "loss": 1.9838, "step": 108245 }, { "epoch": 0.25, "grad_norm": 1.578125, "learning_rate": 0.0001921000626034976, "loss": 2.084, "step": 108250 }, { "epoch": 0.25, "grad_norm": 1.9921875, "learning_rate": 0.0001920993425653167, "loss": 2.2386, "step": 108255 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019209862249567307, "loss": 2.3151, "step": 108260 }, { "epoch": 0.25, "grad_norm": 2.140625, "learning_rate": 0.00019209790239456685, "loss": 1.9608, "step": 108265 }, { "epoch": 0.25, "grad_norm": 2.015625, "learning_rate": 0.00019209718226199834, "loss": 1.9987, "step": 108270 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019209646209796774, "loss": 2.1627, "step": 108275 }, { "epoch": 0.25, "grad_norm": 1.671875, "learning_rate": 0.00019209574190247534, "loss": 2.3254, "step": 108280 }, { "epoch": 0.25, "grad_norm": 2.328125, "learning_rate": 0.00019209502167552133, "loss": 2.2751, "step": 108285 }, { "epoch": 0.25, "grad_norm": 2.640625, "learning_rate": 0.00019209430141710603, "loss": 2.2085, "step": 108290 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.00019209358112722962, "loss": 2.2003, "step": 108295 }, { "epoch": 0.25, "grad_norm": 2.03125, "learning_rate": 0.0001920928608058924, "loss": 2.1394, "step": 108300 }, { "epoch": 0.25, "grad_norm": 2.0, "learning_rate": 0.00019209214045309457, "loss": 2.1445, "step": 108305 }, { "epoch": 0.25, "grad_norm": 2.203125, "learning_rate": 0.00019209142006883638, "loss": 2.0261, "step": 108310 }, { "epoch": 0.25, "grad_norm": 2.078125, "learning_rate": 0.00019209069965311811, "loss": 2.1572, "step": 108315 }, { "epoch": 0.25, "grad_norm": 1.78125, "learning_rate": 0.00019208997920594, "loss": 2.2709, "step": 108320 }, { "epoch": 0.25, "grad_norm": 1.765625, "learning_rate": 0.00019208925872730225, "loss": 2.1922, "step": 108325 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019208853821720516, "loss": 2.2152, "step": 108330 }, { "epoch": 0.25, "grad_norm": 2.0625, "learning_rate": 0.00019208781767564894, "loss": 2.1907, "step": 108335 }, { "epoch": 0.25, "grad_norm": 2.296875, "learning_rate": 0.00019208709710263387, "loss": 2.3634, "step": 108340 }, { "epoch": 0.25, "grad_norm": 2.515625, "learning_rate": 0.00019208637649816015, "loss": 2.0767, "step": 108345 }, { "epoch": 0.25, "grad_norm": 2.671875, "learning_rate": 0.0001920856558622281, "loss": 2.2154, "step": 108350 }, { "epoch": 0.25, "grad_norm": 1.953125, "learning_rate": 0.00019208493519483788, "loss": 2.1799, "step": 108355 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.0001920842144959898, "loss": 2.0015, "step": 108360 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.00019208349376568403, "loss": 2.1971, "step": 108365 }, { "epoch": 0.26, "grad_norm": 2.21875, "learning_rate": 0.00019208277300392093, "loss": 2.0293, "step": 108370 }, { "epoch": 0.26, "grad_norm": 2.609375, "learning_rate": 0.00019208205221070066, "loss": 2.1479, "step": 108375 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001920813313860235, "loss": 2.071, "step": 108380 }, { "epoch": 0.26, "grad_norm": 2.796875, "learning_rate": 0.00019208061052988968, "loss": 2.2822, "step": 108385 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019207988964229945, "loss": 2.227, "step": 108390 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.0001920791687232531, "loss": 2.1781, "step": 108395 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019207844777275078, "loss": 2.1944, "step": 108400 }, { "epoch": 0.26, "grad_norm": 2.484375, "learning_rate": 0.0001920777267907928, "loss": 2.344, "step": 108405 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.0001920770057773794, "loss": 2.2002, "step": 108410 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019207628473251088, "loss": 2.1547, "step": 108415 }, { "epoch": 0.26, "grad_norm": 1.7890625, "learning_rate": 0.00019207556365618736, "loss": 2.1699, "step": 108420 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001920748425484092, "loss": 2.2593, "step": 108425 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019207412140917658, "loss": 2.2686, "step": 108430 }, { "epoch": 0.26, "grad_norm": 1.734375, "learning_rate": 0.00019207340023848977, "loss": 2.0165, "step": 108435 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019207267903634904, "loss": 2.1079, "step": 108440 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.0001920719578027546, "loss": 2.0659, "step": 108445 }, { "epoch": 0.26, "grad_norm": 2.84375, "learning_rate": 0.0001920712365377067, "loss": 2.1256, "step": 108450 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.0001920705152412056, "loss": 2.2968, "step": 108455 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019206979391325155, "loss": 2.2174, "step": 108460 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001920690725538448, "loss": 2.0055, "step": 108465 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019206835116298556, "loss": 2.1816, "step": 108470 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001920676297406741, "loss": 2.0248, "step": 108475 }, { "epoch": 0.26, "grad_norm": 1.6953125, "learning_rate": 0.0001920669082869107, "loss": 2.0986, "step": 108480 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019206618680169555, "loss": 2.2612, "step": 108485 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.00019206546528502893, "loss": 2.2285, "step": 108490 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.00019206474373691107, "loss": 2.2491, "step": 108495 }, { "epoch": 0.26, "grad_norm": 1.6015625, "learning_rate": 0.00019206402215734221, "loss": 2.0904, "step": 108500 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019206330054632263, "loss": 2.0295, "step": 108505 }, { "epoch": 0.26, "grad_norm": 2.40625, "learning_rate": 0.00019206257890385254, "loss": 2.1378, "step": 108510 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019206185722993224, "loss": 2.0601, "step": 108515 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019206113552456194, "loss": 2.1619, "step": 108520 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019206041378774184, "loss": 2.1542, "step": 108525 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.00019205969201947226, "loss": 2.1268, "step": 108530 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.0001920589702197534, "loss": 2.067, "step": 108535 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019205824838858555, "loss": 2.1683, "step": 108540 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019205752652596894, "loss": 2.1498, "step": 108545 }, { "epoch": 0.26, "grad_norm": 2.765625, "learning_rate": 0.0001920568046319038, "loss": 2.2086, "step": 108550 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.00019205608270639037, "loss": 2.0218, "step": 108555 }, { "epoch": 0.26, "grad_norm": 1.65625, "learning_rate": 0.00019205536074942892, "loss": 2.1162, "step": 108560 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019205463876101968, "loss": 2.2111, "step": 108565 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019205391674116293, "loss": 2.2005, "step": 108570 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.0001920531946898589, "loss": 1.9698, "step": 108575 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.00019205247260710778, "loss": 2.2189, "step": 108580 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019205175049290992, "loss": 2.1398, "step": 108585 }, { "epoch": 0.26, "grad_norm": 1.734375, "learning_rate": 0.0001920510283472655, "loss": 2.1793, "step": 108590 }, { "epoch": 0.26, "grad_norm": 1.65625, "learning_rate": 0.00019205030617017475, "loss": 2.2831, "step": 108595 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019204958396163796, "loss": 2.2119, "step": 108600 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.00019204886172165538, "loss": 2.0757, "step": 108605 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.00019204813945022722, "loss": 1.894, "step": 108610 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019204741714735375, "loss": 2.1189, "step": 108615 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001920466948130352, "loss": 2.1918, "step": 108620 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019204597244727186, "loss": 2.1027, "step": 108625 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.00019204525005006394, "loss": 2.2185, "step": 108630 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019204452762141167, "loss": 2.2057, "step": 108635 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019204380516131534, "loss": 2.2584, "step": 108640 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.00019204308266977516, "loss": 2.2358, "step": 108645 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019204236014679142, "loss": 2.0396, "step": 108650 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019204163759236432, "loss": 2.0984, "step": 108655 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019204091500649411, "loss": 2.0979, "step": 108660 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019204019238918112, "loss": 2.2466, "step": 108665 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019203946974042548, "loss": 2.029, "step": 108670 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.0001920387470602275, "loss": 2.1932, "step": 108675 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.0001920380243485874, "loss": 2.0317, "step": 108680 }, { "epoch": 0.26, "grad_norm": 1.90625, "learning_rate": 0.00019203730160550543, "loss": 2.1568, "step": 108685 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.0001920365788309819, "loss": 2.0776, "step": 108690 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019203585602501697, "loss": 1.9472, "step": 108695 }, { "epoch": 0.26, "grad_norm": 1.5, "learning_rate": 0.00019203513318761094, "loss": 1.9159, "step": 108700 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019203441031876402, "loss": 2.2227, "step": 108705 }, { "epoch": 0.26, "grad_norm": 1.6171875, "learning_rate": 0.00019203368741847648, "loss": 2.2214, "step": 108710 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019203296448674857, "loss": 2.0543, "step": 108715 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019203224152358053, "loss": 2.0236, "step": 108720 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019203151852897259, "loss": 2.0909, "step": 108725 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019203079550292505, "loss": 2.0387, "step": 108730 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.0001920300724454381, "loss": 2.1824, "step": 108735 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.000192029349356512, "loss": 2.0388, "step": 108740 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019202862623614702, "loss": 2.0713, "step": 108745 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001920279030843434, "loss": 2.1666, "step": 108750 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019202717990110136, "loss": 2.0545, "step": 108755 }, { "epoch": 0.26, "grad_norm": 3.21875, "learning_rate": 0.00019202645668642116, "loss": 2.2317, "step": 108760 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019202573344030306, "loss": 1.9574, "step": 108765 }, { "epoch": 0.26, "grad_norm": 1.6640625, "learning_rate": 0.0001920250101627473, "loss": 2.1555, "step": 108770 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019202428685375416, "loss": 1.9421, "step": 108775 }, { "epoch": 0.26, "grad_norm": 2.53125, "learning_rate": 0.0001920235635133238, "loss": 2.0491, "step": 108780 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019202284014145654, "loss": 2.1997, "step": 108785 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019202211673815264, "loss": 1.9732, "step": 108790 }, { "epoch": 0.26, "grad_norm": 1.7890625, "learning_rate": 0.00019202139330341228, "loss": 2.0677, "step": 108795 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019202066983723578, "loss": 2.2522, "step": 108800 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019201994633962332, "loss": 2.1786, "step": 108805 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019201922281057517, "loss": 2.38, "step": 108810 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019201849925009162, "loss": 2.2048, "step": 108815 }, { "epoch": 0.26, "grad_norm": 1.6875, "learning_rate": 0.00019201777565817287, "loss": 2.2298, "step": 108820 }, { "epoch": 0.26, "grad_norm": 2.609375, "learning_rate": 0.00019201705203481916, "loss": 2.1968, "step": 108825 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019201632838003078, "loss": 2.2071, "step": 108830 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.00019201560469380794, "loss": 2.1685, "step": 108835 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019201488097615087, "loss": 2.0896, "step": 108840 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.0001920141572270599, "loss": 2.1248, "step": 108845 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.0001920134334465352, "loss": 2.1179, "step": 108850 }, { "epoch": 0.26, "grad_norm": 3.1875, "learning_rate": 0.00019201270963457704, "loss": 2.2129, "step": 108855 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019201198579118568, "loss": 2.2176, "step": 108860 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019201126191636136, "loss": 1.9388, "step": 108865 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019201053801010432, "loss": 2.2009, "step": 108870 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.00019200981407241482, "loss": 2.0913, "step": 108875 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019200909010329308, "loss": 2.2349, "step": 108880 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019200836610273937, "loss": 2.1626, "step": 108885 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019200764207075394, "loss": 2.23, "step": 108890 }, { "epoch": 0.26, "grad_norm": 3.59375, "learning_rate": 0.00019200691800733702, "loss": 1.8654, "step": 108895 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.0001920061939124889, "loss": 2.2928, "step": 108900 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.00019200546978620975, "loss": 2.1348, "step": 108905 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001920047456284999, "loss": 2.1417, "step": 108910 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.00019200402143935955, "loss": 2.2316, "step": 108915 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019200329721878894, "loss": 2.0743, "step": 108920 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019200257296678834, "loss": 2.1803, "step": 108925 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.00019200184868335799, "loss": 2.1911, "step": 108930 }, { "epoch": 0.26, "grad_norm": 1.8515625, "learning_rate": 0.0001920011243684982, "loss": 2.0842, "step": 108935 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001920004000222091, "loss": 2.0583, "step": 108940 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019199967564449097, "loss": 2.1971, "step": 108945 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019199895123534413, "loss": 2.1586, "step": 108950 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019199822679476878, "loss": 2.2307, "step": 108955 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019199750232276516, "loss": 2.0376, "step": 108960 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.0001919967778193335, "loss": 2.326, "step": 108965 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.0001919960532844741, "loss": 2.2014, "step": 108970 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019199532871818714, "loss": 2.1616, "step": 108975 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019199460412047293, "loss": 2.4614, "step": 108980 }, { "epoch": 0.26, "grad_norm": 1.90625, "learning_rate": 0.0001919938794913317, "loss": 2.2271, "step": 108985 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019199315483076372, "loss": 2.1055, "step": 108990 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019199243013876918, "loss": 2.1989, "step": 108995 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.00019199170541534834, "loss": 2.4297, "step": 109000 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.0001919909806605015, "loss": 2.2587, "step": 109005 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019199025587422885, "loss": 2.2117, "step": 109010 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019198953105653067, "loss": 2.0496, "step": 109015 }, { "epoch": 0.26, "grad_norm": 2.65625, "learning_rate": 0.0001919888062074072, "loss": 1.9552, "step": 109020 }, { "epoch": 0.26, "grad_norm": 1.7578125, "learning_rate": 0.0001919880813268587, "loss": 2.1517, "step": 109025 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.0001919873564148854, "loss": 2.0553, "step": 109030 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.0001919866314714875, "loss": 2.0408, "step": 109035 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.00019198590649666534, "loss": 2.0779, "step": 109040 }, { "epoch": 0.26, "grad_norm": 2.59375, "learning_rate": 0.00019198518149041914, "loss": 2.184, "step": 109045 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.0001919844564527491, "loss": 2.2681, "step": 109050 }, { "epoch": 0.26, "grad_norm": 1.796875, "learning_rate": 0.00019198373138365552, "loss": 2.0359, "step": 109055 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019198300628313862, "loss": 2.2245, "step": 109060 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019198228115119867, "loss": 2.3224, "step": 109065 }, { "epoch": 0.26, "grad_norm": 1.796875, "learning_rate": 0.00019198155598783588, "loss": 2.164, "step": 109070 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019198083079305053, "loss": 2.3276, "step": 109075 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001919801055668429, "loss": 2.2191, "step": 109080 }, { "epoch": 0.26, "grad_norm": 1.6015625, "learning_rate": 0.00019197938030921313, "loss": 2.3362, "step": 109085 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001919786550201616, "loss": 2.1074, "step": 109090 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019197792969968846, "loss": 2.1826, "step": 109095 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.000191977204347794, "loss": 2.2909, "step": 109100 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019197647896447846, "loss": 2.0361, "step": 109105 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.0001919757535497421, "loss": 2.0264, "step": 109110 }, { "epoch": 0.26, "grad_norm": 1.875, "learning_rate": 0.00019197502810358512, "loss": 2.0812, "step": 109115 }, { "epoch": 0.26, "grad_norm": 1.65625, "learning_rate": 0.0001919743026260078, "loss": 2.2092, "step": 109120 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019197357711701042, "loss": 2.2557, "step": 109125 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.0001919728515765932, "loss": 2.2537, "step": 109130 }, { "epoch": 0.26, "grad_norm": 1.71875, "learning_rate": 0.00019197212600475635, "loss": 2.05, "step": 109135 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019197140040150019, "loss": 2.1909, "step": 109140 }, { "epoch": 0.26, "grad_norm": 2.609375, "learning_rate": 0.00019197067476682493, "loss": 2.0773, "step": 109145 }, { "epoch": 0.26, "grad_norm": 1.640625, "learning_rate": 0.00019196994910073082, "loss": 2.2371, "step": 109150 }, { "epoch": 0.26, "grad_norm": 2.484375, "learning_rate": 0.00019196922340321808, "loss": 2.0281, "step": 109155 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.000191968497674287, "loss": 2.1448, "step": 109160 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.0001919677719139378, "loss": 2.1532, "step": 109165 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019196704612217075, "loss": 2.2671, "step": 109170 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001919663202989861, "loss": 1.9301, "step": 109175 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001919655944443841, "loss": 2.1823, "step": 109180 }, { "epoch": 0.26, "grad_norm": 2.78125, "learning_rate": 0.00019196486855836494, "loss": 2.2564, "step": 109185 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019196414264092897, "loss": 1.9646, "step": 109190 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019196341669207634, "loss": 2.1473, "step": 109195 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019196269071180736, "loss": 2.1946, "step": 109200 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019196196470012224, "loss": 2.0642, "step": 109205 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019196123865702124, "loss": 1.9359, "step": 109210 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019196051258250462, "loss": 2.1716, "step": 109215 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019195978647657262, "loss": 2.0931, "step": 109220 }, { "epoch": 0.26, "grad_norm": 1.6171875, "learning_rate": 0.00019195906033922552, "loss": 2.2111, "step": 109225 }, { "epoch": 0.26, "grad_norm": 2.609375, "learning_rate": 0.0001919583341704635, "loss": 2.2523, "step": 109230 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019195760797028686, "loss": 2.2874, "step": 109235 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019195688173869583, "loss": 2.2557, "step": 109240 }, { "epoch": 0.26, "grad_norm": 1.6953125, "learning_rate": 0.00019195615547569066, "loss": 2.2346, "step": 109245 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.0001919554291812716, "loss": 2.2663, "step": 109250 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.0001919547028554389, "loss": 2.1752, "step": 109255 }, { "epoch": 0.26, "grad_norm": 2.515625, "learning_rate": 0.0001919539764981928, "loss": 2.1883, "step": 109260 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019195325010953359, "loss": 2.1431, "step": 109265 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.00019195252368946143, "loss": 2.1894, "step": 109270 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019195179723797664, "loss": 2.0342, "step": 109275 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019195107075507944, "loss": 2.1062, "step": 109280 }, { "epoch": 0.26, "grad_norm": 1.71875, "learning_rate": 0.0001919503442407701, "loss": 2.0481, "step": 109285 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019194961769504888, "loss": 2.0319, "step": 109290 }, { "epoch": 0.26, "grad_norm": 1.7578125, "learning_rate": 0.00019194889111791597, "loss": 2.1826, "step": 109295 }, { "epoch": 0.26, "grad_norm": 2.21875, "learning_rate": 0.00019194816450937165, "loss": 2.3162, "step": 109300 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019194743786941616, "loss": 2.1092, "step": 109305 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.0001919467111980498, "loss": 2.0932, "step": 109310 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019194598449527272, "loss": 2.1539, "step": 109315 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019194525776108527, "loss": 2.226, "step": 109320 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019194453099548761, "loss": 2.2601, "step": 109325 }, { "epoch": 0.26, "grad_norm": 1.7578125, "learning_rate": 0.00019194380419848008, "loss": 2.3633, "step": 109330 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.00019194307737006286, "loss": 2.3061, "step": 109335 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.0001919423505102362, "loss": 2.0108, "step": 109340 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019194162361900038, "loss": 2.0731, "step": 109345 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019194089669635563, "loss": 1.8876, "step": 109350 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019194016974230223, "loss": 2.2489, "step": 109355 }, { "epoch": 0.26, "grad_norm": 1.8359375, "learning_rate": 0.00019193944275684035, "loss": 2.2484, "step": 109360 }, { "epoch": 0.26, "grad_norm": 1.40625, "learning_rate": 0.0001919387157399703, "loss": 2.1411, "step": 109365 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019193798869169236, "loss": 2.287, "step": 109370 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.0001919372616120067, "loss": 2.198, "step": 109375 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.0001919365345009136, "loss": 2.1693, "step": 109380 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019193580735841334, "loss": 1.9776, "step": 109385 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.00019193508018450613, "loss": 2.1182, "step": 109390 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.0001919343529791922, "loss": 2.3188, "step": 109395 }, { "epoch": 0.26, "grad_norm": 2.546875, "learning_rate": 0.00019193362574247184, "loss": 1.9636, "step": 109400 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019193289847434532, "loss": 2.1855, "step": 109405 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.00019193217117481281, "loss": 2.1448, "step": 109410 }, { "epoch": 0.26, "grad_norm": 2.859375, "learning_rate": 0.00019193144384387463, "loss": 2.2734, "step": 109415 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.000191930716481531, "loss": 2.0794, "step": 109420 }, { "epoch": 0.26, "grad_norm": 1.640625, "learning_rate": 0.00019192998908778216, "loss": 2.0586, "step": 109425 }, { "epoch": 0.26, "grad_norm": 2.78125, "learning_rate": 0.00019192926166262838, "loss": 2.0169, "step": 109430 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019192853420606988, "loss": 2.1797, "step": 109435 }, { "epoch": 0.26, "grad_norm": 1.875, "learning_rate": 0.00019192780671810694, "loss": 2.0351, "step": 109440 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.0001919270791987398, "loss": 2.1388, "step": 109445 }, { "epoch": 0.26, "grad_norm": 1.90625, "learning_rate": 0.0001919263516479687, "loss": 1.8762, "step": 109450 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019192562406579388, "loss": 2.2749, "step": 109455 }, { "epoch": 0.26, "grad_norm": 2.21875, "learning_rate": 0.0001919248964522156, "loss": 2.1285, "step": 109460 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019192416880723414, "loss": 2.1213, "step": 109465 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.0001919234411308497, "loss": 2.1906, "step": 109470 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.0001919227134230625, "loss": 2.0296, "step": 109475 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019192198568387288, "loss": 2.1181, "step": 109480 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019192125791328104, "loss": 2.0085, "step": 109485 }, { "epoch": 0.26, "grad_norm": 1.8515625, "learning_rate": 0.00019192053011128722, "loss": 2.1736, "step": 109490 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001919198022778917, "loss": 2.0217, "step": 109495 }, { "epoch": 0.26, "grad_norm": 1.6796875, "learning_rate": 0.00019191907441309467, "loss": 2.2379, "step": 109500 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019191834651689644, "loss": 2.3587, "step": 109505 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019191761858929724, "loss": 2.1023, "step": 109510 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001919168906302973, "loss": 2.2759, "step": 109515 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.0001919161626398969, "loss": 2.14, "step": 109520 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019191543461809625, "loss": 2.1895, "step": 109525 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019191470656489563, "loss": 2.2955, "step": 109530 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001919139784802953, "loss": 2.0151, "step": 109535 }, { "epoch": 0.26, "grad_norm": 2.4375, "learning_rate": 0.00019191325036429547, "loss": 2.3213, "step": 109540 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019191252221689638, "loss": 2.1444, "step": 109545 }, { "epoch": 0.26, "grad_norm": 1.53125, "learning_rate": 0.00019191179403809836, "loss": 1.976, "step": 109550 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019191106582790157, "loss": 2.1374, "step": 109555 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019191033758630632, "loss": 2.1517, "step": 109560 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.0001919096093133128, "loss": 2.1754, "step": 109565 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.0001919088810089213, "loss": 2.0555, "step": 109570 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019190815267313208, "loss": 2.0841, "step": 109575 }, { "epoch": 0.26, "grad_norm": 1.6015625, "learning_rate": 0.00019190742430594534, "loss": 2.1939, "step": 109580 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019190669590736137, "loss": 2.2329, "step": 109585 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.00019190596747738038, "loss": 2.2626, "step": 109590 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019190523901600268, "loss": 1.9934, "step": 109595 }, { "epoch": 0.26, "grad_norm": 1.6171875, "learning_rate": 0.0001919045105232285, "loss": 2.0025, "step": 109600 }, { "epoch": 0.26, "grad_norm": 4.625, "learning_rate": 0.00019190378199905803, "loss": 2.0935, "step": 109605 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019190305344349157, "loss": 2.3867, "step": 109610 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019190232485652938, "loss": 2.0969, "step": 109615 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019190159623817165, "loss": 2.1945, "step": 109620 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019190086758841873, "loss": 2.164, "step": 109625 }, { "epoch": 0.26, "grad_norm": 1.8359375, "learning_rate": 0.00019190013890727075, "loss": 2.1821, "step": 109630 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019189941019472802, "loss": 2.088, "step": 109635 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.0001918986814507908, "loss": 2.0247, "step": 109640 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.00019189795267545934, "loss": 2.1414, "step": 109645 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019189722386873386, "loss": 2.0526, "step": 109650 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.0001918964950306146, "loss": 2.1119, "step": 109655 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019189576616110188, "loss": 2.187, "step": 109660 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019189503726019583, "loss": 2.0258, "step": 109665 }, { "epoch": 0.26, "grad_norm": 1.7890625, "learning_rate": 0.00019189430832789684, "loss": 2.2038, "step": 109670 }, { "epoch": 0.26, "grad_norm": 2.625, "learning_rate": 0.00019189357936420505, "loss": 2.3805, "step": 109675 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019189285036912075, "loss": 2.0595, "step": 109680 }, { "epoch": 0.26, "grad_norm": 2.453125, "learning_rate": 0.00019189212134264417, "loss": 2.1658, "step": 109685 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.0001918913922847756, "loss": 2.1603, "step": 109690 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019189066319551525, "loss": 2.1421, "step": 109695 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.0001918899340748634, "loss": 2.1862, "step": 109700 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019188920492282027, "loss": 2.2098, "step": 109705 }, { "epoch": 0.26, "grad_norm": 1.6875, "learning_rate": 0.00019188847573938612, "loss": 2.0535, "step": 109710 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.0001918877465245612, "loss": 2.0509, "step": 109715 }, { "epoch": 0.26, "grad_norm": 1.625, "learning_rate": 0.00019188701727834576, "loss": 2.1481, "step": 109720 }, { "epoch": 0.26, "grad_norm": 1.3671875, "learning_rate": 0.00019188628800074006, "loss": 2.1225, "step": 109725 }, { "epoch": 0.26, "grad_norm": 1.6796875, "learning_rate": 0.00019188555869174433, "loss": 2.2726, "step": 109730 }, { "epoch": 0.26, "grad_norm": 1.8359375, "learning_rate": 0.00019188482935135882, "loss": 2.1342, "step": 109735 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.0001918840999795838, "loss": 2.147, "step": 109740 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001918833705764195, "loss": 2.2134, "step": 109745 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019188264114186615, "loss": 2.0194, "step": 109750 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019188191167592407, "loss": 1.9433, "step": 109755 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.00019188118217859342, "loss": 2.2221, "step": 109760 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019188045264987452, "loss": 2.1035, "step": 109765 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019187972308976758, "loss": 2.0934, "step": 109770 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.00019187899349827286, "loss": 2.028, "step": 109775 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001918782638753906, "loss": 2.1393, "step": 109780 }, { "epoch": 0.26, "grad_norm": 1.7109375, "learning_rate": 0.00019187753422112108, "loss": 2.0196, "step": 109785 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.0001918768045354645, "loss": 1.9156, "step": 109790 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019187607481842116, "loss": 2.1685, "step": 109795 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001918753450699913, "loss": 2.2355, "step": 109800 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001918746152901751, "loss": 2.1363, "step": 109805 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019187388547897293, "loss": 2.1852, "step": 109810 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019187315563638492, "loss": 2.1133, "step": 109815 }, { "epoch": 0.26, "grad_norm": 2.53125, "learning_rate": 0.0001918724257624114, "loss": 2.0642, "step": 109820 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.0001918716958570526, "loss": 2.0588, "step": 109825 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019187096592030876, "loss": 2.0624, "step": 109830 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.0001918702359521801, "loss": 2.1819, "step": 109835 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019186950595266694, "loss": 2.0877, "step": 109840 }, { "epoch": 0.26, "grad_norm": 3.015625, "learning_rate": 0.00019186877592176948, "loss": 2.104, "step": 109845 }, { "epoch": 0.26, "grad_norm": 1.875, "learning_rate": 0.00019186804585948796, "loss": 2.1709, "step": 109850 }, { "epoch": 0.26, "grad_norm": 1.71875, "learning_rate": 0.0001918673157658227, "loss": 2.2399, "step": 109855 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019186658564077384, "loss": 2.0945, "step": 109860 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019186585548434173, "loss": 2.1329, "step": 109865 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019186512529652657, "loss": 2.0148, "step": 109870 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019186439507732858, "loss": 2.0403, "step": 109875 }, { "epoch": 0.26, "grad_norm": 1.90625, "learning_rate": 0.00019186366482674809, "loss": 2.0616, "step": 109880 }, { "epoch": 0.26, "grad_norm": 1.953125, "learning_rate": 0.0001918629345447853, "loss": 2.1396, "step": 109885 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019186220423144046, "loss": 1.9883, "step": 109890 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019186147388671382, "loss": 2.1327, "step": 109895 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019186074351060565, "loss": 2.2027, "step": 109900 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019186001310311616, "loss": 2.1088, "step": 109905 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019185928266424561, "loss": 2.1737, "step": 109910 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019185855219399432, "loss": 2.1907, "step": 109915 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019185782169236243, "loss": 1.9661, "step": 109920 }, { "epoch": 0.26, "grad_norm": 1.6953125, "learning_rate": 0.00019185709115935028, "loss": 2.1311, "step": 109925 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019185636059495807, "loss": 2.2246, "step": 109930 }, { "epoch": 0.26, "grad_norm": 1.75, "learning_rate": 0.00019185562999918604, "loss": 1.993, "step": 109935 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019185489937203446, "loss": 2.1238, "step": 109940 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019185416871350363, "loss": 2.025, "step": 109945 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019185343802359374, "loss": 2.072, "step": 109950 }, { "epoch": 0.26, "grad_norm": 1.7421875, "learning_rate": 0.000191852707302305, "loss": 2.1456, "step": 109955 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019185197654963775, "loss": 2.0213, "step": 109960 }, { "epoch": 0.26, "grad_norm": 2.453125, "learning_rate": 0.00019185124576559217, "loss": 2.0368, "step": 109965 }, { "epoch": 0.26, "grad_norm": 4.25, "learning_rate": 0.00019185051495016855, "loss": 2.2436, "step": 109970 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019184978410336713, "loss": 2.2865, "step": 109975 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019184905322518816, "loss": 2.259, "step": 109980 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.0001918483223156319, "loss": 2.1545, "step": 109985 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019184759137469857, "loss": 1.9323, "step": 109990 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019184686040238843, "loss": 2.1982, "step": 109995 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019184612939870176, "loss": 2.404, "step": 110000 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019184539836363877, "loss": 2.0803, "step": 110005 }, { "epoch": 0.26, "grad_norm": 2.703125, "learning_rate": 0.00019184466729719972, "loss": 2.1858, "step": 110010 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.0001918439361993849, "loss": 2.2383, "step": 110015 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019184320507019452, "loss": 2.0177, "step": 110020 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019184247390962878, "loss": 2.1651, "step": 110025 }, { "epoch": 0.26, "grad_norm": 1.875, "learning_rate": 0.00019184174271768805, "loss": 2.2514, "step": 110030 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019184101149437248, "loss": 2.1743, "step": 110035 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019184028023968238, "loss": 2.3129, "step": 110040 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019183954895361796, "loss": 2.012, "step": 110045 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019183881763617947, "loss": 2.1757, "step": 110050 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019183808628736717, "loss": 2.0417, "step": 110055 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019183735490718132, "loss": 2.3499, "step": 110060 }, { "epoch": 0.26, "grad_norm": 1.8515625, "learning_rate": 0.00019183662349562218, "loss": 2.241, "step": 110065 }, { "epoch": 0.26, "grad_norm": 1.953125, "learning_rate": 0.00019183589205269003, "loss": 2.2092, "step": 110070 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.000191835160578385, "loss": 2.171, "step": 110075 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019183442907270742, "loss": 2.0102, "step": 110080 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019183369753565758, "loss": 2.1646, "step": 110085 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019183296596723563, "loss": 2.0707, "step": 110090 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019183223436744192, "loss": 2.1345, "step": 110095 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019183150273627663, "loss": 2.1896, "step": 110100 }, { "epoch": 0.26, "grad_norm": 2.46875, "learning_rate": 0.00019183077107374, "loss": 2.04, "step": 110105 }, { "epoch": 0.26, "grad_norm": 1.5703125, "learning_rate": 0.00019183003937983236, "loss": 2.236, "step": 110110 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.0001918293076545539, "loss": 2.0949, "step": 110115 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.0001918285758979049, "loss": 2.0875, "step": 110120 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019182784410988558, "loss": 2.1643, "step": 110125 }, { "epoch": 0.26, "grad_norm": 2.484375, "learning_rate": 0.00019182711229049617, "loss": 2.1336, "step": 110130 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.00019182638043973698, "loss": 2.2624, "step": 110135 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.00019182564855760824, "loss": 2.0452, "step": 110140 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001918249166441102, "loss": 2.1002, "step": 110145 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.0001918241846992431, "loss": 2.0542, "step": 110150 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019182345272300718, "loss": 2.1982, "step": 110155 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019182272071540272, "loss": 2.02, "step": 110160 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.0001918219886764299, "loss": 2.2336, "step": 110165 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019182125660608908, "loss": 2.1614, "step": 110170 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019182052450438043, "loss": 2.1734, "step": 110175 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019181979237130426, "loss": 1.874, "step": 110180 }, { "epoch": 0.26, "grad_norm": 2.8125, "learning_rate": 0.00019181906020686074, "loss": 2.0788, "step": 110185 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.0001918183280110502, "loss": 2.1018, "step": 110190 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001918175957838728, "loss": 2.3136, "step": 110195 }, { "epoch": 0.26, "grad_norm": 1.59375, "learning_rate": 0.0001918168635253289, "loss": 2.0914, "step": 110200 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019181613123541867, "loss": 2.3249, "step": 110205 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019181539891414238, "loss": 2.139, "step": 110210 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019181466656150028, "loss": 1.9811, "step": 110215 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019181393417749265, "loss": 2.1123, "step": 110220 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001918132017621197, "loss": 2.167, "step": 110225 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001918124693153817, "loss": 2.2392, "step": 110230 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001918117368372789, "loss": 2.1403, "step": 110235 }, { "epoch": 0.26, "grad_norm": 1.609375, "learning_rate": 0.0001918110043278115, "loss": 2.063, "step": 110240 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019181027178697985, "loss": 2.2762, "step": 110245 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019180953921478413, "loss": 2.1368, "step": 110250 }, { "epoch": 0.26, "grad_norm": 2.5, "learning_rate": 0.0001918088066112246, "loss": 2.1614, "step": 110255 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019180807397630153, "loss": 1.9835, "step": 110260 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.00019180734131001514, "loss": 2.1319, "step": 110265 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.0001918066086123657, "loss": 2.1617, "step": 110270 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019180587588335345, "loss": 2.1853, "step": 110275 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001918051431229787, "loss": 2.0063, "step": 110280 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.0001918044103312416, "loss": 2.2312, "step": 110285 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019180367750814244, "loss": 2.1308, "step": 110290 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019180294465368148, "loss": 2.2091, "step": 110295 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019180221176785903, "loss": 2.1238, "step": 110300 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019180147885067522, "loss": 2.0584, "step": 110305 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019180074590213034, "loss": 1.9926, "step": 110310 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019180001292222473, "loss": 2.1588, "step": 110315 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019179927991095852, "loss": 2.1611, "step": 110320 }, { "epoch": 0.26, "grad_norm": 1.59375, "learning_rate": 0.00019179854686833205, "loss": 2.0198, "step": 110325 }, { "epoch": 0.26, "grad_norm": 2.484375, "learning_rate": 0.0001917978137943455, "loss": 2.2852, "step": 110330 }, { "epoch": 0.26, "grad_norm": 2.5, "learning_rate": 0.00019179708068899916, "loss": 2.345, "step": 110335 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019179634755229325, "loss": 2.0813, "step": 110340 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.0001917956143842281, "loss": 2.143, "step": 110345 }, { "epoch": 0.26, "grad_norm": 1.7734375, "learning_rate": 0.00019179488118480388, "loss": 2.0922, "step": 110350 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019179414795402082, "loss": 1.9093, "step": 110355 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019179341469187925, "loss": 2.3514, "step": 110360 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019179268139837938, "loss": 2.2519, "step": 110365 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019179194807352148, "loss": 2.1375, "step": 110370 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019179121471730578, "loss": 2.0078, "step": 110375 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001917904813297325, "loss": 2.2065, "step": 110380 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019178974791080196, "loss": 2.3183, "step": 110385 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.00019178901446051437, "loss": 2.0076, "step": 110390 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019178828097887, "loss": 2.1567, "step": 110395 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019178754746586906, "loss": 2.0729, "step": 110400 }, { "epoch": 0.26, "grad_norm": 1.671875, "learning_rate": 0.00019178681392151186, "loss": 2.3336, "step": 110405 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019178608034579861, "loss": 2.2819, "step": 110410 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019178534673872956, "loss": 2.2477, "step": 110415 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.000191784613100305, "loss": 2.1379, "step": 110420 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019178387943052512, "loss": 2.3018, "step": 110425 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019178314572939023, "loss": 2.1675, "step": 110430 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019178241199690056, "loss": 2.3227, "step": 110435 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.0001917816782330563, "loss": 2.2241, "step": 110440 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.0001917809444378578, "loss": 2.3705, "step": 110445 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019178021061130523, "loss": 2.165, "step": 110450 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.0001917794767533989, "loss": 2.2633, "step": 110455 }, { "epoch": 0.26, "grad_norm": 1.7890625, "learning_rate": 0.00019177874286413905, "loss": 2.2229, "step": 110460 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019177800894352588, "loss": 2.1273, "step": 110465 }, { "epoch": 0.26, "grad_norm": 1.7421875, "learning_rate": 0.0001917772749915597, "loss": 2.1478, "step": 110470 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019177654100824073, "loss": 2.1504, "step": 110475 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019177580699356923, "loss": 2.0519, "step": 110480 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019177507294754545, "loss": 1.892, "step": 110485 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.00019177433887016963, "loss": 2.1854, "step": 110490 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019177360476144205, "loss": 2.1517, "step": 110495 }, { "epoch": 0.26, "grad_norm": 1.6953125, "learning_rate": 0.00019177287062136295, "loss": 2.0198, "step": 110500 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019177213644993256, "loss": 2.0325, "step": 110505 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019177140224715115, "loss": 2.2187, "step": 110510 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019177066801301894, "loss": 2.1079, "step": 110515 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.00019176993374753621, "loss": 2.2239, "step": 110520 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019176919945070324, "loss": 2.1674, "step": 110525 }, { "epoch": 0.26, "grad_norm": 2.46875, "learning_rate": 0.00019176846512252024, "loss": 2.1909, "step": 110530 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019176773076298744, "loss": 2.2314, "step": 110535 }, { "epoch": 0.26, "grad_norm": 2.578125, "learning_rate": 0.00019176699637210516, "loss": 2.0326, "step": 110540 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019176626194987358, "loss": 2.1322, "step": 110545 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019176552749629297, "loss": 2.2301, "step": 110550 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.0001917647930113636, "loss": 2.2962, "step": 110555 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019176405849508573, "loss": 2.205, "step": 110560 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019176332394745958, "loss": 1.9679, "step": 110565 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019176258936848544, "loss": 2.2057, "step": 110570 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.00019176185475816348, "loss": 2.1002, "step": 110575 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019176112011649404, "loss": 2.1642, "step": 110580 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019176038544347734, "loss": 2.0648, "step": 110585 }, { "epoch": 0.26, "grad_norm": 2.21875, "learning_rate": 0.00019175965073911364, "loss": 2.1404, "step": 110590 }, { "epoch": 0.26, "grad_norm": 1.640625, "learning_rate": 0.00019175891600340314, "loss": 2.1374, "step": 110595 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019175818123634618, "loss": 2.1653, "step": 110600 }, { "epoch": 0.26, "grad_norm": 2.875, "learning_rate": 0.00019175744643794293, "loss": 2.1396, "step": 110605 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019175671160819366, "loss": 2.0546, "step": 110610 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019175597674709867, "loss": 2.2391, "step": 110615 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019175524185465815, "loss": 2.1424, "step": 110620 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.00019175450693087237, "loss": 2.0427, "step": 110625 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019175377197574157, "loss": 2.0318, "step": 110630 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019175303698926608, "loss": 2.3603, "step": 110635 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019175230197144603, "loss": 2.1295, "step": 110640 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019175156692228176, "loss": 2.2328, "step": 110645 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019175083184177347, "loss": 2.2989, "step": 110650 }, { "epoch": 0.26, "grad_norm": 2.515625, "learning_rate": 0.00019175009672992146, "loss": 2.0994, "step": 110655 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019174936158672593, "loss": 2.0436, "step": 110660 }, { "epoch": 0.26, "grad_norm": 1.65625, "learning_rate": 0.00019174862641218717, "loss": 2.3115, "step": 110665 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.0001917478912063054, "loss": 2.0097, "step": 110670 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.0001917471559690809, "loss": 2.1968, "step": 110675 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019174642070051389, "loss": 2.2901, "step": 110680 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019174568540060465, "loss": 2.1933, "step": 110685 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019174495006935342, "loss": 2.2255, "step": 110690 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019174421470676044, "loss": 1.95, "step": 110695 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.000191743479312826, "loss": 2.0169, "step": 110700 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019174274388755028, "loss": 2.054, "step": 110705 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.0001917420084309336, "loss": 1.9265, "step": 110710 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.0001917412729429762, "loss": 2.1582, "step": 110715 }, { "epoch": 0.26, "grad_norm": 1.796875, "learning_rate": 0.0001917405374236783, "loss": 2.1143, "step": 110720 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019173980187304018, "loss": 2.2568, "step": 110725 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001917390662910621, "loss": 2.0298, "step": 110730 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019173833067774422, "loss": 2.3319, "step": 110735 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019173759503308691, "loss": 2.215, "step": 110740 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.0001917368593570904, "loss": 2.1796, "step": 110745 }, { "epoch": 0.26, "grad_norm": 1.7890625, "learning_rate": 0.00019173612364975487, "loss": 2.21, "step": 110750 }, { "epoch": 0.26, "grad_norm": 2.515625, "learning_rate": 0.00019173538791108062, "loss": 2.1946, "step": 110755 }, { "epoch": 0.26, "grad_norm": 1.8515625, "learning_rate": 0.00019173465214106792, "loss": 2.0913, "step": 110760 }, { "epoch": 0.26, "grad_norm": 1.6484375, "learning_rate": 0.00019173391633971697, "loss": 2.1329, "step": 110765 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019173318050702808, "loss": 2.3005, "step": 110770 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019173244464300147, "loss": 2.2944, "step": 110775 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.0001917317087476374, "loss": 2.1324, "step": 110780 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.0001917309728209361, "loss": 2.365, "step": 110785 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019173023686289782, "loss": 2.1161, "step": 110790 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019172950087352283, "loss": 2.4067, "step": 110795 }, { "epoch": 0.26, "grad_norm": 1.5859375, "learning_rate": 0.00019172876485281138, "loss": 2.0664, "step": 110800 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.00019172802880076375, "loss": 2.2459, "step": 110805 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019172729271738014, "loss": 2.1045, "step": 110810 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019172655660266083, "loss": 2.0193, "step": 110815 }, { "epoch": 0.26, "grad_norm": 1.8359375, "learning_rate": 0.00019172582045660604, "loss": 2.1249, "step": 110820 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.0001917250842792161, "loss": 2.1619, "step": 110825 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019172434807049115, "loss": 2.0224, "step": 110830 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.0001917236118304315, "loss": 2.051, "step": 110835 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.00019172287555903745, "loss": 2.1449, "step": 110840 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001917221392563092, "loss": 2.0563, "step": 110845 }, { "epoch": 0.26, "grad_norm": 1.671875, "learning_rate": 0.00019172140292224698, "loss": 2.0145, "step": 110850 }, { "epoch": 0.26, "grad_norm": 1.75, "learning_rate": 0.00019172066655685107, "loss": 1.9406, "step": 110855 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.0001917199301601217, "loss": 2.0834, "step": 110860 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019171919373205915, "loss": 2.1846, "step": 110865 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019171845727266367, "loss": 2.1987, "step": 110870 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.0001917177207819355, "loss": 2.0774, "step": 110875 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001917169842598749, "loss": 2.1221, "step": 110880 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.0001917162477064821, "loss": 2.219, "step": 110885 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019171551112175735, "loss": 2.3167, "step": 110890 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019171477450570092, "loss": 2.0927, "step": 110895 }, { "epoch": 0.26, "grad_norm": 2.4375, "learning_rate": 0.00019171403785831308, "loss": 2.0271, "step": 110900 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019171330117959405, "loss": 2.2695, "step": 110905 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.0001917125644695441, "loss": 2.0329, "step": 110910 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.0001917118277281635, "loss": 2.3467, "step": 110915 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019171109095545243, "loss": 2.0269, "step": 110920 }, { "epoch": 0.26, "grad_norm": 1.8828125, "learning_rate": 0.0001917103541514112, "loss": 2.0966, "step": 110925 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019170961731604006, "loss": 2.1399, "step": 110930 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019170888044933926, "loss": 2.1007, "step": 110935 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019170814355130904, "loss": 2.1094, "step": 110940 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019170740662194965, "loss": 2.2341, "step": 110945 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019170666966126135, "loss": 2.0855, "step": 110950 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019170593266924438, "loss": 2.0155, "step": 110955 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.000191705195645899, "loss": 2.3873, "step": 110960 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019170445859122547, "loss": 2.3119, "step": 110965 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.00019170372150522402, "loss": 2.3182, "step": 110970 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019170298438789493, "loss": 2.2112, "step": 110975 }, { "epoch": 0.26, "grad_norm": 2.890625, "learning_rate": 0.00019170224723923842, "loss": 2.0182, "step": 110980 }, { "epoch": 0.26, "grad_norm": 1.6640625, "learning_rate": 0.00019170151005925477, "loss": 2.3053, "step": 110985 }, { "epoch": 0.26, "grad_norm": 1.7578125, "learning_rate": 0.00019170077284794422, "loss": 2.1446, "step": 110990 }, { "epoch": 0.26, "grad_norm": 1.7734375, "learning_rate": 0.00019170003560530706, "loss": 2.1173, "step": 110995 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.00019169929833134344, "loss": 2.1391, "step": 111000 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019169856102605373, "loss": 2.1926, "step": 111005 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019169782368943809, "loss": 2.1804, "step": 111010 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019169708632149683, "loss": 2.202, "step": 111015 }, { "epoch": 0.26, "grad_norm": 1.7578125, "learning_rate": 0.00019169634892223018, "loss": 2.2274, "step": 111020 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019169561149163839, "loss": 2.2283, "step": 111025 }, { "epoch": 0.26, "grad_norm": 1.71875, "learning_rate": 0.0001916948740297217, "loss": 2.1416, "step": 111030 }, { "epoch": 0.26, "grad_norm": 1.71875, "learning_rate": 0.00019169413653648043, "loss": 2.1751, "step": 111035 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.00019169339901191473, "loss": 2.0823, "step": 111040 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.00019169266145602492, "loss": 2.12, "step": 111045 }, { "epoch": 0.26, "grad_norm": 2.8125, "learning_rate": 0.00019169192386881122, "loss": 2.0967, "step": 111050 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019169118625027393, "loss": 2.1592, "step": 111055 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019169044860041322, "loss": 2.2166, "step": 111060 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019168971091922945, "loss": 2.0723, "step": 111065 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019168897320672275, "loss": 2.4532, "step": 111070 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.00019168823546289348, "loss": 2.1521, "step": 111075 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.0001916874976877418, "loss": 2.1335, "step": 111080 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.00019168675988126806, "loss": 2.1814, "step": 111085 }, { "epoch": 0.26, "grad_norm": 1.953125, "learning_rate": 0.00019168602204347243, "loss": 1.9873, "step": 111090 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.0001916852841743552, "loss": 2.0385, "step": 111095 }, { "epoch": 0.26, "grad_norm": 2.578125, "learning_rate": 0.0001916845462739166, "loss": 2.0563, "step": 111100 }, { "epoch": 0.26, "grad_norm": 2.484375, "learning_rate": 0.0001916838083421569, "loss": 2.1891, "step": 111105 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019168307037907637, "loss": 2.2849, "step": 111110 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019168233238467524, "loss": 2.115, "step": 111115 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019168159435895373, "loss": 2.2642, "step": 111120 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019168085630191217, "loss": 1.9628, "step": 111125 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019168011821355072, "loss": 2.0114, "step": 111130 }, { "epoch": 0.26, "grad_norm": 2.25, "learning_rate": 0.0001916793800938697, "loss": 2.2148, "step": 111135 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019167864194286936, "loss": 2.2115, "step": 111140 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.0001916779037605499, "loss": 2.2426, "step": 111145 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019167716554691162, "loss": 2.1189, "step": 111150 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019167642730195475, "loss": 2.0793, "step": 111155 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.0001916756890256796, "loss": 2.156, "step": 111160 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.0001916749507180863, "loss": 2.1914, "step": 111165 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.0001916742123791752, "loss": 2.0299, "step": 111170 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019167347400894652, "loss": 2.1439, "step": 111175 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019167273560740053, "loss": 2.355, "step": 111180 }, { "epoch": 0.26, "grad_norm": 1.6484375, "learning_rate": 0.00019167199717453747, "loss": 2.036, "step": 111185 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.0001916712587103576, "loss": 2.0995, "step": 111190 }, { "epoch": 0.26, "grad_norm": 2.515625, "learning_rate": 0.00019167052021486116, "loss": 2.0566, "step": 111195 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019166978168804838, "loss": 1.9217, "step": 111200 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019166904312991958, "loss": 2.1338, "step": 111205 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.00019166830454047499, "loss": 2.0697, "step": 111210 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019166756591971478, "loss": 2.0725, "step": 111215 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019166682726763932, "loss": 2.0125, "step": 111220 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019166608858424877, "loss": 2.1775, "step": 111225 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019166534986954345, "loss": 2.104, "step": 111230 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019166461112352359, "loss": 2.0954, "step": 111235 }, { "epoch": 0.26, "grad_norm": 2.84375, "learning_rate": 0.00019166387234618938, "loss": 2.2531, "step": 111240 }, { "epoch": 0.26, "grad_norm": 2.75, "learning_rate": 0.00019166313353754117, "loss": 2.2479, "step": 111245 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.0001916623946975792, "loss": 2.025, "step": 111250 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019166165582630367, "loss": 2.2756, "step": 111255 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019166091692371484, "loss": 2.2567, "step": 111260 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019166017798981298, "loss": 2.1503, "step": 111265 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019165943902459836, "loss": 2.0039, "step": 111270 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001916587000280712, "loss": 2.1142, "step": 111275 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019165796100023176, "loss": 2.1552, "step": 111280 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.0001916572219410803, "loss": 2.1352, "step": 111285 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001916564828506171, "loss": 2.2083, "step": 111290 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019165574372884234, "loss": 2.2382, "step": 111295 }, { "epoch": 0.26, "grad_norm": 2.546875, "learning_rate": 0.00019165500457575635, "loss": 2.1423, "step": 111300 }, { "epoch": 0.26, "grad_norm": 1.6328125, "learning_rate": 0.00019165426539135934, "loss": 2.1166, "step": 111305 }, { "epoch": 0.26, "grad_norm": 1.6328125, "learning_rate": 0.00019165352617565157, "loss": 2.1565, "step": 111310 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019165278692863326, "loss": 2.1141, "step": 111315 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019165204765030476, "loss": 2.0274, "step": 111320 }, { "epoch": 0.26, "grad_norm": 2.375, "learning_rate": 0.0001916513083406662, "loss": 2.1552, "step": 111325 }, { "epoch": 0.26, "grad_norm": 1.7578125, "learning_rate": 0.00019165056899971794, "loss": 2.1054, "step": 111330 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.00019164982962746014, "loss": 2.2485, "step": 111335 }, { "epoch": 0.26, "grad_norm": 1.953125, "learning_rate": 0.00019164909022389312, "loss": 2.1916, "step": 111340 }, { "epoch": 0.26, "grad_norm": 1.875, "learning_rate": 0.00019164835078901708, "loss": 2.062, "step": 111345 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019164761132283234, "loss": 2.3005, "step": 111350 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.0001916468718253391, "loss": 2.1406, "step": 111355 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.00019164613229653762, "loss": 2.0514, "step": 111360 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019164539273642818, "loss": 1.9568, "step": 111365 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.000191644653145011, "loss": 2.0407, "step": 111370 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019164391352228632, "loss": 2.1059, "step": 111375 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019164317386825442, "loss": 2.0796, "step": 111380 }, { "epoch": 0.26, "grad_norm": 1.796875, "learning_rate": 0.0001916424341829156, "loss": 1.9908, "step": 111385 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019164169446627002, "loss": 2.1243, "step": 111390 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.00019164095471831797, "loss": 2.1115, "step": 111395 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019164021493905972, "loss": 1.9748, "step": 111400 }, { "epoch": 0.26, "grad_norm": 1.8515625, "learning_rate": 0.0001916394751284955, "loss": 2.1037, "step": 111405 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019163873528662558, "loss": 2.1041, "step": 111410 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019163799541345022, "loss": 2.1494, "step": 111415 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019163725550896965, "loss": 2.2833, "step": 111420 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019163651557318413, "loss": 2.0918, "step": 111425 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019163577560609395, "loss": 2.2132, "step": 111430 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.00019163503560769927, "loss": 2.2921, "step": 111435 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.00019163429557800043, "loss": 2.0414, "step": 111440 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019163355551699764, "loss": 2.038, "step": 111445 }, { "epoch": 0.26, "grad_norm": 2.5625, "learning_rate": 0.0001916328154246912, "loss": 2.1552, "step": 111450 }, { "epoch": 0.26, "grad_norm": 2.40625, "learning_rate": 0.00019163207530108127, "loss": 2.0875, "step": 111455 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001916313351461682, "loss": 2.1597, "step": 111460 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019163059495995216, "loss": 2.3305, "step": 111465 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.0001916298547424335, "loss": 2.1315, "step": 111470 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.00019162911449361242, "loss": 2.4696, "step": 111475 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019162837421348914, "loss": 2.1656, "step": 111480 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019162763390206396, "loss": 2.1611, "step": 111485 }, { "epoch": 0.26, "grad_norm": 1.6953125, "learning_rate": 0.00019162689355933712, "loss": 2.1359, "step": 111490 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019162615318530886, "loss": 2.1057, "step": 111495 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019162541277997948, "loss": 1.9633, "step": 111500 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019162467234334915, "loss": 2.1822, "step": 111505 }, { "epoch": 0.26, "grad_norm": 1.625, "learning_rate": 0.00019162393187541817, "loss": 2.0838, "step": 111510 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019162319137618685, "loss": 2.1414, "step": 111515 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019162245084565537, "loss": 2.3478, "step": 111520 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019162171028382395, "loss": 2.1337, "step": 111525 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019162096969069293, "loss": 2.3428, "step": 111530 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.00019162022906626252, "loss": 2.1933, "step": 111535 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.000191619488410533, "loss": 2.1187, "step": 111540 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019161874772350457, "loss": 2.1686, "step": 111545 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019161800700517748, "loss": 2.2726, "step": 111550 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019161726625555208, "loss": 2.1912, "step": 111555 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019161652547462855, "loss": 2.1528, "step": 111560 }, { "epoch": 0.26, "grad_norm": 1.765625, "learning_rate": 0.00019161578466240714, "loss": 2.1653, "step": 111565 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.0001916150438188881, "loss": 2.2347, "step": 111570 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.00019161430294407174, "loss": 2.0355, "step": 111575 }, { "epoch": 0.26, "grad_norm": 1.671875, "learning_rate": 0.00019161356203795823, "loss": 2.0884, "step": 111580 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.0001916128211005479, "loss": 2.1322, "step": 111585 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019161208013184094, "loss": 2.2502, "step": 111590 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019161133913183766, "loss": 2.2617, "step": 111595 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019161059810053827, "loss": 2.0866, "step": 111600 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019160985703794303, "loss": 2.2326, "step": 111605 }, { "epoch": 0.26, "grad_norm": 3.0625, "learning_rate": 0.0001916091159440522, "loss": 2.0254, "step": 111610 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019160837481886606, "loss": 2.1164, "step": 111615 }, { "epoch": 0.26, "grad_norm": 1.7890625, "learning_rate": 0.00019160763366238482, "loss": 2.0337, "step": 111620 }, { "epoch": 0.26, "grad_norm": 1.8203125, "learning_rate": 0.00019160689247460876, "loss": 2.0669, "step": 111625 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019160615125553813, "loss": 2.2452, "step": 111630 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019160541000517317, "loss": 2.2457, "step": 111635 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019160466872351415, "loss": 2.2068, "step": 111640 }, { "epoch": 0.26, "grad_norm": 1.796875, "learning_rate": 0.00019160392741056129, "loss": 1.9464, "step": 111645 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019160318606631488, "loss": 2.3404, "step": 111650 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.00019160244469077514, "loss": 2.2652, "step": 111655 }, { "epoch": 0.26, "grad_norm": 1.6640625, "learning_rate": 0.00019160170328394238, "loss": 2.0257, "step": 111660 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019160096184581677, "loss": 2.1495, "step": 111665 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019160022037639863, "loss": 2.1345, "step": 111670 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019159947887568824, "loss": 2.2974, "step": 111675 }, { "epoch": 0.26, "grad_norm": 2.4375, "learning_rate": 0.00019159873734368577, "loss": 2.1699, "step": 111680 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.0001915979957803915, "loss": 2.1085, "step": 111685 }, { "epoch": 0.26, "grad_norm": 1.8515625, "learning_rate": 0.0001915972541858057, "loss": 1.9922, "step": 111690 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.0001915965125599286, "loss": 2.1762, "step": 111695 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019159577090276052, "loss": 2.0659, "step": 111700 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001915950292143016, "loss": 2.1414, "step": 111705 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.0001915942874945522, "loss": 2.1287, "step": 111710 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019159354574351252, "loss": 2.1137, "step": 111715 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019159280396118282, "loss": 2.1343, "step": 111720 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.00019159206214756339, "loss": 2.2897, "step": 111725 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.0001915913203026544, "loss": 2.2084, "step": 111730 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019159057842645617, "loss": 2.1219, "step": 111735 }, { "epoch": 0.26, "grad_norm": 2.640625, "learning_rate": 0.00019158983651896896, "loss": 2.1202, "step": 111740 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019158909458019297, "loss": 2.0897, "step": 111745 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.0001915883526101285, "loss": 2.4588, "step": 111750 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.0001915876106087758, "loss": 2.113, "step": 111755 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.0001915868685761351, "loss": 2.2028, "step": 111760 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019158612651220665, "loss": 2.273, "step": 111765 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.00019158538441699073, "loss": 2.3526, "step": 111770 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019158464229048757, "loss": 2.1607, "step": 111775 }, { "epoch": 0.26, "grad_norm": 1.734375, "learning_rate": 0.00019158390013269746, "loss": 2.3288, "step": 111780 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001915831579436206, "loss": 2.2012, "step": 111785 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019158241572325732, "loss": 2.1695, "step": 111790 }, { "epoch": 0.26, "grad_norm": 1.921875, "learning_rate": 0.0001915816734716078, "loss": 2.2296, "step": 111795 }, { "epoch": 0.26, "grad_norm": 1.75, "learning_rate": 0.0001915809311886723, "loss": 2.1089, "step": 111800 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019158018887445108, "loss": 2.0475, "step": 111805 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.00019157944652894445, "loss": 2.0125, "step": 111810 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001915787041521526, "loss": 2.1749, "step": 111815 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019157796174407578, "loss": 2.1667, "step": 111820 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019157721930471432, "loss": 2.127, "step": 111825 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019157647683406838, "loss": 2.1569, "step": 111830 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.00019157573433213826, "loss": 2.2784, "step": 111835 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.00019157499179892423, "loss": 2.2937, "step": 111840 }, { "epoch": 0.26, "grad_norm": 2.8125, "learning_rate": 0.00019157424923442652, "loss": 2.2179, "step": 111845 }, { "epoch": 0.26, "grad_norm": 2.71875, "learning_rate": 0.00019157350663864537, "loss": 2.2821, "step": 111850 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.00019157276401158107, "loss": 2.1192, "step": 111855 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001915720213532338, "loss": 2.1136, "step": 111860 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.0001915712786636039, "loss": 2.4041, "step": 111865 }, { "epoch": 0.26, "grad_norm": 2.640625, "learning_rate": 0.00019157053594269163, "loss": 2.2414, "step": 111870 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019156979319049717, "loss": 2.1965, "step": 111875 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.0001915690504070208, "loss": 2.0703, "step": 111880 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019156830759226278, "loss": 2.1679, "step": 111885 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.00019156756474622338, "loss": 2.1125, "step": 111890 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019156682186890285, "loss": 2.1545, "step": 111895 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.0001915660789603014, "loss": 2.2684, "step": 111900 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019156533602041932, "loss": 2.2728, "step": 111905 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.0001915645930492569, "loss": 2.1418, "step": 111910 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.00019156385004681433, "loss": 2.1808, "step": 111915 }, { "epoch": 0.26, "grad_norm": 2.3125, "learning_rate": 0.0001915631070130919, "loss": 2.2616, "step": 111920 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001915623639480898, "loss": 2.1154, "step": 111925 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019156162085180836, "loss": 2.0544, "step": 111930 }, { "epoch": 0.26, "grad_norm": 2.21875, "learning_rate": 0.00019156087772424787, "loss": 2.1942, "step": 111935 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019156013456540847, "loss": 2.2994, "step": 111940 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.00019155939137529047, "loss": 2.0938, "step": 111945 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.0001915586481538941, "loss": 2.326, "step": 111950 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019155790490121968, "loss": 2.0912, "step": 111955 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.0001915571616172674, "loss": 2.3272, "step": 111960 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019155641830203752, "loss": 2.2693, "step": 111965 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019155567495553034, "loss": 2.3359, "step": 111970 }, { "epoch": 0.26, "grad_norm": 1.875, "learning_rate": 0.00019155493157774607, "loss": 2.1777, "step": 111975 }, { "epoch": 0.26, "grad_norm": 1.96875, "learning_rate": 0.00019155418816868495, "loss": 2.2986, "step": 111980 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.0001915534447283473, "loss": 1.8308, "step": 111985 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.0001915527012567333, "loss": 2.2372, "step": 111990 }, { "epoch": 0.26, "grad_norm": 1.75, "learning_rate": 0.00019155195775384326, "loss": 2.105, "step": 111995 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019155121421967735, "loss": 2.0244, "step": 112000 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019155047065423596, "loss": 2.2309, "step": 112005 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019154972705751923, "loss": 2.3053, "step": 112010 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019154898342952747, "loss": 2.1685, "step": 112015 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001915482397702609, "loss": 2.2265, "step": 112020 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.0001915474960797198, "loss": 1.9919, "step": 112025 }, { "epoch": 0.26, "grad_norm": 2.4375, "learning_rate": 0.00019154675235790442, "loss": 2.0689, "step": 112030 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.000191546008604815, "loss": 2.2442, "step": 112035 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019154526482045183, "loss": 2.1673, "step": 112040 }, { "epoch": 0.26, "grad_norm": 1.6328125, "learning_rate": 0.0001915445210048151, "loss": 2.077, "step": 112045 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.00019154377715790513, "loss": 2.0889, "step": 112050 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019154303327972212, "loss": 2.235, "step": 112055 }, { "epoch": 0.26, "grad_norm": 2.5, "learning_rate": 0.00019154228937026636, "loss": 2.2902, "step": 112060 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.0001915415454295381, "loss": 2.0671, "step": 112065 }, { "epoch": 0.26, "grad_norm": 1.984375, "learning_rate": 0.0001915408014575376, "loss": 2.2057, "step": 112070 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019154005745426509, "loss": 2.2863, "step": 112075 }, { "epoch": 0.26, "grad_norm": 1.9921875, "learning_rate": 0.0001915393134197208, "loss": 2.1437, "step": 112080 }, { "epoch": 0.26, "grad_norm": 1.5703125, "learning_rate": 0.00019153856935390505, "loss": 2.1529, "step": 112085 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019153782525681807, "loss": 2.2974, "step": 112090 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.0001915370811284601, "loss": 1.9538, "step": 112095 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.00019153633696883144, "loss": 2.072, "step": 112100 }, { "epoch": 0.26, "grad_norm": 1.6953125, "learning_rate": 0.00019153559277793226, "loss": 2.1416, "step": 112105 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019153484855576286, "loss": 2.1807, "step": 112110 }, { "epoch": 0.26, "grad_norm": 1.5625, "learning_rate": 0.00019153410430232355, "loss": 2.0665, "step": 112115 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019153336001761449, "loss": 2.0244, "step": 112120 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019153261570163595, "loss": 2.1024, "step": 112125 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019153187135438824, "loss": 2.0756, "step": 112130 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001915311269758716, "loss": 2.225, "step": 112135 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019153038256608624, "loss": 2.291, "step": 112140 }, { "epoch": 0.26, "grad_norm": 2.765625, "learning_rate": 0.00019152963812503243, "loss": 2.2016, "step": 112145 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019152889365271048, "loss": 2.227, "step": 112150 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019152814914912057, "loss": 2.1872, "step": 112155 }, { "epoch": 0.26, "grad_norm": 1.8046875, "learning_rate": 0.00019152740461426298, "loss": 2.204, "step": 112160 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019152666004813797, "loss": 2.1898, "step": 112165 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.0001915259154507458, "loss": 2.1332, "step": 112170 }, { "epoch": 0.26, "grad_norm": 2.015625, "learning_rate": 0.0001915251708220867, "loss": 2.0209, "step": 112175 }, { "epoch": 0.26, "grad_norm": 2.8125, "learning_rate": 0.00019152442616216097, "loss": 2.3011, "step": 112180 }, { "epoch": 0.26, "grad_norm": 1.8125, "learning_rate": 0.00019152368147096885, "loss": 2.1476, "step": 112185 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.0001915229367485105, "loss": 2.3191, "step": 112190 }, { "epoch": 0.26, "grad_norm": 1.9453125, "learning_rate": 0.00019152219199478635, "loss": 2.0828, "step": 112195 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.0001915214472097965, "loss": 2.1966, "step": 112200 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.0001915207023935413, "loss": 2.0732, "step": 112205 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019151995754602093, "loss": 2.0816, "step": 112210 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.0001915192126672357, "loss": 2.3832, "step": 112215 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019151846775718588, "loss": 2.1487, "step": 112220 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019151772281587163, "loss": 2.136, "step": 112225 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.0001915169778432933, "loss": 2.1062, "step": 112230 }, { "epoch": 0.26, "grad_norm": 3.046875, "learning_rate": 0.00019151623283945112, "loss": 2.1331, "step": 112235 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.00019151548780434535, "loss": 2.1892, "step": 112240 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.00019151474273797618, "loss": 2.0976, "step": 112245 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019151399764034393, "loss": 2.2379, "step": 112250 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019151325251144885, "loss": 2.123, "step": 112255 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019151250735129118, "loss": 2.0588, "step": 112260 }, { "epoch": 0.26, "grad_norm": 2.046875, "learning_rate": 0.00019151176215987116, "loss": 2.1114, "step": 112265 }, { "epoch": 0.26, "grad_norm": 2.546875, "learning_rate": 0.00019151101693718912, "loss": 2.0214, "step": 112270 }, { "epoch": 0.26, "grad_norm": 1.6640625, "learning_rate": 0.0001915102716832452, "loss": 2.139, "step": 112275 }, { "epoch": 0.26, "grad_norm": 1.625, "learning_rate": 0.0001915095263980397, "loss": 1.9656, "step": 112280 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019150878108157297, "loss": 2.1205, "step": 112285 }, { "epoch": 0.26, "grad_norm": 1.71875, "learning_rate": 0.0001915080357338451, "loss": 1.9473, "step": 112290 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.00019150729035485644, "loss": 2.2291, "step": 112295 }, { "epoch": 0.26, "grad_norm": 1.7265625, "learning_rate": 0.00019150654494460724, "loss": 2.2385, "step": 112300 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019150579950309775, "loss": 1.9099, "step": 112305 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019150505403032822, "loss": 2.1448, "step": 112310 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.0001915043085262989, "loss": 1.8319, "step": 112315 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.00019150356299101005, "loss": 1.9553, "step": 112320 }, { "epoch": 0.26, "grad_norm": 1.515625, "learning_rate": 0.0001915028174244619, "loss": 2.1411, "step": 112325 }, { "epoch": 0.26, "grad_norm": 1.84375, "learning_rate": 0.00019150207182665476, "loss": 2.1098, "step": 112330 }, { "epoch": 0.26, "grad_norm": 1.859375, "learning_rate": 0.00019150132619758886, "loss": 2.036, "step": 112335 }, { "epoch": 0.26, "grad_norm": 1.7734375, "learning_rate": 0.0001915005805372644, "loss": 2.1327, "step": 112340 }, { "epoch": 0.26, "grad_norm": 2.734375, "learning_rate": 0.0001914998348456817, "loss": 2.1158, "step": 112345 }, { "epoch": 0.26, "grad_norm": 2.421875, "learning_rate": 0.00019149908912284103, "loss": 2.1539, "step": 112350 }, { "epoch": 0.26, "grad_norm": 2.234375, "learning_rate": 0.0001914983433687426, "loss": 2.0317, "step": 112355 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019149759758338666, "loss": 2.1739, "step": 112360 }, { "epoch": 0.26, "grad_norm": 2.28125, "learning_rate": 0.0001914968517667735, "loss": 2.2224, "step": 112365 }, { "epoch": 0.26, "grad_norm": 2.125, "learning_rate": 0.00019149610591890332, "loss": 2.1216, "step": 112370 }, { "epoch": 0.26, "grad_norm": 2.0625, "learning_rate": 0.00019149536003977647, "loss": 1.9444, "step": 112375 }, { "epoch": 0.26, "grad_norm": 2.296875, "learning_rate": 0.0001914946141293931, "loss": 2.2544, "step": 112380 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019149386818775353, "loss": 2.1598, "step": 112385 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.000191493122214858, "loss": 2.1669, "step": 112390 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.00019149237621070671, "loss": 1.965, "step": 112395 }, { "epoch": 0.26, "grad_norm": 1.953125, "learning_rate": 0.0001914916301753, "loss": 2.1644, "step": 112400 }, { "epoch": 0.26, "grad_norm": 3.921875, "learning_rate": 0.0001914908841086381, "loss": 2.2036, "step": 112405 }, { "epoch": 0.26, "grad_norm": 1.9140625, "learning_rate": 0.00019149013801072123, "loss": 2.2687, "step": 112410 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019148939188154968, "loss": 2.2866, "step": 112415 }, { "epoch": 0.26, "grad_norm": 2.75, "learning_rate": 0.0001914886457211237, "loss": 2.2064, "step": 112420 }, { "epoch": 0.26, "grad_norm": 2.328125, "learning_rate": 0.00019148789952944356, "loss": 2.3091, "step": 112425 }, { "epoch": 0.26, "grad_norm": 1.828125, "learning_rate": 0.00019148715330650945, "loss": 2.274, "step": 112430 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019148640705232172, "loss": 2.2805, "step": 112435 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.00019148566076688056, "loss": 2.1571, "step": 112440 }, { "epoch": 0.26, "grad_norm": 1.703125, "learning_rate": 0.00019148491445018619, "loss": 2.1979, "step": 112445 }, { "epoch": 0.26, "grad_norm": 2.109375, "learning_rate": 0.00019148416810223898, "loss": 2.2281, "step": 112450 }, { "epoch": 0.26, "grad_norm": 1.8984375, "learning_rate": 0.00019148342172303905, "loss": 2.0929, "step": 112455 }, { "epoch": 0.26, "grad_norm": 1.8671875, "learning_rate": 0.00019148267531258678, "loss": 2.0074, "step": 112460 }, { "epoch": 0.26, "grad_norm": 1.9609375, "learning_rate": 0.00019148192887088236, "loss": 2.1907, "step": 112465 }, { "epoch": 0.26, "grad_norm": 1.9375, "learning_rate": 0.00019148118239792605, "loss": 2.0816, "step": 112470 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019148043589371812, "loss": 2.0746, "step": 112475 }, { "epoch": 0.26, "grad_norm": 2.671875, "learning_rate": 0.0001914796893582588, "loss": 2.1105, "step": 112480 }, { "epoch": 0.26, "grad_norm": 2.203125, "learning_rate": 0.00019147894279154833, "loss": 2.0955, "step": 112485 }, { "epoch": 0.26, "grad_norm": 2.265625, "learning_rate": 0.000191478196193587, "loss": 1.9797, "step": 112490 }, { "epoch": 0.26, "grad_norm": 2.34375, "learning_rate": 0.0001914774495643751, "loss": 2.2833, "step": 112495 }, { "epoch": 0.26, "grad_norm": 1.7734375, "learning_rate": 0.00019147670290391283, "loss": 1.906, "step": 112500 }, { "epoch": 0.26, "grad_norm": 1.796875, "learning_rate": 0.00019147595621220046, "loss": 2.0387, "step": 112505 }, { "epoch": 0.26, "grad_norm": 1.890625, "learning_rate": 0.00019147520948923825, "loss": 2.106, "step": 112510 }, { "epoch": 0.26, "grad_norm": 2.0, "learning_rate": 0.00019147446273502642, "loss": 2.3013, "step": 112515 }, { "epoch": 0.26, "grad_norm": 1.78125, "learning_rate": 0.0001914737159495653, "loss": 2.1077, "step": 112520 }, { "epoch": 0.26, "grad_norm": 2.03125, "learning_rate": 0.00019147296913285506, "loss": 2.1704, "step": 112525 }, { "epoch": 0.26, "grad_norm": 2.1875, "learning_rate": 0.00019147222228489601, "loss": 2.192, "step": 112530 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.00019147147540568836, "loss": 1.9759, "step": 112535 }, { "epoch": 0.26, "grad_norm": 1.7734375, "learning_rate": 0.00019147072849523244, "loss": 2.026, "step": 112540 }, { "epoch": 0.26, "grad_norm": 2.46875, "learning_rate": 0.00019146998155352844, "loss": 2.153, "step": 112545 }, { "epoch": 0.26, "grad_norm": 2.359375, "learning_rate": 0.00019146923458057664, "loss": 2.1133, "step": 112550 }, { "epoch": 0.26, "grad_norm": 2.078125, "learning_rate": 0.0001914684875763773, "loss": 2.3819, "step": 112555 }, { "epoch": 0.26, "grad_norm": 2.171875, "learning_rate": 0.00019146774054093065, "loss": 2.3037, "step": 112560 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019146699347423699, "loss": 2.2695, "step": 112565 }, { "epoch": 0.26, "grad_norm": 2.46875, "learning_rate": 0.00019146624637629648, "loss": 2.1057, "step": 112570 }, { "epoch": 0.26, "grad_norm": 1.9296875, "learning_rate": 0.0001914654992471095, "loss": 2.1625, "step": 112575 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019146475208667622, "loss": 2.3566, "step": 112580 }, { "epoch": 0.26, "grad_norm": 1.9765625, "learning_rate": 0.00019146400489499695, "loss": 2.2194, "step": 112585 }, { "epoch": 0.26, "grad_norm": 2.140625, "learning_rate": 0.0001914632576720719, "loss": 2.0962, "step": 112590 }, { "epoch": 0.26, "grad_norm": 2.09375, "learning_rate": 0.00019146251041790132, "loss": 1.9711, "step": 112595 }, { "epoch": 0.26, "grad_norm": 2.390625, "learning_rate": 0.00019146176313248554, "loss": 2.082, "step": 112600 }, { "epoch": 0.26, "grad_norm": 2.15625, "learning_rate": 0.00019146101581582473, "loss": 2.3161, "step": 112605 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019146026846791916, "loss": 2.095, "step": 112610 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.0001914595210887691, "loss": 2.2125, "step": 112615 }, { "epoch": 0.27, "grad_norm": 2.671875, "learning_rate": 0.00019145877367837485, "loss": 2.2035, "step": 112620 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019145802623673662, "loss": 2.2009, "step": 112625 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019145727876385466, "loss": 2.1688, "step": 112630 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019145653125972922, "loss": 2.0796, "step": 112635 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001914557837243606, "loss": 1.9978, "step": 112640 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.000191455036157749, "loss": 2.0511, "step": 112645 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.0001914542885598947, "loss": 2.3667, "step": 112650 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019145354093079797, "loss": 2.3926, "step": 112655 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019145279327045906, "loss": 2.2502, "step": 112660 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.00019145204557887818, "loss": 2.084, "step": 112665 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019145129785605564, "loss": 2.0582, "step": 112670 }, { "epoch": 0.27, "grad_norm": 1.640625, "learning_rate": 0.0001914505501019917, "loss": 1.8969, "step": 112675 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.0001914498023166866, "loss": 2.1669, "step": 112680 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019144905450014054, "loss": 2.0506, "step": 112685 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019144830665235386, "loss": 2.2297, "step": 112690 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.0001914475587733268, "loss": 2.3819, "step": 112695 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019144681086305954, "loss": 2.1328, "step": 112700 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019144606292155242, "loss": 2.1662, "step": 112705 }, { "epoch": 0.27, "grad_norm": 2.390625, "learning_rate": 0.00019144531494880566, "loss": 2.0349, "step": 112710 }, { "epoch": 0.27, "grad_norm": 2.5625, "learning_rate": 0.00019144456694481955, "loss": 2.191, "step": 112715 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019144381890959426, "loss": 2.2289, "step": 112720 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019144307084313015, "loss": 1.9692, "step": 112725 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019144232274542742, "loss": 2.2613, "step": 112730 }, { "epoch": 0.27, "grad_norm": 2.546875, "learning_rate": 0.00019144157461648632, "loss": 2.0219, "step": 112735 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019144082645630714, "loss": 2.0856, "step": 112740 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001914400782648901, "loss": 2.0559, "step": 112745 }, { "epoch": 0.27, "grad_norm": 2.515625, "learning_rate": 0.00019143933004223547, "loss": 2.0783, "step": 112750 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.0001914385817883435, "loss": 2.1907, "step": 112755 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001914378335032145, "loss": 2.1266, "step": 112760 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.00019143708518684863, "loss": 2.1857, "step": 112765 }, { "epoch": 0.27, "grad_norm": 2.734375, "learning_rate": 0.0001914363368392462, "loss": 2.0902, "step": 112770 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019143558846040746, "loss": 2.2199, "step": 112775 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019143484005033266, "loss": 2.1248, "step": 112780 }, { "epoch": 0.27, "grad_norm": 2.84375, "learning_rate": 0.00019143409160902208, "loss": 2.1529, "step": 112785 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019143334313647593, "loss": 2.1551, "step": 112790 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.00019143259463269452, "loss": 2.0328, "step": 112795 }, { "epoch": 0.27, "grad_norm": 1.6640625, "learning_rate": 0.00019143184609767805, "loss": 2.154, "step": 112800 }, { "epoch": 0.27, "grad_norm": 2.59375, "learning_rate": 0.00019143109753142685, "loss": 2.1827, "step": 112805 }, { "epoch": 0.27, "grad_norm": 2.453125, "learning_rate": 0.00019143034893394107, "loss": 2.2391, "step": 112810 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019142960030522107, "loss": 2.1636, "step": 112815 }, { "epoch": 0.27, "grad_norm": 2.71875, "learning_rate": 0.00019142885164526706, "loss": 2.1575, "step": 112820 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019142810295407926, "loss": 2.0099, "step": 112825 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019142735423165799, "loss": 2.2313, "step": 112830 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019142660547800345, "loss": 2.1407, "step": 112835 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019142585669311595, "loss": 2.1777, "step": 112840 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.0001914251078769957, "loss": 2.3058, "step": 112845 }, { "epoch": 0.27, "grad_norm": 1.8515625, "learning_rate": 0.000191424359029643, "loss": 2.1242, "step": 112850 }, { "epoch": 0.27, "grad_norm": 3.296875, "learning_rate": 0.00019142361015105804, "loss": 2.3019, "step": 112855 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019142286124124115, "loss": 2.1251, "step": 112860 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019142211230019257, "loss": 2.0914, "step": 112865 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019142136332791248, "loss": 2.1985, "step": 112870 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019142061432440122, "loss": 2.139, "step": 112875 }, { "epoch": 0.27, "grad_norm": 1.578125, "learning_rate": 0.00019141986528965906, "loss": 2.3427, "step": 112880 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019141911622368618, "loss": 2.0878, "step": 112885 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019141836712648286, "loss": 2.0159, "step": 112890 }, { "epoch": 0.27, "grad_norm": 1.8046875, "learning_rate": 0.0001914176179980494, "loss": 1.8887, "step": 112895 }, { "epoch": 0.27, "grad_norm": 3.671875, "learning_rate": 0.000191416868838386, "loss": 2.1777, "step": 112900 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019141611964749295, "loss": 2.2682, "step": 112905 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.0001914153704253705, "loss": 2.2097, "step": 112910 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.0001914146211720189, "loss": 2.1893, "step": 112915 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.0001914138718874384, "loss": 2.2041, "step": 112920 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019141312257162926, "loss": 2.2442, "step": 112925 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019141237322459174, "loss": 2.1212, "step": 112930 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.0001914116238463261, "loss": 2.1606, "step": 112935 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019141087443683258, "loss": 1.9977, "step": 112940 }, { "epoch": 0.27, "grad_norm": 2.71875, "learning_rate": 0.00019141012499611146, "loss": 2.3814, "step": 112945 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.000191409375524163, "loss": 2.1932, "step": 112950 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019140862602098744, "loss": 2.0719, "step": 112955 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.000191407876486585, "loss": 2.0886, "step": 112960 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.000191407126920956, "loss": 2.2246, "step": 112965 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019140637732410065, "loss": 2.1796, "step": 112970 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019140562769601923, "loss": 2.1885, "step": 112975 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.000191404878036712, "loss": 2.2274, "step": 112980 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.0001914041283461792, "loss": 1.9549, "step": 112985 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019140337862442108, "loss": 2.1289, "step": 112990 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001914026288714379, "loss": 2.2571, "step": 112995 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019140187908722996, "loss": 2.1223, "step": 113000 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.00019140112927179746, "loss": 2.0324, "step": 113005 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.00019140037942514066, "loss": 2.0747, "step": 113010 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019139962954725984, "loss": 2.1046, "step": 113015 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001913988796381553, "loss": 2.2622, "step": 113020 }, { "epoch": 0.27, "grad_norm": 2.34375, "learning_rate": 0.0001913981296978272, "loss": 2.0873, "step": 113025 }, { "epoch": 0.27, "grad_norm": 1.859375, "learning_rate": 0.00019139737972627584, "loss": 2.031, "step": 113030 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.00019139662972350146, "loss": 2.0602, "step": 113035 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.0001913958796895044, "loss": 2.0976, "step": 113040 }, { "epoch": 0.27, "grad_norm": 1.734375, "learning_rate": 0.0001913951296242848, "loss": 2.0851, "step": 113045 }, { "epoch": 0.27, "grad_norm": 1.734375, "learning_rate": 0.00019139437952784297, "loss": 2.049, "step": 113050 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019139362940017915, "loss": 2.0668, "step": 113055 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.00019139287924129362, "loss": 2.2311, "step": 113060 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019139212905118665, "loss": 2.0845, "step": 113065 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019139137882985844, "loss": 1.983, "step": 113070 }, { "epoch": 0.27, "grad_norm": 1.765625, "learning_rate": 0.00019139062857730928, "loss": 2.1093, "step": 113075 }, { "epoch": 0.27, "grad_norm": 1.8984375, "learning_rate": 0.00019138987829353944, "loss": 2.2028, "step": 113080 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019138912797854918, "loss": 2.0878, "step": 113085 }, { "epoch": 0.27, "grad_norm": 2.46875, "learning_rate": 0.00019138837763233867, "loss": 2.0157, "step": 113090 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019138762725490828, "loss": 2.0941, "step": 113095 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.0001913868768462582, "loss": 2.2132, "step": 113100 }, { "epoch": 0.27, "grad_norm": 2.46875, "learning_rate": 0.00019138612640638873, "loss": 2.1645, "step": 113105 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.0001913853759353001, "loss": 2.264, "step": 113110 }, { "epoch": 0.27, "grad_norm": 1.703125, "learning_rate": 0.00019138462543299257, "loss": 2.1219, "step": 113115 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019138387489946637, "loss": 2.1628, "step": 113120 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001913831243347218, "loss": 2.1555, "step": 113125 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.0001913823737387591, "loss": 2.1996, "step": 113130 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001913816231115785, "loss": 2.2456, "step": 113135 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019138087245318029, "loss": 1.9617, "step": 113140 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019138012176356473, "loss": 2.1866, "step": 113145 }, { "epoch": 0.27, "grad_norm": 2.8125, "learning_rate": 0.00019137937104273203, "loss": 2.2102, "step": 113150 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019137862029068253, "loss": 2.1768, "step": 113155 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019137786950741638, "loss": 1.9349, "step": 113160 }, { "epoch": 0.27, "grad_norm": 2.71875, "learning_rate": 0.00019137711869293393, "loss": 2.0421, "step": 113165 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019137636784723538, "loss": 2.0736, "step": 113170 }, { "epoch": 0.27, "grad_norm": 1.71875, "learning_rate": 0.00019137561697032103, "loss": 2.2055, "step": 113175 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.0001913748660621911, "loss": 2.1977, "step": 113180 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019137411512284587, "loss": 2.2618, "step": 113185 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019137336415228555, "loss": 2.3231, "step": 113190 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019137261315051046, "loss": 2.2073, "step": 113195 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019137186211752084, "loss": 2.1416, "step": 113200 }, { "epoch": 0.27, "grad_norm": 1.8046875, "learning_rate": 0.0001913711110533169, "loss": 2.2001, "step": 113205 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019137035995789897, "loss": 2.0685, "step": 113210 }, { "epoch": 0.27, "grad_norm": 1.765625, "learning_rate": 0.0001913696088312672, "loss": 2.0777, "step": 113215 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019136885767342198, "loss": 2.0369, "step": 113220 }, { "epoch": 0.27, "grad_norm": 2.453125, "learning_rate": 0.00019136810648436348, "loss": 2.2682, "step": 113225 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.000191367355264092, "loss": 2.137, "step": 113230 }, { "epoch": 0.27, "grad_norm": 1.6015625, "learning_rate": 0.00019136660401260772, "loss": 2.0258, "step": 113235 }, { "epoch": 0.27, "grad_norm": 2.625, "learning_rate": 0.00019136585272991102, "loss": 2.1747, "step": 113240 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.00019136510141600207, "loss": 2.1264, "step": 113245 }, { "epoch": 0.27, "grad_norm": 3.734375, "learning_rate": 0.0001913643500708811, "loss": 2.475, "step": 113250 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019136359869454842, "loss": 2.0977, "step": 113255 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.0001913628472870043, "loss": 2.0903, "step": 113260 }, { "epoch": 0.27, "grad_norm": 1.7578125, "learning_rate": 0.00019136209584824898, "loss": 2.2462, "step": 113265 }, { "epoch": 0.27, "grad_norm": 1.765625, "learning_rate": 0.0001913613443782827, "loss": 2.0897, "step": 113270 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019136059287710574, "loss": 2.2605, "step": 113275 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.0001913598413447183, "loss": 2.0854, "step": 113280 }, { "epoch": 0.27, "grad_norm": 1.609375, "learning_rate": 0.00019135908978112074, "loss": 1.9981, "step": 113285 }, { "epoch": 0.27, "grad_norm": 1.8984375, "learning_rate": 0.00019135833818631324, "loss": 2.3415, "step": 113290 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019135758656029607, "loss": 2.1922, "step": 113295 }, { "epoch": 0.27, "grad_norm": 1.53125, "learning_rate": 0.00019135683490306948, "loss": 2.2901, "step": 113300 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019135608321463375, "loss": 2.3217, "step": 113305 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019135533149498913, "loss": 2.1667, "step": 113310 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019135457974413586, "loss": 2.0967, "step": 113315 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.0001913538279620742, "loss": 1.852, "step": 113320 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019135307614880445, "loss": 2.195, "step": 113325 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001913523243043268, "loss": 2.158, "step": 113330 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019135157242864157, "loss": 2.0747, "step": 113335 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019135082052174895, "loss": 2.0925, "step": 113340 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.00019135006858364925, "loss": 2.2402, "step": 113345 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001913493166143427, "loss": 2.2119, "step": 113350 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.0001913485646138296, "loss": 2.2145, "step": 113355 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019134781258211013, "loss": 2.2466, "step": 113360 }, { "epoch": 0.27, "grad_norm": 3.296875, "learning_rate": 0.00019134706051918463, "loss": 2.0628, "step": 113365 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.0001913463084250533, "loss": 2.231, "step": 113370 }, { "epoch": 0.27, "grad_norm": 2.4375, "learning_rate": 0.0001913455562997164, "loss": 2.3237, "step": 113375 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019134480414317423, "loss": 2.2263, "step": 113380 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.000191344051955427, "loss": 2.1107, "step": 113385 }, { "epoch": 0.27, "grad_norm": 2.9375, "learning_rate": 0.00019134329973647497, "loss": 2.1603, "step": 113390 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019134254748631843, "loss": 2.2143, "step": 113395 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.0001913417952049576, "loss": 1.9979, "step": 113400 }, { "epoch": 0.27, "grad_norm": 2.765625, "learning_rate": 0.00019134104289239277, "loss": 2.1835, "step": 113405 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019134029054862422, "loss": 2.2136, "step": 113410 }, { "epoch": 0.27, "grad_norm": 2.421875, "learning_rate": 0.00019133953817365213, "loss": 2.0734, "step": 113415 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019133878576747682, "loss": 2.2078, "step": 113420 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.0001913380333300985, "loss": 2.2693, "step": 113425 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019133728086151748, "loss": 2.36, "step": 113430 }, { "epoch": 0.27, "grad_norm": 2.609375, "learning_rate": 0.00019133652836173393, "loss": 2.2038, "step": 113435 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019133577583074822, "loss": 2.1172, "step": 113440 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019133502326856055, "loss": 2.1131, "step": 113445 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019133427067517117, "loss": 2.0641, "step": 113450 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019133351805058032, "loss": 2.1358, "step": 113455 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019133276539478832, "loss": 2.1405, "step": 113460 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019133201270779538, "loss": 2.0134, "step": 113465 }, { "epoch": 0.27, "grad_norm": 2.484375, "learning_rate": 0.00019133125998960178, "loss": 1.9662, "step": 113470 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019133050724020776, "loss": 2.1868, "step": 113475 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019132975445961355, "loss": 2.1642, "step": 113480 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019132900164781948, "loss": 2.2788, "step": 113485 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019132824880482575, "loss": 2.2017, "step": 113490 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019132749593063264, "loss": 2.4247, "step": 113495 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.0001913267430252404, "loss": 1.9846, "step": 113500 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.00019132599008864925, "loss": 2.191, "step": 113505 }, { "epoch": 0.27, "grad_norm": 1.7578125, "learning_rate": 0.00019132523712085954, "loss": 2.0212, "step": 113510 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019132448412187146, "loss": 2.1295, "step": 113515 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019132373109168524, "loss": 2.1398, "step": 113520 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019132297803030123, "loss": 2.1458, "step": 113525 }, { "epoch": 0.27, "grad_norm": 1.8984375, "learning_rate": 0.0001913222249377196, "loss": 2.0755, "step": 113530 }, { "epoch": 0.27, "grad_norm": 2.578125, "learning_rate": 0.00019132147181394067, "loss": 2.1296, "step": 113535 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019132071865896465, "loss": 2.1702, "step": 113540 }, { "epoch": 0.27, "grad_norm": 1.7421875, "learning_rate": 0.00019131996547279182, "loss": 2.1777, "step": 113545 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019131921225542243, "loss": 2.1926, "step": 113550 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019131845900685677, "loss": 2.0734, "step": 113555 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019131770572709502, "loss": 2.2327, "step": 113560 }, { "epoch": 0.27, "grad_norm": 1.71875, "learning_rate": 0.00019131695241613752, "loss": 2.0875, "step": 113565 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019131619907398445, "loss": 2.1055, "step": 113570 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019131544570063618, "loss": 2.1721, "step": 113575 }, { "epoch": 0.27, "grad_norm": 2.34375, "learning_rate": 0.00019131469229609284, "loss": 2.1133, "step": 113580 }, { "epoch": 0.27, "grad_norm": 1.8515625, "learning_rate": 0.00019131393886035478, "loss": 2.1722, "step": 113585 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.0001913131853934222, "loss": 2.0038, "step": 113590 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019131243189529537, "loss": 2.3173, "step": 113595 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019131167836597458, "loss": 2.1347, "step": 113600 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.00019131092480546006, "loss": 2.0988, "step": 113605 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.0001913101712137521, "loss": 2.2182, "step": 113610 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001913094175908509, "loss": 2.1454, "step": 113615 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019130866393675673, "loss": 2.239, "step": 113620 }, { "epoch": 0.27, "grad_norm": 1.5625, "learning_rate": 0.0001913079102514699, "loss": 2.0155, "step": 113625 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.0001913071565349906, "loss": 2.0118, "step": 113630 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019130640278731912, "loss": 2.1884, "step": 113635 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019130564900845574, "loss": 2.0177, "step": 113640 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019130489519840067, "loss": 2.1769, "step": 113645 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019130414135715422, "loss": 2.2084, "step": 113650 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001913033874847166, "loss": 2.1228, "step": 113655 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019130263358108809, "loss": 2.1036, "step": 113660 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.00019130187964626894, "loss": 2.2389, "step": 113665 }, { "epoch": 0.27, "grad_norm": 1.8671875, "learning_rate": 0.00019130112568025944, "loss": 2.1625, "step": 113670 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019130037168305978, "loss": 2.3033, "step": 113675 }, { "epoch": 0.27, "grad_norm": 1.7421875, "learning_rate": 0.00019129961765467027, "loss": 1.9664, "step": 113680 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.0001912988635950912, "loss": 2.0998, "step": 113685 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.00019129810950432275, "loss": 2.111, "step": 113690 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.0001912973553823652, "loss": 2.2896, "step": 113695 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.0001912966012292188, "loss": 2.1435, "step": 113700 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019129584704488382, "loss": 2.0693, "step": 113705 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.0001912950928293606, "loss": 2.1808, "step": 113710 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019129433858264925, "loss": 2.2639, "step": 113715 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019129358430475013, "loss": 2.0271, "step": 113720 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019129282999566342, "loss": 2.2273, "step": 113725 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.0001912920756553895, "loss": 2.0654, "step": 113730 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019129132128392848, "loss": 2.1428, "step": 113735 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019129056688128073, "loss": 2.1485, "step": 113740 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019128981244744643, "loss": 2.1968, "step": 113745 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019128905798242594, "loss": 2.3375, "step": 113750 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001912883034862194, "loss": 2.2576, "step": 113755 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019128754895882713, "loss": 2.0552, "step": 113760 }, { "epoch": 0.27, "grad_norm": 1.6640625, "learning_rate": 0.00019128679440024937, "loss": 2.2515, "step": 113765 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.0001912860398104864, "loss": 2.2197, "step": 113770 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019128528518953846, "loss": 2.1024, "step": 113775 }, { "epoch": 0.27, "grad_norm": 2.5, "learning_rate": 0.00019128453053740582, "loss": 2.2396, "step": 113780 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.0001912837758540887, "loss": 2.1615, "step": 113785 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.0001912830211395874, "loss": 2.0965, "step": 113790 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019128226639390217, "loss": 2.2712, "step": 113795 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.00019128151161703326, "loss": 2.1532, "step": 113800 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019128075680898093, "loss": 2.2224, "step": 113805 }, { "epoch": 0.27, "grad_norm": 1.6796875, "learning_rate": 0.00019128000196974544, "loss": 2.1321, "step": 113810 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019127924709932704, "loss": 2.1548, "step": 113815 }, { "epoch": 0.27, "grad_norm": 1.859375, "learning_rate": 0.000191278492197726, "loss": 2.0582, "step": 113820 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019127773726494257, "loss": 2.069, "step": 113825 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.000191276982300977, "loss": 2.0997, "step": 113830 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019127622730582958, "loss": 2.2218, "step": 113835 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019127547227950053, "loss": 2.1973, "step": 113840 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019127471722199012, "loss": 2.1645, "step": 113845 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019127396213329862, "loss": 2.2555, "step": 113850 }, { "epoch": 0.27, "grad_norm": 2.703125, "learning_rate": 0.00019127320701342628, "loss": 2.199, "step": 113855 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019127245186237337, "loss": 2.1601, "step": 113860 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001912716966801401, "loss": 2.0429, "step": 113865 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.0001912709414667268, "loss": 2.1104, "step": 113870 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019127018622213366, "loss": 2.0129, "step": 113875 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.000191269430946361, "loss": 2.0846, "step": 113880 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.000191268675639409, "loss": 2.2291, "step": 113885 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.000191267920301278, "loss": 1.9328, "step": 113890 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.0001912671649319682, "loss": 2.0059, "step": 113895 }, { "epoch": 0.27, "grad_norm": 2.5, "learning_rate": 0.00019126640953147993, "loss": 2.0659, "step": 113900 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019126565409981335, "loss": 1.9657, "step": 113905 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.0001912648986369688, "loss": 1.9738, "step": 113910 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.0001912641431429465, "loss": 2.1306, "step": 113915 }, { "epoch": 0.27, "grad_norm": 1.6171875, "learning_rate": 0.00019126338761774669, "loss": 2.2424, "step": 113920 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019126263206136967, "loss": 2.2971, "step": 113925 }, { "epoch": 0.27, "grad_norm": 2.46875, "learning_rate": 0.00019126187647381567, "loss": 2.0082, "step": 113930 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019126112085508497, "loss": 2.2046, "step": 113935 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.0001912603652051778, "loss": 2.154, "step": 113940 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019125960952409444, "loss": 1.9742, "step": 113945 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019125885381183517, "loss": 2.0879, "step": 113950 }, { "epoch": 0.27, "grad_norm": 2.46875, "learning_rate": 0.0001912580980684002, "loss": 2.2009, "step": 113955 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019125734229378976, "loss": 2.1687, "step": 113960 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001912565864880042, "loss": 2.229, "step": 113965 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019125583065104378, "loss": 2.0461, "step": 113970 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019125507478290866, "loss": 2.184, "step": 113975 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019125431888359918, "loss": 1.9908, "step": 113980 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019125356295311553, "loss": 2.1158, "step": 113985 }, { "epoch": 0.27, "grad_norm": 2.4375, "learning_rate": 0.00019125280699145805, "loss": 2.1941, "step": 113990 }, { "epoch": 0.27, "grad_norm": 2.40625, "learning_rate": 0.00019125205099862692, "loss": 2.0821, "step": 113995 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019125129497462245, "loss": 2.2095, "step": 114000 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.0001912505389194449, "loss": 2.1074, "step": 114005 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001912497828330945, "loss": 2.1156, "step": 114010 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001912490267155715, "loss": 2.2082, "step": 114015 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019124827056687618, "loss": 2.2519, "step": 114020 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.0001912475143870088, "loss": 1.9684, "step": 114025 }, { "epoch": 0.27, "grad_norm": 1.734375, "learning_rate": 0.00019124675817596963, "loss": 2.1488, "step": 114030 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.0001912460019337589, "loss": 2.2517, "step": 114035 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.0001912452456603769, "loss": 2.1424, "step": 114040 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019124448935582385, "loss": 2.1831, "step": 114045 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001912437330201, "loss": 2.0787, "step": 114050 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.0001912429766532057, "loss": 2.1651, "step": 114055 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.0001912422202551411, "loss": 2.1169, "step": 114060 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.00019124146382590654, "loss": 2.2569, "step": 114065 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019124070736550218, "loss": 2.0324, "step": 114070 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019123995087392837, "loss": 2.1865, "step": 114075 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019123919435118535, "loss": 2.0677, "step": 114080 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019123843779727336, "loss": 2.127, "step": 114085 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019123768121219266, "loss": 2.382, "step": 114090 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019123692459594355, "loss": 2.0675, "step": 114095 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019123616794852622, "loss": 2.086, "step": 114100 }, { "epoch": 0.27, "grad_norm": 2.578125, "learning_rate": 0.00019123541126994095, "loss": 2.1109, "step": 114105 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.000191234654560188, "loss": 2.1187, "step": 114110 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019123389781926768, "loss": 2.1838, "step": 114115 }, { "epoch": 0.27, "grad_norm": 1.65625, "learning_rate": 0.00019123314104718016, "loss": 1.9469, "step": 114120 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019123238424392577, "loss": 2.0678, "step": 114125 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019123162740950473, "loss": 2.2788, "step": 114130 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.00019123087054391733, "loss": 2.1426, "step": 114135 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.0001912301136471638, "loss": 2.1879, "step": 114140 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019122935671924443, "loss": 2.0747, "step": 114145 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019122859976015942, "loss": 2.081, "step": 114150 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.0001912278427699091, "loss": 2.1353, "step": 114155 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019122708574849368, "loss": 2.1665, "step": 114160 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.0001912263286959134, "loss": 2.1819, "step": 114165 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.0001912255716121686, "loss": 2.0624, "step": 114170 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.0001912248144972595, "loss": 1.9982, "step": 114175 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019122405735118628, "loss": 2.0407, "step": 114180 }, { "epoch": 0.27, "grad_norm": 2.78125, "learning_rate": 0.0001912233001739493, "loss": 2.1458, "step": 114185 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.0001912225429655488, "loss": 2.0534, "step": 114190 }, { "epoch": 0.27, "grad_norm": 2.96875, "learning_rate": 0.00019122178572598503, "loss": 2.0977, "step": 114195 }, { "epoch": 0.27, "grad_norm": 3.0625, "learning_rate": 0.00019122102845525824, "loss": 2.0978, "step": 114200 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.0001912202711533687, "loss": 2.1579, "step": 114205 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019121951382031663, "loss": 2.1915, "step": 114210 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.00019121875645610232, "loss": 2.0503, "step": 114215 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019121799906072606, "loss": 2.0221, "step": 114220 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019121724163418806, "loss": 2.1357, "step": 114225 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.0001912164841764886, "loss": 2.0183, "step": 114230 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019121572668762794, "loss": 2.3171, "step": 114235 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019121496916760632, "loss": 2.1943, "step": 114240 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019121421161642403, "loss": 2.2476, "step": 114245 }, { "epoch": 0.27, "grad_norm": 2.4375, "learning_rate": 0.0001912134540340813, "loss": 2.1784, "step": 114250 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.0001912126964205784, "loss": 2.1907, "step": 114255 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.00019121193877591556, "loss": 2.1348, "step": 114260 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019121118110009312, "loss": 2.0908, "step": 114265 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019121042339311127, "loss": 2.0566, "step": 114270 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.00019120966565497026, "loss": 2.1452, "step": 114275 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.00019120890788567037, "loss": 2.211, "step": 114280 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019120815008521192, "loss": 2.0948, "step": 114285 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019120739225359507, "loss": 1.9297, "step": 114290 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.00019120663439082015, "loss": 2.275, "step": 114295 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.00019120587649688731, "loss": 2.0512, "step": 114300 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019120511857179698, "loss": 2.0545, "step": 114305 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019120436061554927, "loss": 2.2299, "step": 114310 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019120360262814455, "loss": 2.3039, "step": 114315 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019120284460958297, "loss": 2.0179, "step": 114320 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.0001912020865598649, "loss": 2.1735, "step": 114325 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.0001912013284789905, "loss": 2.007, "step": 114330 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019120057036696005, "loss": 2.2018, "step": 114335 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019119981222377389, "loss": 2.3036, "step": 114340 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.0001911990540494322, "loss": 2.2547, "step": 114345 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019119829584393524, "loss": 2.1101, "step": 114350 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019119753760728328, "loss": 1.9542, "step": 114355 }, { "epoch": 0.27, "grad_norm": 2.46875, "learning_rate": 0.00019119677933947664, "loss": 2.167, "step": 114360 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019119602104051547, "loss": 2.1714, "step": 114365 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019119526271040013, "loss": 2.231, "step": 114370 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019119450434913082, "loss": 2.128, "step": 114375 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001911937459567078, "loss": 2.1659, "step": 114380 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.00019119298753313135, "loss": 2.1276, "step": 114385 }, { "epoch": 0.27, "grad_norm": 1.75, "learning_rate": 0.00019119222907840172, "loss": 2.1228, "step": 114390 }, { "epoch": 0.27, "grad_norm": 2.453125, "learning_rate": 0.0001911914705925192, "loss": 2.1653, "step": 114395 }, { "epoch": 0.27, "grad_norm": 1.8046875, "learning_rate": 0.000191190712075484, "loss": 2.0823, "step": 114400 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019118995352729635, "loss": 2.2478, "step": 114405 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019118919494795662, "loss": 2.0867, "step": 114410 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019118843633746498, "loss": 2.0239, "step": 114415 }, { "epoch": 0.27, "grad_norm": 2.484375, "learning_rate": 0.00019118767769582174, "loss": 2.1103, "step": 114420 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.0001911869190230271, "loss": 2.0241, "step": 114425 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001911861603190814, "loss": 2.1075, "step": 114430 }, { "epoch": 0.27, "grad_norm": 1.703125, "learning_rate": 0.0001911854015839848, "loss": 2.1902, "step": 114435 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019118464281773766, "loss": 2.1208, "step": 114440 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019118388402034016, "loss": 2.185, "step": 114445 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.0001911831251917926, "loss": 1.9495, "step": 114450 }, { "epoch": 0.27, "grad_norm": 1.6953125, "learning_rate": 0.00019118236633209525, "loss": 2.0549, "step": 114455 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019118160744124833, "loss": 2.0321, "step": 114460 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019118084851925212, "loss": 2.1771, "step": 114465 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.0001911800895661069, "loss": 2.0228, "step": 114470 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019117933058181288, "loss": 2.1443, "step": 114475 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019117857156637037, "loss": 2.0644, "step": 114480 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019117781251977957, "loss": 2.2765, "step": 114485 }, { "epoch": 0.27, "grad_norm": 2.515625, "learning_rate": 0.00019117705344204083, "loss": 2.1746, "step": 114490 }, { "epoch": 0.27, "grad_norm": 2.5, "learning_rate": 0.00019117629433315433, "loss": 2.0592, "step": 114495 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019117553519312032, "loss": 2.1896, "step": 114500 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019117477602193913, "loss": 2.1112, "step": 114505 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019117401681961095, "loss": 2.1364, "step": 114510 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019117325758613612, "loss": 2.1745, "step": 114515 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019117249832151484, "loss": 2.1835, "step": 114520 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019117173902574735, "loss": 2.1338, "step": 114525 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019117097969883397, "loss": 2.3099, "step": 114530 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019117022034077492, "loss": 2.3057, "step": 114535 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019116946095157046, "loss": 2.1346, "step": 114540 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001911687015312209, "loss": 2.2386, "step": 114545 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.0001911679420797264, "loss": 2.1606, "step": 114550 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019116718259708733, "loss": 2.0592, "step": 114555 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019116642308330387, "loss": 2.2624, "step": 114560 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.0001911656635383763, "loss": 2.1798, "step": 114565 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019116490396230487, "loss": 2.2825, "step": 114570 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001911641443550899, "loss": 2.2809, "step": 114575 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019116338471673156, "loss": 2.3301, "step": 114580 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.0001911626250472302, "loss": 2.0848, "step": 114585 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.000191161865346586, "loss": 2.2534, "step": 114590 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019116110561479926, "loss": 2.0788, "step": 114595 }, { "epoch": 0.27, "grad_norm": 1.8125, "learning_rate": 0.00019116034585187026, "loss": 2.1648, "step": 114600 }, { "epoch": 0.27, "grad_norm": 1.703125, "learning_rate": 0.00019115958605779923, "loss": 2.1605, "step": 114605 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.0001911588262325864, "loss": 2.1831, "step": 114610 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019115806637623207, "loss": 2.0182, "step": 114615 }, { "epoch": 0.27, "grad_norm": 1.6484375, "learning_rate": 0.0001911573064887365, "loss": 2.2285, "step": 114620 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019115654657009994, "loss": 2.1519, "step": 114625 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019115578662032263, "loss": 2.1126, "step": 114630 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019115502663940488, "loss": 2.0975, "step": 114635 }, { "epoch": 0.27, "grad_norm": 2.5, "learning_rate": 0.00019115426662734693, "loss": 2.3082, "step": 114640 }, { "epoch": 0.27, "grad_norm": 2.75, "learning_rate": 0.00019115350658414898, "loss": 2.1887, "step": 114645 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001911527465098114, "loss": 2.2015, "step": 114650 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019115198640433433, "loss": 2.1449, "step": 114655 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.0001911512262677181, "loss": 2.0844, "step": 114660 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.000191150466099963, "loss": 2.1788, "step": 114665 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019114970590106923, "loss": 2.2569, "step": 114670 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019114894567103704, "loss": 2.1849, "step": 114675 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019114818540986672, "loss": 2.2671, "step": 114680 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019114742511755855, "loss": 2.066, "step": 114685 }, { "epoch": 0.27, "grad_norm": 2.53125, "learning_rate": 0.00019114666479411275, "loss": 1.9609, "step": 114690 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019114590443952963, "loss": 2.2585, "step": 114695 }, { "epoch": 0.27, "grad_norm": 1.8671875, "learning_rate": 0.00019114514405380937, "loss": 2.255, "step": 114700 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019114438363695228, "loss": 2.1351, "step": 114705 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019114362318895864, "loss": 2.2826, "step": 114710 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019114286270982867, "loss": 2.151, "step": 114715 }, { "epoch": 0.27, "grad_norm": 1.8125, "learning_rate": 0.00019114210219956266, "loss": 1.8858, "step": 114720 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019114134165816083, "loss": 2.2567, "step": 114725 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.00019114058108562348, "loss": 2.2368, "step": 114730 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019113982048195088, "loss": 2.1796, "step": 114735 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019113905984714325, "loss": 2.1665, "step": 114740 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019113829918120082, "loss": 1.9887, "step": 114745 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019113753848412396, "loss": 2.185, "step": 114750 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019113677775591282, "loss": 2.1391, "step": 114755 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001911360169965677, "loss": 2.1009, "step": 114760 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.0001911352562060889, "loss": 2.2389, "step": 114765 }, { "epoch": 0.27, "grad_norm": 1.6015625, "learning_rate": 0.0001911344953844766, "loss": 2.0545, "step": 114770 }, { "epoch": 0.27, "grad_norm": 1.71875, "learning_rate": 0.00019113373453173114, "loss": 2.1566, "step": 114775 }, { "epoch": 0.27, "grad_norm": 2.34375, "learning_rate": 0.00019113297364785274, "loss": 2.1114, "step": 114780 }, { "epoch": 0.27, "grad_norm": 2.796875, "learning_rate": 0.00019113221273284165, "loss": 2.1235, "step": 114785 }, { "epoch": 0.27, "grad_norm": 2.390625, "learning_rate": 0.00019113145178669812, "loss": 2.282, "step": 114790 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.0001911306908094225, "loss": 2.1893, "step": 114795 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.00019112992980101491, "loss": 2.1232, "step": 114800 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019112916876147575, "loss": 1.995, "step": 114805 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.00019112840769080518, "loss": 2.2139, "step": 114810 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019112764658900348, "loss": 2.2544, "step": 114815 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019112688545607095, "loss": 2.2363, "step": 114820 }, { "epoch": 0.27, "grad_norm": 1.703125, "learning_rate": 0.0001911261242920078, "loss": 2.2193, "step": 114825 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019112536309681433, "loss": 2.1267, "step": 114830 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019112460187049076, "loss": 2.1147, "step": 114835 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.0001911238406130374, "loss": 2.1459, "step": 114840 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019112307932445447, "loss": 2.0884, "step": 114845 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019112231800474225, "loss": 2.1079, "step": 114850 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.000191121556653901, "loss": 2.1277, "step": 114855 }, { "epoch": 0.27, "grad_norm": 2.546875, "learning_rate": 0.00019112079527193094, "loss": 2.2209, "step": 114860 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.0001911200338588324, "loss": 2.1711, "step": 114865 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.0001911192724146056, "loss": 2.2244, "step": 114870 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019111851093925078, "loss": 1.9405, "step": 114875 }, { "epoch": 0.27, "grad_norm": 1.71875, "learning_rate": 0.00019111774943276826, "loss": 2.1283, "step": 114880 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019111698789515825, "loss": 2.155, "step": 114885 }, { "epoch": 0.27, "grad_norm": 2.40625, "learning_rate": 0.000191116226326421, "loss": 2.0289, "step": 114890 }, { "epoch": 0.27, "grad_norm": 2.4375, "learning_rate": 0.00019111546472655683, "loss": 2.2494, "step": 114895 }, { "epoch": 0.27, "grad_norm": 2.640625, "learning_rate": 0.00019111470309556594, "loss": 2.2836, "step": 114900 }, { "epoch": 0.27, "grad_norm": 1.6484375, "learning_rate": 0.00019111394143344864, "loss": 2.0549, "step": 114905 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.00019111317974020514, "loss": 2.2125, "step": 114910 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019111241801583571, "loss": 2.0249, "step": 114915 }, { "epoch": 0.27, "grad_norm": 1.6875, "learning_rate": 0.00019111165626034066, "loss": 2.0651, "step": 114920 }, { "epoch": 0.27, "grad_norm": 2.71875, "learning_rate": 0.0001911108944737202, "loss": 2.2148, "step": 114925 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019111013265597462, "loss": 2.2785, "step": 114930 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019110937080710417, "loss": 2.0608, "step": 114935 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019110860892710907, "loss": 2.2105, "step": 114940 }, { "epoch": 0.27, "grad_norm": 1.609375, "learning_rate": 0.00019110784701598964, "loss": 1.9609, "step": 114945 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019110708507374613, "loss": 2.2874, "step": 114950 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.00019110632310037878, "loss": 2.1445, "step": 114955 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019110556109588783, "loss": 2.1694, "step": 114960 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.00019110479906027362, "loss": 2.228, "step": 114965 }, { "epoch": 0.27, "grad_norm": 2.953125, "learning_rate": 0.00019110403699353632, "loss": 2.1191, "step": 114970 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019110327489567627, "loss": 2.1161, "step": 114975 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019110251276669362, "loss": 2.1149, "step": 114980 }, { "epoch": 0.27, "grad_norm": 1.6796875, "learning_rate": 0.00019110175060658874, "loss": 2.1492, "step": 114985 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019110098841536183, "loss": 2.2246, "step": 114990 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019110022619301322, "loss": 2.168, "step": 114995 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019109946393954308, "loss": 2.0476, "step": 115000 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.0001910987016549517, "loss": 2.0863, "step": 115005 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019109793933923938, "loss": 2.2889, "step": 115010 }, { "epoch": 0.27, "grad_norm": 1.6640625, "learning_rate": 0.00019109717699240634, "loss": 2.0647, "step": 115015 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.00019109641461445285, "loss": 2.0242, "step": 115020 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.0001910956522053792, "loss": 2.261, "step": 115025 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.0001910948897651856, "loss": 2.0735, "step": 115030 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.0001910941272938723, "loss": 2.1721, "step": 115035 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.00019109336479143963, "loss": 2.0951, "step": 115040 }, { "epoch": 0.27, "grad_norm": 2.421875, "learning_rate": 0.00019109260225788784, "loss": 2.0698, "step": 115045 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019109183969321714, "loss": 2.0921, "step": 115050 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019109107709742778, "loss": 2.2108, "step": 115055 }, { "epoch": 0.27, "grad_norm": 2.65625, "learning_rate": 0.00019109031447052011, "loss": 2.3251, "step": 115060 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.0001910895518124943, "loss": 2.0158, "step": 115065 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019108878912335066, "loss": 2.3235, "step": 115070 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019108802640308944, "loss": 2.16, "step": 115075 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.0001910872636517109, "loss": 2.22, "step": 115080 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.0001910865008692153, "loss": 2.1023, "step": 115085 }, { "epoch": 0.27, "grad_norm": 2.65625, "learning_rate": 0.0001910857380556029, "loss": 2.1334, "step": 115090 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019108497521087397, "loss": 2.2638, "step": 115095 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.0001910842123350287, "loss": 2.0916, "step": 115100 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019108344942806747, "loss": 1.9911, "step": 115105 }, { "epoch": 0.27, "grad_norm": 1.6796875, "learning_rate": 0.00019108268648999048, "loss": 2.3535, "step": 115110 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019108192352079798, "loss": 2.1771, "step": 115115 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019108116052049025, "loss": 2.092, "step": 115120 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019108039748906753, "loss": 2.2352, "step": 115125 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019107963442653011, "loss": 2.1293, "step": 115130 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019107887133287823, "loss": 2.2264, "step": 115135 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019107810820811216, "loss": 1.9782, "step": 115140 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019107734505223215, "loss": 2.1364, "step": 115145 }, { "epoch": 0.27, "grad_norm": 1.71875, "learning_rate": 0.00019107658186523846, "loss": 2.0775, "step": 115150 }, { "epoch": 0.27, "grad_norm": 2.46875, "learning_rate": 0.00019107581864713136, "loss": 2.2803, "step": 115155 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019107505539791113, "loss": 2.1426, "step": 115160 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019107429211757796, "loss": 1.9263, "step": 115165 }, { "epoch": 0.27, "grad_norm": 1.6328125, "learning_rate": 0.00019107352880613223, "loss": 2.0358, "step": 115170 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.0001910727654635741, "loss": 2.2242, "step": 115175 }, { "epoch": 0.27, "grad_norm": 2.9375, "learning_rate": 0.00019107200208990383, "loss": 2.2391, "step": 115180 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.00019107123868512176, "loss": 2.1859, "step": 115185 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019107047524922804, "loss": 2.0916, "step": 115190 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019106971178222307, "loss": 2.1831, "step": 115195 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.000191068948284107, "loss": 2.2311, "step": 115200 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019106818475488012, "loss": 2.1723, "step": 115205 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019106742119454268, "loss": 2.2286, "step": 115210 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.00019106665760309501, "loss": 2.2514, "step": 115215 }, { "epoch": 0.27, "grad_norm": 1.703125, "learning_rate": 0.00019106589398053728, "loss": 1.9974, "step": 115220 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001910651303268698, "loss": 2.3456, "step": 115225 }, { "epoch": 0.27, "grad_norm": 2.453125, "learning_rate": 0.0001910643666420928, "loss": 2.2701, "step": 115230 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.0001910636029262066, "loss": 2.1603, "step": 115235 }, { "epoch": 0.27, "grad_norm": 1.6640625, "learning_rate": 0.0001910628391792114, "loss": 2.1928, "step": 115240 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019106207540110748, "loss": 2.258, "step": 115245 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.0001910613115918951, "loss": 2.1036, "step": 115250 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019106054775157452, "loss": 2.154, "step": 115255 }, { "epoch": 0.27, "grad_norm": 2.5625, "learning_rate": 0.000191059783880146, "loss": 2.2325, "step": 115260 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019105901997760983, "loss": 2.1635, "step": 115265 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019105825604396625, "loss": 2.0695, "step": 115270 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001910574920792155, "loss": 2.4097, "step": 115275 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.00019105672808335787, "loss": 2.1636, "step": 115280 }, { "epoch": 0.27, "grad_norm": 1.8125, "learning_rate": 0.00019105596405639361, "loss": 2.2398, "step": 115285 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019105519999832298, "loss": 2.1534, "step": 115290 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019105443590914623, "loss": 2.0029, "step": 115295 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019105367178886363, "loss": 2.0138, "step": 115300 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019105290763747547, "loss": 2.2028, "step": 115305 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019105214345498198, "loss": 2.1068, "step": 115310 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001910513792413834, "loss": 2.2668, "step": 115315 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019105061499668006, "loss": 2.1894, "step": 115320 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.00019104985072087216, "loss": 2.2899, "step": 115325 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019104908641395997, "loss": 2.157, "step": 115330 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019104832207594377, "loss": 2.2181, "step": 115335 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.0001910475577068238, "loss": 2.1426, "step": 115340 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019104679330660034, "loss": 2.039, "step": 115345 }, { "epoch": 0.27, "grad_norm": 1.6953125, "learning_rate": 0.00019104602887527365, "loss": 2.1401, "step": 115350 }, { "epoch": 0.27, "grad_norm": 2.53125, "learning_rate": 0.00019104526441284398, "loss": 2.1793, "step": 115355 }, { "epoch": 0.27, "grad_norm": 1.7578125, "learning_rate": 0.0001910444999193116, "loss": 2.1544, "step": 115360 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019104373539467677, "loss": 2.011, "step": 115365 }, { "epoch": 0.27, "grad_norm": 1.7421875, "learning_rate": 0.00019104297083893975, "loss": 2.0338, "step": 115370 }, { "epoch": 0.27, "grad_norm": 1.6875, "learning_rate": 0.00019104220625210078, "loss": 2.1026, "step": 115375 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019104144163416015, "loss": 2.2443, "step": 115380 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.00019104067698511812, "loss": 2.1292, "step": 115385 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019103991230497494, "loss": 2.2304, "step": 115390 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019103914759373088, "loss": 2.1899, "step": 115395 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.0001910383828513862, "loss": 2.2609, "step": 115400 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019103761807794114, "loss": 2.0324, "step": 115405 }, { "epoch": 0.27, "grad_norm": 1.8046875, "learning_rate": 0.00019103685327339598, "loss": 1.8692, "step": 115410 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.000191036088437751, "loss": 2.0695, "step": 115415 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019103532357100643, "loss": 1.9616, "step": 115420 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019103455867316256, "loss": 2.3263, "step": 115425 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019103379374421962, "loss": 2.271, "step": 115430 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.00019103302878417785, "loss": 2.1138, "step": 115435 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.0001910322637930376, "loss": 2.3278, "step": 115440 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019103149877079906, "loss": 2.1581, "step": 115445 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019103073371746248, "loss": 2.029, "step": 115450 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019102996863302817, "loss": 2.2225, "step": 115455 }, { "epoch": 0.27, "grad_norm": 2.75, "learning_rate": 0.0001910292035174964, "loss": 2.0245, "step": 115460 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.00019102843837086737, "loss": 2.0749, "step": 115465 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.0001910276731931414, "loss": 2.2175, "step": 115470 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019102690798431868, "loss": 2.3124, "step": 115475 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019102614274439956, "loss": 1.9699, "step": 115480 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.00019102537747338425, "loss": 2.0697, "step": 115485 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019102461217127302, "loss": 2.1442, "step": 115490 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.0001910238468380661, "loss": 2.2584, "step": 115495 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.00019102308147376382, "loss": 2.1391, "step": 115500 }, { "epoch": 0.27, "grad_norm": 2.390625, "learning_rate": 0.0001910223160783664, "loss": 2.2368, "step": 115505 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.0001910215506518741, "loss": 2.1998, "step": 115510 }, { "epoch": 0.27, "grad_norm": 1.6640625, "learning_rate": 0.0001910207851942872, "loss": 2.1441, "step": 115515 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019102001970560593, "loss": 2.1359, "step": 115520 }, { "epoch": 0.27, "grad_norm": 1.875, "learning_rate": 0.00019101925418583057, "loss": 2.1514, "step": 115525 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.0001910184886349614, "loss": 2.1423, "step": 115530 }, { "epoch": 0.27, "grad_norm": 2.40625, "learning_rate": 0.00019101772305299866, "loss": 2.1846, "step": 115535 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.0001910169574399426, "loss": 2.2162, "step": 115540 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.0001910161917957935, "loss": 2.0598, "step": 115545 }, { "epoch": 0.27, "grad_norm": 1.6484375, "learning_rate": 0.00019101542612055163, "loss": 2.1281, "step": 115550 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019101466041421725, "loss": 2.2042, "step": 115555 }, { "epoch": 0.27, "grad_norm": 1.671875, "learning_rate": 0.00019101389467679057, "loss": 2.2341, "step": 115560 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019101312890827194, "loss": 2.247, "step": 115565 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019101236310866155, "loss": 2.2169, "step": 115570 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019101159727795967, "loss": 2.1471, "step": 115575 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019101083141616663, "loss": 2.3263, "step": 115580 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.00019101006552328258, "loss": 2.1854, "step": 115585 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.0001910092995993079, "loss": 2.029, "step": 115590 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019100853364424273, "loss": 2.1562, "step": 115595 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019100776765808744, "loss": 2.0396, "step": 115600 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019100700164084223, "loss": 2.0623, "step": 115605 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019100623559250736, "loss": 2.0708, "step": 115610 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019100546951308315, "loss": 2.0916, "step": 115615 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.0001910047034025698, "loss": 1.9211, "step": 115620 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.0001910039372609676, "loss": 2.1237, "step": 115625 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.0001910031710882768, "loss": 2.2071, "step": 115630 }, { "epoch": 0.27, "grad_norm": 1.75, "learning_rate": 0.00019100240488449763, "loss": 2.047, "step": 115635 }, { "epoch": 0.27, "grad_norm": 1.7578125, "learning_rate": 0.0001910016386496304, "loss": 2.2491, "step": 115640 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.0001910008723836754, "loss": 2.2584, "step": 115645 }, { "epoch": 0.27, "grad_norm": 1.71875, "learning_rate": 0.00019100010608663284, "loss": 2.1174, "step": 115650 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019099933975850298, "loss": 2.3082, "step": 115655 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019099857339928612, "loss": 2.0017, "step": 115660 }, { "epoch": 0.27, "grad_norm": 1.6171875, "learning_rate": 0.00019099780700898247, "loss": 2.0216, "step": 115665 }, { "epoch": 0.27, "grad_norm": 2.75, "learning_rate": 0.0001909970405875923, "loss": 2.0843, "step": 115670 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019099627413511595, "loss": 2.2913, "step": 115675 }, { "epoch": 0.27, "grad_norm": 2.34375, "learning_rate": 0.0001909955076515536, "loss": 2.2234, "step": 115680 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001909947411369055, "loss": 2.2326, "step": 115685 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019099397459117196, "loss": 2.1127, "step": 115690 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019099320801435323, "loss": 2.1741, "step": 115695 }, { "epoch": 0.27, "grad_norm": 2.859375, "learning_rate": 0.00019099244140644958, "loss": 2.1235, "step": 115700 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019099167476746128, "loss": 2.2844, "step": 115705 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.00019099090809738853, "loss": 2.1508, "step": 115710 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019099014139623165, "loss": 2.2409, "step": 115715 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.00019098937466399094, "loss": 2.3352, "step": 115720 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019098860790066655, "loss": 2.0399, "step": 115725 }, { "epoch": 0.27, "grad_norm": 1.8515625, "learning_rate": 0.0001909878411062588, "loss": 2.1394, "step": 115730 }, { "epoch": 0.27, "grad_norm": 1.8671875, "learning_rate": 0.000190987074280768, "loss": 2.2453, "step": 115735 }, { "epoch": 0.27, "grad_norm": 2.515625, "learning_rate": 0.00019098630742419431, "loss": 1.9686, "step": 115740 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.00019098554053653807, "loss": 2.0175, "step": 115745 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.00019098477361779954, "loss": 2.1308, "step": 115750 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.00019098400666797894, "loss": 2.1677, "step": 115755 }, { "epoch": 0.27, "grad_norm": 1.9609375, "learning_rate": 0.00019098323968707654, "loss": 2.152, "step": 115760 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019098247267509267, "loss": 2.2271, "step": 115765 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019098170563202745, "loss": 2.1616, "step": 115770 }, { "epoch": 0.27, "grad_norm": 2.484375, "learning_rate": 0.0001909809385578813, "loss": 2.3292, "step": 115775 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019098017145265442, "loss": 2.1364, "step": 115780 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.000190979404316347, "loss": 2.1308, "step": 115785 }, { "epoch": 0.27, "grad_norm": 2.765625, "learning_rate": 0.00019097863714895942, "loss": 2.1368, "step": 115790 }, { "epoch": 0.27, "grad_norm": 2.703125, "learning_rate": 0.00019097786995049188, "loss": 2.2464, "step": 115795 }, { "epoch": 0.27, "grad_norm": 2.484375, "learning_rate": 0.0001909771027209446, "loss": 2.1572, "step": 115800 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019097633546031795, "loss": 2.1699, "step": 115805 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019097556816861211, "loss": 2.1849, "step": 115810 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019097480084582738, "loss": 1.9786, "step": 115815 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019097403349196398, "loss": 2.1303, "step": 115820 }, { "epoch": 0.27, "grad_norm": 1.734375, "learning_rate": 0.00019097326610702223, "loss": 2.1021, "step": 115825 }, { "epoch": 0.27, "grad_norm": 2.5, "learning_rate": 0.00019097249869100234, "loss": 2.0375, "step": 115830 }, { "epoch": 0.27, "grad_norm": 1.953125, "learning_rate": 0.0001909717312439046, "loss": 2.1616, "step": 115835 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019097096376572927, "loss": 2.1687, "step": 115840 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.00019097019625647662, "loss": 2.2988, "step": 115845 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.00019096942871614687, "loss": 2.2261, "step": 115850 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019096866114474032, "loss": 1.9982, "step": 115855 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019096789354225725, "loss": 2.1851, "step": 115860 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019096712590869789, "loss": 2.2034, "step": 115865 }, { "epoch": 0.27, "grad_norm": 1.859375, "learning_rate": 0.0001909663582440625, "loss": 2.2542, "step": 115870 }, { "epoch": 0.27, "grad_norm": 1.8984375, "learning_rate": 0.00019096559054835138, "loss": 1.9843, "step": 115875 }, { "epoch": 0.27, "grad_norm": 2.5, "learning_rate": 0.00019096482282156472, "loss": 2.2225, "step": 115880 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.00019096405506370285, "loss": 2.0557, "step": 115885 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019096328727476604, "loss": 2.2734, "step": 115890 }, { "epoch": 0.27, "grad_norm": 1.765625, "learning_rate": 0.00019096251945475447, "loss": 2.0922, "step": 115895 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.00019096175160366848, "loss": 2.0765, "step": 115900 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019096098372150828, "loss": 2.1285, "step": 115905 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.0001909602158082742, "loss": 2.3633, "step": 115910 }, { "epoch": 0.27, "grad_norm": 1.8125, "learning_rate": 0.0001909594478639664, "loss": 2.0532, "step": 115915 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019095867988858526, "loss": 2.0694, "step": 115920 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019095791188213096, "loss": 2.1445, "step": 115925 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019095714384460383, "loss": 2.1172, "step": 115930 }, { "epoch": 0.27, "grad_norm": 2.390625, "learning_rate": 0.00019095637577600406, "loss": 2.1126, "step": 115935 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.00019095560767633192, "loss": 2.0019, "step": 115940 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019095483954558771, "loss": 2.0162, "step": 115945 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019095407138377168, "loss": 2.1042, "step": 115950 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.0001909533031908841, "loss": 2.2254, "step": 115955 }, { "epoch": 0.27, "grad_norm": 1.7265625, "learning_rate": 0.0001909525349669252, "loss": 2.0629, "step": 115960 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019095176671189528, "loss": 2.0801, "step": 115965 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019095099842579458, "loss": 2.0358, "step": 115970 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019095023010862335, "loss": 2.0788, "step": 115975 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019094946176038193, "loss": 2.0773, "step": 115980 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.00019094869338107047, "loss": 2.1411, "step": 115985 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019094792497068933, "loss": 2.2204, "step": 115990 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.0001909471565292387, "loss": 2.1267, "step": 115995 }, { "epoch": 0.27, "grad_norm": 2.484375, "learning_rate": 0.00019094638805671886, "loss": 2.2293, "step": 116000 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.0001909456195531301, "loss": 2.1182, "step": 116005 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019094485101847267, "loss": 2.2992, "step": 116010 }, { "epoch": 0.27, "grad_norm": 2.75, "learning_rate": 0.00019094408245274683, "loss": 2.148, "step": 116015 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019094331385595284, "loss": 2.2186, "step": 116020 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.000190942545228091, "loss": 2.2362, "step": 116025 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019094177656916146, "loss": 1.9984, "step": 116030 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001909410078791646, "loss": 1.9368, "step": 116035 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019094023915810064, "loss": 2.0297, "step": 116040 }, { "epoch": 0.27, "grad_norm": 1.765625, "learning_rate": 0.00019093947040596987, "loss": 2.2099, "step": 116045 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.0001909387016227725, "loss": 2.2471, "step": 116050 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.00019093793280850884, "loss": 2.2061, "step": 116055 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.0001909371639631791, "loss": 2.1282, "step": 116060 }, { "epoch": 0.27, "grad_norm": 2.421875, "learning_rate": 0.0001909363950867836, "loss": 2.1599, "step": 116065 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019093562617932257, "loss": 2.1868, "step": 116070 }, { "epoch": 0.27, "grad_norm": 1.8125, "learning_rate": 0.0001909348572407963, "loss": 2.2983, "step": 116075 }, { "epoch": 0.27, "grad_norm": 1.734375, "learning_rate": 0.00019093408827120501, "loss": 2.1463, "step": 116080 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.000190933319270549, "loss": 2.3537, "step": 116085 }, { "epoch": 0.27, "grad_norm": 1.8671875, "learning_rate": 0.00019093255023882853, "loss": 2.4062, "step": 116090 }, { "epoch": 0.27, "grad_norm": 2.671875, "learning_rate": 0.00019093178117604383, "loss": 2.1505, "step": 116095 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019093101208219517, "loss": 2.2595, "step": 116100 }, { "epoch": 0.27, "grad_norm": 1.75, "learning_rate": 0.00019093024295728284, "loss": 2.1383, "step": 116105 }, { "epoch": 0.27, "grad_norm": 1.734375, "learning_rate": 0.00019092947380130714, "loss": 2.2654, "step": 116110 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.0001909287046142682, "loss": 2.0597, "step": 116115 }, { "epoch": 0.27, "grad_norm": 1.640625, "learning_rate": 0.00019092793539616642, "loss": 2.174, "step": 116120 }, { "epoch": 0.27, "grad_norm": 2.4375, "learning_rate": 0.000190927166147002, "loss": 2.2484, "step": 116125 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019092639686677518, "loss": 2.2488, "step": 116130 }, { "epoch": 0.27, "grad_norm": 1.7578125, "learning_rate": 0.00019092562755548627, "loss": 2.2368, "step": 116135 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019092485821313554, "loss": 2.2474, "step": 116140 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.0001909240888397232, "loss": 2.0496, "step": 116145 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.00019092331943524956, "loss": 2.2798, "step": 116150 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019092254999971488, "loss": 1.9587, "step": 116155 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019092178053311938, "loss": 2.0585, "step": 116160 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019092101103546335, "loss": 2.2536, "step": 116165 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019092024150674708, "loss": 2.0665, "step": 116170 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019091947194697077, "loss": 2.1177, "step": 116175 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019091870235613475, "loss": 2.2699, "step": 116180 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019091793273423924, "loss": 2.1681, "step": 116185 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019091716308128454, "loss": 2.341, "step": 116190 }, { "epoch": 0.27, "grad_norm": 2.328125, "learning_rate": 0.00019091639339727085, "loss": 2.1497, "step": 116195 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.0001909156236821985, "loss": 2.209, "step": 116200 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001909148539360677, "loss": 2.1621, "step": 116205 }, { "epoch": 0.27, "grad_norm": 1.75, "learning_rate": 0.00019091408415887874, "loss": 2.1458, "step": 116210 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.0001909133143506319, "loss": 2.1177, "step": 116215 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019091254451132742, "loss": 2.1393, "step": 116220 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.0001909117746409655, "loss": 2.0513, "step": 116225 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019091100473954655, "loss": 2.1046, "step": 116230 }, { "epoch": 0.27, "grad_norm": 1.796875, "learning_rate": 0.00019091023480707073, "loss": 2.0354, "step": 116235 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019090946484353832, "loss": 1.9959, "step": 116240 }, { "epoch": 0.27, "grad_norm": 1.7265625, "learning_rate": 0.00019090869484894961, "loss": 1.9192, "step": 116245 }, { "epoch": 0.27, "grad_norm": 2.15625, "learning_rate": 0.0001909079248233048, "loss": 2.1606, "step": 116250 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.00019090715476660424, "loss": 2.0717, "step": 116255 }, { "epoch": 0.27, "grad_norm": 1.5, "learning_rate": 0.00019090638467884812, "loss": 2.2707, "step": 116260 }, { "epoch": 0.27, "grad_norm": 2.125, "learning_rate": 0.00019090561456003673, "loss": 2.0396, "step": 116265 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019090484441017033, "loss": 2.1878, "step": 116270 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.0001909040742292492, "loss": 2.0383, "step": 116275 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001909033040172736, "loss": 2.1228, "step": 116280 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019090253377424375, "loss": 2.1188, "step": 116285 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019090176350015998, "loss": 2.2423, "step": 116290 }, { "epoch": 0.27, "grad_norm": 2.09375, "learning_rate": 0.0001909009931950225, "loss": 2.2989, "step": 116295 }, { "epoch": 0.27, "grad_norm": 2.40625, "learning_rate": 0.00019090022285883157, "loss": 2.3687, "step": 116300 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.0001908994524915875, "loss": 2.197, "step": 116305 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019089868209329054, "loss": 2.2681, "step": 116310 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.00019089791166394094, "loss": 2.0077, "step": 116315 }, { "epoch": 0.27, "grad_norm": 3.125, "learning_rate": 0.00019089714120353892, "loss": 2.1302, "step": 116320 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.00019089637071208483, "loss": 2.0442, "step": 116325 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019089560018957887, "loss": 2.1132, "step": 116330 }, { "epoch": 0.27, "grad_norm": 1.65625, "learning_rate": 0.00019089482963602133, "loss": 2.0226, "step": 116335 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019089405905141251, "loss": 2.1243, "step": 116340 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019089328843575258, "loss": 2.2449, "step": 116345 }, { "epoch": 0.27, "grad_norm": 1.6484375, "learning_rate": 0.00019089251778904188, "loss": 2.1581, "step": 116350 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019089174711128064, "loss": 2.1842, "step": 116355 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019089097640246911, "loss": 2.1732, "step": 116360 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.00019089020566260761, "loss": 2.0037, "step": 116365 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019088943489169635, "loss": 2.0876, "step": 116370 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001908886640897356, "loss": 2.1799, "step": 116375 }, { "epoch": 0.27, "grad_norm": 2.421875, "learning_rate": 0.00019088789325672566, "loss": 2.2109, "step": 116380 }, { "epoch": 0.27, "grad_norm": 2.59375, "learning_rate": 0.00019088712239266674, "loss": 2.0407, "step": 116385 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019088635149755914, "loss": 2.0313, "step": 116390 }, { "epoch": 0.27, "grad_norm": 1.6875, "learning_rate": 0.0001908855805714031, "loss": 2.0528, "step": 116395 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.00019088480961419895, "loss": 2.3772, "step": 116400 }, { "epoch": 0.27, "grad_norm": 2.046875, "learning_rate": 0.00019088403862594687, "loss": 2.1877, "step": 116405 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.0001908832676066471, "loss": 2.3315, "step": 116410 }, { "epoch": 0.27, "grad_norm": 1.5234375, "learning_rate": 0.00019088249655630003, "loss": 1.8035, "step": 116415 }, { "epoch": 0.27, "grad_norm": 2.375, "learning_rate": 0.00019088172547490585, "loss": 2.2259, "step": 116420 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019088095436246478, "loss": 2.1305, "step": 116425 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019088018321897717, "loss": 2.1991, "step": 116430 }, { "epoch": 0.27, "grad_norm": 1.90625, "learning_rate": 0.00019087941204444322, "loss": 1.9681, "step": 116435 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001908786408388632, "loss": 2.1992, "step": 116440 }, { "epoch": 0.27, "grad_norm": 2.109375, "learning_rate": 0.00019087786960223738, "loss": 2.2543, "step": 116445 }, { "epoch": 0.27, "grad_norm": 2.359375, "learning_rate": 0.00019087709833456607, "loss": 2.435, "step": 116450 }, { "epoch": 0.27, "grad_norm": 1.703125, "learning_rate": 0.00019087632703584947, "loss": 2.1082, "step": 116455 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.0001908755557060879, "loss": 2.063, "step": 116460 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019087478434528157, "loss": 1.9921, "step": 116465 }, { "epoch": 0.27, "grad_norm": 2.234375, "learning_rate": 0.00019087401295343076, "loss": 2.1331, "step": 116470 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019087324153053574, "loss": 2.3707, "step": 116475 }, { "epoch": 0.27, "grad_norm": 1.6640625, "learning_rate": 0.0001908724700765968, "loss": 2.1391, "step": 116480 }, { "epoch": 0.27, "grad_norm": 2.1875, "learning_rate": 0.00019087169859161415, "loss": 2.2554, "step": 116485 }, { "epoch": 0.27, "grad_norm": 2.625, "learning_rate": 0.00019087092707558808, "loss": 2.1938, "step": 116490 }, { "epoch": 0.27, "grad_norm": 2.015625, "learning_rate": 0.00019087015552851885, "loss": 2.3347, "step": 116495 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.00019086938395040674, "loss": 2.0744, "step": 116500 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.000190868612341252, "loss": 1.9897, "step": 116505 }, { "epoch": 0.27, "grad_norm": 1.9375, "learning_rate": 0.0001908678407010549, "loss": 1.9937, "step": 116510 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.0001908670690298157, "loss": 2.0814, "step": 116515 }, { "epoch": 0.27, "grad_norm": 2.296875, "learning_rate": 0.00019086629732753462, "loss": 2.0422, "step": 116520 }, { "epoch": 0.27, "grad_norm": 1.8671875, "learning_rate": 0.000190865525594212, "loss": 2.1144, "step": 116525 }, { "epoch": 0.27, "grad_norm": 2.59375, "learning_rate": 0.00019086475382984805, "loss": 2.3125, "step": 116530 }, { "epoch": 0.27, "grad_norm": 2.453125, "learning_rate": 0.00019086398203444307, "loss": 2.1799, "step": 116535 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.0001908632102079973, "loss": 2.0216, "step": 116540 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.000190862438350511, "loss": 2.2327, "step": 116545 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.00019086166646198444, "loss": 2.1808, "step": 116550 }, { "epoch": 0.27, "grad_norm": 1.78125, "learning_rate": 0.00019086089454241792, "loss": 2.1881, "step": 116555 }, { "epoch": 0.27, "grad_norm": 2.0625, "learning_rate": 0.00019086012259181162, "loss": 2.1703, "step": 116560 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.0001908593506101659, "loss": 2.1988, "step": 116565 }, { "epoch": 0.27, "grad_norm": 2.40625, "learning_rate": 0.00019085857859748098, "loss": 2.1884, "step": 116570 }, { "epoch": 0.27, "grad_norm": 1.8828125, "learning_rate": 0.00019085780655375708, "loss": 1.9991, "step": 116575 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.00019085703447899454, "loss": 2.1326, "step": 116580 }, { "epoch": 0.27, "grad_norm": 2.0, "learning_rate": 0.00019085626237319358, "loss": 2.0015, "step": 116585 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.00019085549023635444, "loss": 2.2314, "step": 116590 }, { "epoch": 0.27, "grad_norm": 2.859375, "learning_rate": 0.00019085471806847747, "loss": 2.1562, "step": 116595 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019085394586956285, "loss": 2.0967, "step": 116600 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.00019085317363961088, "loss": 2.1353, "step": 116605 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019085240137862184, "loss": 2.322, "step": 116610 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.00019085162908659596, "loss": 1.9902, "step": 116615 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.0001908508567635335, "loss": 2.0574, "step": 116620 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.00019085008440943474, "loss": 2.3109, "step": 116625 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019084931202429994, "loss": 2.273, "step": 116630 }, { "epoch": 0.27, "grad_norm": 2.34375, "learning_rate": 0.0001908485396081294, "loss": 2.2335, "step": 116635 }, { "epoch": 0.27, "grad_norm": 2.28125, "learning_rate": 0.0001908477671609233, "loss": 1.9901, "step": 116640 }, { "epoch": 0.27, "grad_norm": 1.6875, "learning_rate": 0.000190846994682682, "loss": 2.155, "step": 116645 }, { "epoch": 0.27, "grad_norm": 2.265625, "learning_rate": 0.0001908462221734057, "loss": 2.1777, "step": 116650 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.0001908454496330947, "loss": 2.1519, "step": 116655 }, { "epoch": 0.27, "grad_norm": 2.25, "learning_rate": 0.0001908446770617492, "loss": 2.1982, "step": 116660 }, { "epoch": 0.27, "grad_norm": 2.578125, "learning_rate": 0.00019084390445936957, "loss": 2.0425, "step": 116665 }, { "epoch": 0.27, "grad_norm": 2.03125, "learning_rate": 0.000190843131825956, "loss": 2.107, "step": 116670 }, { "epoch": 0.27, "grad_norm": 1.7109375, "learning_rate": 0.00019084235916150876, "loss": 2.0478, "step": 116675 }, { "epoch": 0.27, "grad_norm": 1.921875, "learning_rate": 0.0001908415864660281, "loss": 2.2494, "step": 116680 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.00019084081373951434, "loss": 2.2999, "step": 116685 }, { "epoch": 0.27, "grad_norm": 1.9453125, "learning_rate": 0.0001908400409819677, "loss": 2.2297, "step": 116690 }, { "epoch": 0.27, "grad_norm": 1.7421875, "learning_rate": 0.00019083926819338847, "loss": 1.9737, "step": 116695 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.00019083849537377686, "loss": 2.2248, "step": 116700 }, { "epoch": 0.27, "grad_norm": 1.9140625, "learning_rate": 0.00019083772252313322, "loss": 2.0609, "step": 116705 }, { "epoch": 0.27, "grad_norm": 1.984375, "learning_rate": 0.0001908369496414577, "loss": 2.2147, "step": 116710 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.00019083617672875067, "loss": 2.0365, "step": 116715 }, { "epoch": 0.27, "grad_norm": 1.96875, "learning_rate": 0.00019083540378501236, "loss": 2.1022, "step": 116720 }, { "epoch": 0.27, "grad_norm": 1.8046875, "learning_rate": 0.000190834630810243, "loss": 1.9409, "step": 116725 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019083385780444294, "loss": 2.034, "step": 116730 }, { "epoch": 0.27, "grad_norm": 2.3125, "learning_rate": 0.00019083308476761234, "loss": 2.3213, "step": 116735 }, { "epoch": 0.27, "grad_norm": 1.84375, "learning_rate": 0.00019083231169975156, "loss": 2.0301, "step": 116740 }, { "epoch": 0.27, "grad_norm": 1.7734375, "learning_rate": 0.00019083153860086077, "loss": 1.8323, "step": 116745 }, { "epoch": 0.27, "grad_norm": 2.203125, "learning_rate": 0.00019083076547094028, "loss": 1.998, "step": 116750 }, { "epoch": 0.27, "grad_norm": 1.8125, "learning_rate": 0.00019082999230999036, "loss": 2.0376, "step": 116755 }, { "epoch": 0.27, "grad_norm": 1.9921875, "learning_rate": 0.00019082921911801125, "loss": 2.1776, "step": 116760 }, { "epoch": 0.27, "grad_norm": 2.21875, "learning_rate": 0.00019082844589500328, "loss": 2.1931, "step": 116765 }, { "epoch": 0.27, "grad_norm": 1.890625, "learning_rate": 0.0001908276726409666, "loss": 2.0576, "step": 116770 }, { "epoch": 0.27, "grad_norm": 2.171875, "learning_rate": 0.0001908268993559016, "loss": 2.1934, "step": 116775 }, { "epoch": 0.27, "grad_norm": 1.859375, "learning_rate": 0.00019082612603980845, "loss": 2.0859, "step": 116780 }, { "epoch": 0.27, "grad_norm": 1.828125, "learning_rate": 0.00019082535269268746, "loss": 2.0935, "step": 116785 }, { "epoch": 0.27, "grad_norm": 1.7890625, "learning_rate": 0.0001908245793145389, "loss": 2.0807, "step": 116790 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.000190823805905363, "loss": 2.0936, "step": 116795 }, { "epoch": 0.27, "grad_norm": 1.8671875, "learning_rate": 0.00019082303246516002, "loss": 2.1739, "step": 116800 }, { "epoch": 0.27, "grad_norm": 1.9765625, "learning_rate": 0.00019082225899393028, "loss": 2.0921, "step": 116805 }, { "epoch": 0.27, "grad_norm": 1.75, "learning_rate": 0.00019082148549167397, "loss": 2.1269, "step": 116810 }, { "epoch": 0.27, "grad_norm": 1.8515625, "learning_rate": 0.00019082071195839143, "loss": 2.2038, "step": 116815 }, { "epoch": 0.27, "grad_norm": 2.40625, "learning_rate": 0.00019081993839408287, "loss": 2.3178, "step": 116820 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.00019081916479874854, "loss": 2.2709, "step": 116825 }, { "epoch": 0.27, "grad_norm": 1.8203125, "learning_rate": 0.0001908183911723888, "loss": 2.1202, "step": 116830 }, { "epoch": 0.27, "grad_norm": 2.140625, "learning_rate": 0.0001908176175150038, "loss": 2.111, "step": 116835 }, { "epoch": 0.27, "grad_norm": 1.9296875, "learning_rate": 0.0001908168438265939, "loss": 1.8649, "step": 116840 }, { "epoch": 0.27, "grad_norm": 2.078125, "learning_rate": 0.0001908160701071593, "loss": 1.9996, "step": 116845 }, { "epoch": 0.27, "grad_norm": 1.8046875, "learning_rate": 0.00019081529635670027, "loss": 2.1489, "step": 116850 }, { "epoch": 0.27, "grad_norm": 1.8359375, "learning_rate": 0.0001908145225752171, "loss": 2.1599, "step": 116855 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019081374876271003, "loss": 2.1904, "step": 116860 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019081297491917935, "loss": 2.0783, "step": 116865 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019081220104462529, "loss": 2.1345, "step": 116870 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019081142713904816, "loss": 2.2353, "step": 116875 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.0001908106532024482, "loss": 2.0917, "step": 116880 }, { "epoch": 0.28, "grad_norm": 2.625, "learning_rate": 0.00019080987923482566, "loss": 1.9824, "step": 116885 }, { "epoch": 0.28, "grad_norm": 2.5, "learning_rate": 0.0001908091052361808, "loss": 1.784, "step": 116890 }, { "epoch": 0.28, "grad_norm": 2.53125, "learning_rate": 0.00019080833120651395, "loss": 2.342, "step": 116895 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.0001908075571458253, "loss": 2.2114, "step": 116900 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019080678305411514, "loss": 2.1142, "step": 116905 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.00019080600893138375, "loss": 2.1993, "step": 116910 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019080523477763136, "loss": 1.8614, "step": 116915 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019080446059285827, "loss": 2.2432, "step": 116920 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019080368637706472, "loss": 2.1642, "step": 116925 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.00019080291213025098, "loss": 2.0253, "step": 116930 }, { "epoch": 0.28, "grad_norm": 1.828125, "learning_rate": 0.00019080213785241734, "loss": 2.1854, "step": 116935 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019080136354356403, "loss": 2.1544, "step": 116940 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019080058920369135, "loss": 2.1261, "step": 116945 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.0001907998148327995, "loss": 2.1572, "step": 116950 }, { "epoch": 0.28, "grad_norm": 1.5703125, "learning_rate": 0.00019079904043088882, "loss": 2.119, "step": 116955 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019079826599795952, "loss": 2.1212, "step": 116960 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019079749153401188, "loss": 2.0697, "step": 116965 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019079671703904622, "loss": 2.2445, "step": 116970 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019079594251306266, "loss": 2.1153, "step": 116975 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019079516795606166, "loss": 2.2072, "step": 116980 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019079439336804334, "loss": 2.2209, "step": 116985 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019079361874900798, "loss": 2.1592, "step": 116990 }, { "epoch": 0.28, "grad_norm": 2.40625, "learning_rate": 0.00019079284409895592, "loss": 1.9952, "step": 116995 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019079206941788736, "loss": 1.8713, "step": 117000 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.0001907912947058026, "loss": 1.9851, "step": 117005 }, { "epoch": 0.28, "grad_norm": 1.6484375, "learning_rate": 0.00019079051996270186, "loss": 2.2452, "step": 117010 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019078974518858545, "loss": 2.0977, "step": 117015 }, { "epoch": 0.28, "grad_norm": 1.65625, "learning_rate": 0.0001907889703834536, "loss": 2.2278, "step": 117020 }, { "epoch": 0.28, "grad_norm": 2.546875, "learning_rate": 0.00019078819554730657, "loss": 2.2203, "step": 117025 }, { "epoch": 0.28, "grad_norm": 1.7890625, "learning_rate": 0.0001907874206801447, "loss": 2.0706, "step": 117030 }, { "epoch": 0.28, "grad_norm": 2.34375, "learning_rate": 0.00019078664578196816, "loss": 2.0393, "step": 117035 }, { "epoch": 0.28, "grad_norm": 1.859375, "learning_rate": 0.0001907858708527773, "loss": 2.0444, "step": 117040 }, { "epoch": 0.28, "grad_norm": 2.734375, "learning_rate": 0.0001907850958925723, "loss": 2.2572, "step": 117045 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019078432090135347, "loss": 2.0173, "step": 117050 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019078354587912107, "loss": 2.1277, "step": 117055 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019078277082587537, "loss": 2.2357, "step": 117060 }, { "epoch": 0.28, "grad_norm": 2.734375, "learning_rate": 0.00019078199574161663, "loss": 2.127, "step": 117065 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001907812206263451, "loss": 2.1767, "step": 117070 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.0001907804454800611, "loss": 2.3472, "step": 117075 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019077967030276482, "loss": 2.2466, "step": 117080 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.00019077889509445657, "loss": 2.1379, "step": 117085 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.0001907781198551366, "loss": 2.1286, "step": 117090 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019077734458480515, "loss": 2.0892, "step": 117095 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019077656928346258, "loss": 2.1366, "step": 117100 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.000190775793951109, "loss": 2.2086, "step": 117105 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019077501858774483, "loss": 2.1718, "step": 117110 }, { "epoch": 0.28, "grad_norm": 1.7890625, "learning_rate": 0.00019077424319337025, "loss": 2.1816, "step": 117115 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019077346776798552, "loss": 2.1283, "step": 117120 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019077269231159094, "loss": 2.1611, "step": 117125 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019077191682418678, "loss": 2.1886, "step": 117130 }, { "epoch": 0.28, "grad_norm": 2.609375, "learning_rate": 0.00019077114130577327, "loss": 2.1387, "step": 117135 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.0001907703657563507, "loss": 2.1025, "step": 117140 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019076959017591931, "loss": 2.2923, "step": 117145 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019076881456447939, "loss": 2.1422, "step": 117150 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019076803892203122, "loss": 2.3433, "step": 117155 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.000190767263248575, "loss": 2.1593, "step": 117160 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019076648754411108, "loss": 2.1473, "step": 117165 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019076571180863967, "loss": 2.1551, "step": 117170 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019076493604216102, "loss": 2.0283, "step": 117175 }, { "epoch": 0.28, "grad_norm": 1.6953125, "learning_rate": 0.00019076416024467542, "loss": 2.1909, "step": 117180 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019076338441618317, "loss": 2.1898, "step": 117185 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019076260855668448, "loss": 2.0943, "step": 117190 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019076183266617967, "loss": 1.9894, "step": 117195 }, { "epoch": 0.28, "grad_norm": 2.609375, "learning_rate": 0.00019076105674466893, "loss": 2.0899, "step": 117200 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.0001907602807921526, "loss": 2.1604, "step": 117205 }, { "epoch": 0.28, "grad_norm": 1.859375, "learning_rate": 0.00019075950480863089, "loss": 2.1999, "step": 117210 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019075872879410408, "loss": 2.0218, "step": 117215 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019075795274857248, "loss": 2.0445, "step": 117220 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019075717667203627, "loss": 2.0712, "step": 117225 }, { "epoch": 0.28, "grad_norm": 1.65625, "learning_rate": 0.00019075640056449579, "loss": 2.1301, "step": 117230 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019075562442595127, "loss": 2.104, "step": 117235 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.000190754848256403, "loss": 2.2623, "step": 117240 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.0001907540720558512, "loss": 2.278, "step": 117245 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019075329582429618, "loss": 2.2136, "step": 117250 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019075251956173818, "loss": 1.8455, "step": 117255 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.0001907517432681775, "loss": 2.2241, "step": 117260 }, { "epoch": 0.28, "grad_norm": 2.578125, "learning_rate": 0.00019075096694361432, "loss": 2.2476, "step": 117265 }, { "epoch": 0.28, "grad_norm": 1.6875, "learning_rate": 0.000190750190588049, "loss": 2.3104, "step": 117270 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019074941420148177, "loss": 2.1943, "step": 117275 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.0001907486377839129, "loss": 2.045, "step": 117280 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019074786133534266, "loss": 2.0895, "step": 117285 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019074708485577126, "loss": 2.0748, "step": 117290 }, { "epoch": 0.28, "grad_norm": 1.71875, "learning_rate": 0.00019074630834519906, "loss": 2.2094, "step": 117295 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019074553180362623, "loss": 2.1406, "step": 117300 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.0001907447552310531, "loss": 2.1543, "step": 117305 }, { "epoch": 0.28, "grad_norm": 2.40625, "learning_rate": 0.00019074397862747992, "loss": 2.112, "step": 117310 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019074320199290697, "loss": 2.0882, "step": 117315 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019074242532733448, "loss": 2.3011, "step": 117320 }, { "epoch": 0.28, "grad_norm": 1.65625, "learning_rate": 0.0001907416486307627, "loss": 2.0374, "step": 117325 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.000190740871903192, "loss": 2.1514, "step": 117330 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.0001907400951446225, "loss": 2.1648, "step": 117335 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019073931835505458, "loss": 2.2268, "step": 117340 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.00019073854153448845, "loss": 2.0074, "step": 117345 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.0001907377646829244, "loss": 2.3114, "step": 117350 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019073698780036264, "loss": 2.0425, "step": 117355 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019073621088680354, "loss": 2.2149, "step": 117360 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019073543394224727, "loss": 2.1944, "step": 117365 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019073465696669412, "loss": 2.2273, "step": 117370 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.0001907338799601444, "loss": 2.1313, "step": 117375 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019073310292259832, "loss": 2.1117, "step": 117380 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019073232585405618, "loss": 2.1956, "step": 117385 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019073154875451823, "loss": 2.1338, "step": 117390 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.0001907307716239847, "loss": 2.15, "step": 117395 }, { "epoch": 0.28, "grad_norm": 1.828125, "learning_rate": 0.00019072999446245593, "loss": 2.2135, "step": 117400 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.00019072921726993213, "loss": 2.1506, "step": 117405 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.0001907284400464136, "loss": 2.1059, "step": 117410 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019072766279190056, "loss": 2.0994, "step": 117415 }, { "epoch": 0.28, "grad_norm": 1.7890625, "learning_rate": 0.00019072688550639337, "loss": 1.9695, "step": 117420 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019072610818989216, "loss": 2.1939, "step": 117425 }, { "epoch": 0.28, "grad_norm": 2.53125, "learning_rate": 0.0001907253308423973, "loss": 2.0594, "step": 117430 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019072455346390903, "loss": 2.3355, "step": 117435 }, { "epoch": 0.28, "grad_norm": 1.640625, "learning_rate": 0.0001907237760544276, "loss": 2.195, "step": 117440 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.0001907229986139533, "loss": 2.3188, "step": 117445 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019072222114248632, "loss": 2.1433, "step": 117450 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.000190721443640027, "loss": 2.1056, "step": 117455 }, { "epoch": 0.28, "grad_norm": 2.453125, "learning_rate": 0.00019072066610657565, "loss": 2.1019, "step": 117460 }, { "epoch": 0.28, "grad_norm": 1.65625, "learning_rate": 0.0001907198885421324, "loss": 2.002, "step": 117465 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019071911094669766, "loss": 2.0277, "step": 117470 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019071833332027157, "loss": 2.134, "step": 117475 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019071755566285448, "loss": 2.3903, "step": 117480 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019071677797444662, "loss": 2.162, "step": 117485 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019071600025504827, "loss": 2.2043, "step": 117490 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019071522250465968, "loss": 1.9603, "step": 117495 }, { "epoch": 0.28, "grad_norm": 1.6796875, "learning_rate": 0.00019071444472328113, "loss": 2.0683, "step": 117500 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019071366691091285, "loss": 2.1797, "step": 117505 }, { "epoch": 0.28, "grad_norm": 1.671875, "learning_rate": 0.00019071288906755517, "loss": 2.0726, "step": 117510 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019071211119320834, "loss": 2.2726, "step": 117515 }, { "epoch": 0.28, "grad_norm": 2.546875, "learning_rate": 0.00019071133328787257, "loss": 1.8379, "step": 117520 }, { "epoch": 0.28, "grad_norm": 2.421875, "learning_rate": 0.0001907105553515482, "loss": 2.2832, "step": 117525 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019070977738423541, "loss": 1.9784, "step": 117530 }, { "epoch": 0.28, "grad_norm": 1.7734375, "learning_rate": 0.00019070899938593457, "loss": 2.1737, "step": 117535 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019070822135664584, "loss": 2.3187, "step": 117540 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019070744329636956, "loss": 2.2771, "step": 117545 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.000190706665205106, "loss": 2.3718, "step": 117550 }, { "epoch": 0.28, "grad_norm": 2.5625, "learning_rate": 0.00019070588708285536, "loss": 2.1054, "step": 117555 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019070510892961795, "loss": 2.238, "step": 117560 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019070433074539403, "loss": 2.1107, "step": 117565 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019070355253018387, "loss": 2.1069, "step": 117570 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.0001907027742839877, "loss": 2.1344, "step": 117575 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019070199600680588, "loss": 2.1137, "step": 117580 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019070121769863856, "loss": 2.0458, "step": 117585 }, { "epoch": 0.28, "grad_norm": 1.6953125, "learning_rate": 0.0001907004393594861, "loss": 2.2311, "step": 117590 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.0001906996609893487, "loss": 2.2967, "step": 117595 }, { "epoch": 0.28, "grad_norm": 1.8515625, "learning_rate": 0.00019069888258822665, "loss": 2.2107, "step": 117600 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019069810415612022, "loss": 2.0952, "step": 117605 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.00019069732569302968, "loss": 2.1858, "step": 117610 }, { "epoch": 0.28, "grad_norm": 2.453125, "learning_rate": 0.0001906965471989553, "loss": 2.0371, "step": 117615 }, { "epoch": 0.28, "grad_norm": 1.703125, "learning_rate": 0.00019069576867389732, "loss": 1.9545, "step": 117620 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.000190694990117856, "loss": 2.3828, "step": 117625 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019069421153083166, "loss": 2.123, "step": 117630 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019069343291282448, "loss": 2.0799, "step": 117635 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019069265426383485, "loss": 2.0314, "step": 117640 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019069187558386292, "loss": 2.047, "step": 117645 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019069109687290902, "loss": 2.0567, "step": 117650 }, { "epoch": 0.28, "grad_norm": 2.859375, "learning_rate": 0.00019069031813097337, "loss": 2.2195, "step": 117655 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019068953935805628, "loss": 2.1571, "step": 117660 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.000190688760554158, "loss": 2.0858, "step": 117665 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019068798171927883, "loss": 2.2892, "step": 117670 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019068720285341892, "loss": 2.2009, "step": 117675 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019068642395657866, "loss": 2.1618, "step": 117680 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019068564502875828, "loss": 2.2509, "step": 117685 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019068486606995807, "loss": 2.2216, "step": 117690 }, { "epoch": 0.28, "grad_norm": 2.84375, "learning_rate": 0.0001906840870801782, "loss": 2.0064, "step": 117695 }, { "epoch": 0.28, "grad_norm": 1.5703125, "learning_rate": 0.00019068330805941904, "loss": 2.0418, "step": 117700 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.0001906825290076808, "loss": 2.1134, "step": 117705 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019068174992496376, "loss": 2.1814, "step": 117710 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019068097081126822, "loss": 1.8805, "step": 117715 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.0001906801916665944, "loss": 2.0366, "step": 117720 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019067941249094258, "loss": 2.1742, "step": 117725 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019067863328431302, "loss": 2.1979, "step": 117730 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019067785404670599, "loss": 2.2356, "step": 117735 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019067707477812178, "loss": 2.1829, "step": 117740 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019067629547856062, "loss": 2.1406, "step": 117745 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.0001906755161480228, "loss": 2.0402, "step": 117750 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.0001906747367865086, "loss": 2.0924, "step": 117755 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019067395739401824, "loss": 2.0015, "step": 117760 }, { "epoch": 0.28, "grad_norm": 1.734375, "learning_rate": 0.000190673177970552, "loss": 2.1778, "step": 117765 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019067239851611016, "loss": 2.0985, "step": 117770 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019067161903069303, "loss": 2.0174, "step": 117775 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019067083951430078, "loss": 2.2958, "step": 117780 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019067005996693378, "loss": 1.9437, "step": 117785 }, { "epoch": 0.28, "grad_norm": 1.8515625, "learning_rate": 0.0001906692803885922, "loss": 2.1185, "step": 117790 }, { "epoch": 0.28, "grad_norm": 2.203125, "learning_rate": 0.00019066850077927635, "loss": 2.1764, "step": 117795 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.0001906677211389865, "loss": 2.2642, "step": 117800 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.0001906669414677229, "loss": 2.0821, "step": 117805 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019066616176548586, "loss": 2.0665, "step": 117810 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.0001906653820322756, "loss": 2.2786, "step": 117815 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.0001906646022680924, "loss": 2.1572, "step": 117820 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019066382247293653, "loss": 2.0569, "step": 117825 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019066304264680825, "loss": 2.1531, "step": 117830 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.00019066226278970785, "loss": 2.1548, "step": 117835 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019066148290163555, "loss": 2.1923, "step": 117840 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019066070298259165, "loss": 2.0772, "step": 117845 }, { "epoch": 0.28, "grad_norm": 2.46875, "learning_rate": 0.00019065992303257643, "loss": 2.1142, "step": 117850 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019065914305159008, "loss": 2.3187, "step": 117855 }, { "epoch": 0.28, "grad_norm": 2.34375, "learning_rate": 0.00019065836303963299, "loss": 2.2756, "step": 117860 }, { "epoch": 0.28, "grad_norm": 1.859375, "learning_rate": 0.0001906575829967053, "loss": 2.2005, "step": 117865 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019065680292280738, "loss": 2.0699, "step": 117870 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019065602281793944, "loss": 2.1484, "step": 117875 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.0001906552426821018, "loss": 2.0798, "step": 117880 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019065446251529463, "loss": 2.1696, "step": 117885 }, { "epoch": 0.28, "grad_norm": 2.390625, "learning_rate": 0.00019065368231751823, "loss": 2.0598, "step": 117890 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019065290208877293, "loss": 2.197, "step": 117895 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019065212182905894, "loss": 2.0103, "step": 117900 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019065134153837656, "loss": 2.0754, "step": 117905 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019065056121672603, "loss": 1.9135, "step": 117910 }, { "epoch": 0.28, "grad_norm": 2.703125, "learning_rate": 0.0001906497808641076, "loss": 2.0209, "step": 117915 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019064900048052158, "loss": 2.1359, "step": 117920 }, { "epoch": 0.28, "grad_norm": 1.8515625, "learning_rate": 0.00019064822006596824, "loss": 2.195, "step": 117925 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019064743962044777, "loss": 2.1496, "step": 117930 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019064665914396054, "loss": 1.9901, "step": 117935 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019064587863650673, "loss": 2.0609, "step": 117940 }, { "epoch": 0.28, "grad_norm": 2.390625, "learning_rate": 0.00019064509809808668, "loss": 2.1361, "step": 117945 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.0001906443175287006, "loss": 2.1571, "step": 117950 }, { "epoch": 0.28, "grad_norm": 2.421875, "learning_rate": 0.00019064353692834878, "loss": 2.0222, "step": 117955 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019064275629703147, "loss": 2.0116, "step": 117960 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019064197563474898, "loss": 2.1255, "step": 117965 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019064119494150155, "loss": 2.1749, "step": 117970 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019064041421728943, "loss": 1.9892, "step": 117975 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019063963346211286, "loss": 2.0815, "step": 117980 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.0001906388526759722, "loss": 2.2031, "step": 117985 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019063807185886765, "loss": 2.1757, "step": 117990 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019063729101079948, "loss": 1.9504, "step": 117995 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.000190636510131768, "loss": 2.1801, "step": 118000 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019063572922177339, "loss": 2.1238, "step": 118005 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019063494828081603, "loss": 2.0466, "step": 118010 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019063416730889609, "loss": 2.1461, "step": 118015 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019063338630601387, "loss": 2.0984, "step": 118020 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019063260527216967, "loss": 2.1414, "step": 118025 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.0001906318242073637, "loss": 2.2062, "step": 118030 }, { "epoch": 0.28, "grad_norm": 2.203125, "learning_rate": 0.00019063104311159627, "loss": 2.0995, "step": 118035 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.00019063026198486765, "loss": 2.0933, "step": 118040 }, { "epoch": 0.28, "grad_norm": 1.71875, "learning_rate": 0.00019062948082717806, "loss": 2.3098, "step": 118045 }, { "epoch": 0.28, "grad_norm": 2.203125, "learning_rate": 0.0001906286996385278, "loss": 2.1744, "step": 118050 }, { "epoch": 0.28, "grad_norm": 1.7109375, "learning_rate": 0.00019062791841891716, "loss": 2.1769, "step": 118055 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019062713716834636, "loss": 2.1233, "step": 118060 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019062635588681568, "loss": 2.1999, "step": 118065 }, { "epoch": 0.28, "grad_norm": 1.625, "learning_rate": 0.00019062557457432539, "loss": 2.0049, "step": 118070 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019062479323087576, "loss": 2.2489, "step": 118075 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019062401185646708, "loss": 1.9967, "step": 118080 }, { "epoch": 0.28, "grad_norm": 1.6640625, "learning_rate": 0.0001906232304510996, "loss": 2.2024, "step": 118085 }, { "epoch": 0.28, "grad_norm": 2.34375, "learning_rate": 0.00019062244901477354, "loss": 2.2458, "step": 118090 }, { "epoch": 0.28, "grad_norm": 1.71875, "learning_rate": 0.0001906216675474892, "loss": 2.3544, "step": 118095 }, { "epoch": 0.28, "grad_norm": 2.390625, "learning_rate": 0.0001906208860492469, "loss": 2.1273, "step": 118100 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019062010452004685, "loss": 2.17, "step": 118105 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019061932295988933, "loss": 2.1735, "step": 118110 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019061854136877458, "loss": 2.1635, "step": 118115 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019061775974670294, "loss": 2.1429, "step": 118120 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.00019061697809367458, "loss": 2.1057, "step": 118125 }, { "epoch": 0.28, "grad_norm": 1.859375, "learning_rate": 0.00019061619640968984, "loss": 2.0845, "step": 118130 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.00019061541469474896, "loss": 2.0046, "step": 118135 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.0001906146329488522, "loss": 2.0847, "step": 118140 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.0001906138511719999, "loss": 2.1031, "step": 118145 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.0001906130693641922, "loss": 2.1024, "step": 118150 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019061228752542943, "loss": 2.1116, "step": 118155 }, { "epoch": 0.28, "grad_norm": 2.359375, "learning_rate": 0.0001906115056557119, "loss": 2.1623, "step": 118160 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.0001906107237550398, "loss": 2.1139, "step": 118165 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019060994182341342, "loss": 2.0014, "step": 118170 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.0001906091598608331, "loss": 2.2351, "step": 118175 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019060837786729902, "loss": 1.9776, "step": 118180 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019060759584281148, "loss": 2.1946, "step": 118185 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.0001906068137873707, "loss": 2.3627, "step": 118190 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019060603170097703, "loss": 2.2706, "step": 118195 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.0001906052495836307, "loss": 2.0912, "step": 118200 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019060446743533196, "loss": 2.2137, "step": 118205 }, { "epoch": 0.28, "grad_norm": 1.8359375, "learning_rate": 0.00019060368525608106, "loss": 2.2501, "step": 118210 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019060290304587837, "loss": 2.3047, "step": 118215 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019060212080472403, "loss": 2.2376, "step": 118220 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019060133853261837, "loss": 2.2989, "step": 118225 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019060055622956168, "loss": 2.0652, "step": 118230 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019059977389555418, "loss": 2.2515, "step": 118235 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019059899153059612, "loss": 2.175, "step": 118240 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019059820913468783, "loss": 1.997, "step": 118245 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019059742670782956, "loss": 2.4742, "step": 118250 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019059664425002154, "loss": 2.1568, "step": 118255 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.0001905958617612641, "loss": 1.9658, "step": 118260 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019059507924155744, "loss": 2.2165, "step": 118265 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019059429669090187, "loss": 2.0561, "step": 118270 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019059351410929764, "loss": 2.1744, "step": 118275 }, { "epoch": 0.28, "grad_norm": 1.7109375, "learning_rate": 0.00019059273149674502, "loss": 2.1883, "step": 118280 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.00019059194885324429, "loss": 2.1676, "step": 118285 }, { "epoch": 0.28, "grad_norm": 2.453125, "learning_rate": 0.0001905911661787957, "loss": 2.2038, "step": 118290 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019059038347339952, "loss": 1.9798, "step": 118295 }, { "epoch": 0.28, "grad_norm": 1.59375, "learning_rate": 0.00019058960073705606, "loss": 2.1911, "step": 118300 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019058881796976551, "loss": 2.2371, "step": 118305 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019058803517152818, "loss": 2.065, "step": 118310 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019058725234234432, "loss": 2.0701, "step": 118315 }, { "epoch": 0.28, "grad_norm": 1.671875, "learning_rate": 0.00019058646948221425, "loss": 2.0473, "step": 118320 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019058568659113817, "loss": 2.1129, "step": 118325 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.0001905849036691164, "loss": 2.1626, "step": 118330 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.00019058412071614917, "loss": 2.1887, "step": 118335 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019058333773223676, "loss": 2.0307, "step": 118340 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.00019058255471737944, "loss": 2.1577, "step": 118345 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019058177167157746, "loss": 1.7479, "step": 118350 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019058098859483114, "loss": 2.0935, "step": 118355 }, { "epoch": 0.28, "grad_norm": 1.578125, "learning_rate": 0.00019058020548714068, "loss": 2.0586, "step": 118360 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019057942234850642, "loss": 2.0176, "step": 118365 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019057863917892854, "loss": 2.065, "step": 118370 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019057785597840736, "loss": 2.345, "step": 118375 }, { "epoch": 0.28, "grad_norm": 2.578125, "learning_rate": 0.00019057707274694317, "loss": 2.1756, "step": 118380 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.0001905762894845362, "loss": 2.1954, "step": 118385 }, { "epoch": 0.28, "grad_norm": 1.578125, "learning_rate": 0.0001905755061911867, "loss": 2.0212, "step": 118390 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.000190574722866895, "loss": 2.0021, "step": 118395 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001905739395116613, "loss": 2.1309, "step": 118400 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019057315612548595, "loss": 2.2096, "step": 118405 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019057237270836912, "loss": 2.2655, "step": 118410 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019057158926031112, "loss": 2.1195, "step": 118415 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019057080578131227, "loss": 2.0395, "step": 118420 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019057002227137275, "loss": 2.0896, "step": 118425 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.0001905692387304929, "loss": 2.3972, "step": 118430 }, { "epoch": 0.28, "grad_norm": 1.59375, "learning_rate": 0.0001905684551586729, "loss": 2.2567, "step": 118435 }, { "epoch": 0.28, "grad_norm": 2.359375, "learning_rate": 0.0001905676715559131, "loss": 2.2724, "step": 118440 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.0001905668879222138, "loss": 2.3747, "step": 118445 }, { "epoch": 0.28, "grad_norm": 2.765625, "learning_rate": 0.00019056610425757513, "loss": 2.1808, "step": 118450 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019056532056199745, "loss": 2.163, "step": 118455 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019056453683548104, "loss": 2.1291, "step": 118460 }, { "epoch": 0.28, "grad_norm": 1.7734375, "learning_rate": 0.0001905637530780261, "loss": 2.1674, "step": 118465 }, { "epoch": 0.28, "grad_norm": 2.828125, "learning_rate": 0.000190562969289633, "loss": 2.1063, "step": 118470 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.0001905621854703019, "loss": 2.2281, "step": 118475 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019056140162003314, "loss": 2.1315, "step": 118480 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019056061773882696, "loss": 2.1956, "step": 118485 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019055983382668362, "loss": 2.0808, "step": 118490 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.0001905590498836034, "loss": 2.0201, "step": 118495 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019055826590958657, "loss": 2.1079, "step": 118500 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019055748190463338, "loss": 2.2607, "step": 118505 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019055669786874414, "loss": 2.2534, "step": 118510 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019055591380191906, "loss": 2.0871, "step": 118515 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019055512970415846, "loss": 2.1378, "step": 118520 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.0001905543455754626, "loss": 2.1109, "step": 118525 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019055356141583167, "loss": 2.1232, "step": 118530 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019055277722526605, "loss": 2.2697, "step": 118535 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019055199300376595, "loss": 2.032, "step": 118540 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.00019055120875133166, "loss": 2.1511, "step": 118545 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.0001905504244679634, "loss": 2.207, "step": 118550 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.0001905496401536615, "loss": 2.1927, "step": 118555 }, { "epoch": 0.28, "grad_norm": 2.734375, "learning_rate": 0.0001905488558084262, "loss": 2.2214, "step": 118560 }, { "epoch": 0.28, "grad_norm": 2.421875, "learning_rate": 0.00019054807143225778, "loss": 2.1552, "step": 118565 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019054728702515647, "loss": 2.3287, "step": 118570 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.0001905465025871226, "loss": 2.0555, "step": 118575 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019054571811815638, "loss": 2.0687, "step": 118580 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.0001905449336182581, "loss": 1.9998, "step": 118585 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019054414908742801, "loss": 2.287, "step": 118590 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.0001905433645256664, "loss": 2.256, "step": 118595 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019054257993297356, "loss": 2.0961, "step": 118600 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019054179530934973, "loss": 2.1303, "step": 118605 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.00019054101065479518, "loss": 2.0411, "step": 118610 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019054022596931016, "loss": 2.1092, "step": 118615 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019053944125289497, "loss": 2.2053, "step": 118620 }, { "epoch": 0.28, "grad_norm": 1.65625, "learning_rate": 0.00019053865650554984, "loss": 2.0883, "step": 118625 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.00019053787172727512, "loss": 2.1407, "step": 118630 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019053708691807098, "loss": 2.0332, "step": 118635 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019053630207793774, "loss": 2.113, "step": 118640 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019053551720687564, "loss": 2.1118, "step": 118645 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.000190534732304885, "loss": 2.2505, "step": 118650 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.000190533947371966, "loss": 2.2879, "step": 118655 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.00019053316240811902, "loss": 2.2388, "step": 118660 }, { "epoch": 0.28, "grad_norm": 1.734375, "learning_rate": 0.00019053237741334421, "loss": 2.072, "step": 118665 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.00019053159238764192, "loss": 2.0934, "step": 118670 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019053080733101243, "loss": 2.1697, "step": 118675 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019053002224345594, "loss": 2.0919, "step": 118680 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019052923712497278, "loss": 2.2722, "step": 118685 }, { "epoch": 0.28, "grad_norm": 1.8671875, "learning_rate": 0.00019052845197556318, "loss": 2.1504, "step": 118690 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019052766679522742, "loss": 2.1688, "step": 118695 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019052688158396574, "loss": 2.1565, "step": 118700 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019052609634177844, "loss": 2.2687, "step": 118705 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019052531106866582, "loss": 2.0873, "step": 118710 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.0001905245257646281, "loss": 2.0035, "step": 118715 }, { "epoch": 0.28, "grad_norm": 2.6875, "learning_rate": 0.00019052374042966553, "loss": 2.1589, "step": 118720 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019052295506377846, "loss": 2.2043, "step": 118725 }, { "epoch": 0.28, "grad_norm": 2.390625, "learning_rate": 0.00019052216966696704, "loss": 2.2836, "step": 118730 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.00019052138423923166, "loss": 2.1965, "step": 118735 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019052059878057253, "loss": 2.1622, "step": 118740 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019051981329098992, "loss": 2.1025, "step": 118745 }, { "epoch": 0.28, "grad_norm": 1.375, "learning_rate": 0.00019051902777048407, "loss": 2.0658, "step": 118750 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019051824221905531, "loss": 2.1884, "step": 118755 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019051745663670384, "loss": 2.3059, "step": 118760 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019051667102343, "loss": 2.0673, "step": 118765 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019051588537923403, "loss": 2.2866, "step": 118770 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.00019051509970411615, "loss": 2.1343, "step": 118775 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.0001905143139980767, "loss": 2.0373, "step": 118780 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.0001905135282611159, "loss": 2.1108, "step": 118785 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019051274249323407, "loss": 2.0256, "step": 118790 }, { "epoch": 0.28, "grad_norm": 1.6796875, "learning_rate": 0.00019051195669443142, "loss": 2.0515, "step": 118795 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.00019051117086470827, "loss": 2.0602, "step": 118800 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.0001905103850040648, "loss": 2.138, "step": 118805 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019050959911250142, "loss": 2.1209, "step": 118810 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019050881319001827, "loss": 1.9523, "step": 118815 }, { "epoch": 0.28, "grad_norm": 2.953125, "learning_rate": 0.0001905080272366157, "loss": 2.2238, "step": 118820 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.0001905072412522939, "loss": 2.0586, "step": 118825 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019050645523705325, "loss": 2.3344, "step": 118830 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.0001905056691908939, "loss": 2.1203, "step": 118835 }, { "epoch": 0.28, "grad_norm": 2.40625, "learning_rate": 0.00019050488311381618, "loss": 2.1632, "step": 118840 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019050409700582037, "loss": 1.9923, "step": 118845 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019050331086690672, "loss": 2.1941, "step": 118850 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019050252469707547, "loss": 2.1771, "step": 118855 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019050173849632693, "loss": 2.0594, "step": 118860 }, { "epoch": 0.28, "grad_norm": 3.0, "learning_rate": 0.00019050095226466133, "loss": 2.1694, "step": 118865 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.000190500166002079, "loss": 2.0334, "step": 118870 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019049937970858018, "loss": 2.1848, "step": 118875 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019049859338416512, "loss": 2.2765, "step": 118880 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.0001904978070288341, "loss": 2.1497, "step": 118885 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.00019049702064258733, "loss": 2.1636, "step": 118890 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019049623422542523, "loss": 2.0113, "step": 118895 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.0001904954477773479, "loss": 1.992, "step": 118900 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.0001904946612983557, "loss": 2.1349, "step": 118905 }, { "epoch": 0.28, "grad_norm": 1.7734375, "learning_rate": 0.00019049387478844892, "loss": 2.1152, "step": 118910 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019049308824762776, "loss": 2.1895, "step": 118915 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019049230167589255, "loss": 2.0197, "step": 118920 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019049151507324349, "loss": 2.1546, "step": 118925 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.00019049072843968092, "loss": 2.1175, "step": 118930 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019048994177520506, "loss": 2.119, "step": 118935 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001904891550798162, "loss": 2.1251, "step": 118940 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.00019048836835351457, "loss": 2.0824, "step": 118945 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019048758159630053, "loss": 2.1955, "step": 118950 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.00019048679480817426, "loss": 2.3766, "step": 118955 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019048600798913604, "loss": 2.0425, "step": 118960 }, { "epoch": 0.28, "grad_norm": 1.8359375, "learning_rate": 0.00019048522113918618, "loss": 2.1602, "step": 118965 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.00019048443425832495, "loss": 2.1689, "step": 118970 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019048364734655258, "loss": 1.9515, "step": 118975 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.00019048286040386936, "loss": 1.9943, "step": 118980 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019048207343027552, "loss": 2.1533, "step": 118985 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.0001904812864257714, "loss": 2.2114, "step": 118990 }, { "epoch": 0.28, "grad_norm": 2.671875, "learning_rate": 0.00019048049939035723, "loss": 2.3895, "step": 118995 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019047971232403324, "loss": 1.9921, "step": 119000 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019047892522679977, "loss": 1.9797, "step": 119005 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019047813809865708, "loss": 2.1102, "step": 119010 }, { "epoch": 0.28, "grad_norm": 1.7734375, "learning_rate": 0.00019047735093960538, "loss": 1.9302, "step": 119015 }, { "epoch": 0.28, "grad_norm": 1.671875, "learning_rate": 0.00019047656374964497, "loss": 2.136, "step": 119020 }, { "epoch": 0.28, "grad_norm": 2.34375, "learning_rate": 0.00019047577652877616, "loss": 2.0202, "step": 119025 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019047498927699915, "loss": 2.1807, "step": 119030 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019047420199431428, "loss": 2.0836, "step": 119035 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019047341468072176, "loss": 2.1875, "step": 119040 }, { "epoch": 0.28, "grad_norm": 1.8671875, "learning_rate": 0.00019047262733622189, "loss": 2.4091, "step": 119045 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001904718399608149, "loss": 2.1705, "step": 119050 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019047105255450112, "loss": 2.0653, "step": 119055 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019047026511728076, "loss": 2.2305, "step": 119060 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019046947764915414, "loss": 2.0981, "step": 119065 }, { "epoch": 0.28, "grad_norm": 2.390625, "learning_rate": 0.0001904686901501215, "loss": 2.2885, "step": 119070 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.00019046790262018312, "loss": 2.1558, "step": 119075 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019046711505933924, "loss": 2.2326, "step": 119080 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019046632746759018, "loss": 2.083, "step": 119085 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.0001904655398449362, "loss": 2.1335, "step": 119090 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019046475219137751, "loss": 2.1605, "step": 119095 }, { "epoch": 0.28, "grad_norm": 2.203125, "learning_rate": 0.00019046396450691445, "loss": 2.4861, "step": 119100 }, { "epoch": 0.28, "grad_norm": 1.59375, "learning_rate": 0.00019046317679154723, "loss": 1.9219, "step": 119105 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019046238904527619, "loss": 2.2616, "step": 119110 }, { "epoch": 0.28, "grad_norm": 1.7109375, "learning_rate": 0.00019046160126810153, "loss": 2.1848, "step": 119115 }, { "epoch": 0.28, "grad_norm": 1.6171875, "learning_rate": 0.0001904608134600235, "loss": 2.0679, "step": 119120 }, { "epoch": 0.28, "grad_norm": 2.46875, "learning_rate": 0.0001904600256210425, "loss": 2.1626, "step": 119125 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019045923775115866, "loss": 2.2501, "step": 119130 }, { "epoch": 0.28, "grad_norm": 1.6953125, "learning_rate": 0.00019045844985037232, "loss": 2.1822, "step": 119135 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019045766191868376, "loss": 2.2466, "step": 119140 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019045687395609318, "loss": 2.078, "step": 119145 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019045608596260092, "loss": 2.2112, "step": 119150 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019045529793820716, "loss": 1.9735, "step": 119155 }, { "epoch": 0.28, "grad_norm": 2.703125, "learning_rate": 0.0001904545098829123, "loss": 2.0946, "step": 119160 }, { "epoch": 0.28, "grad_norm": 1.6640625, "learning_rate": 0.0001904537217967165, "loss": 2.1062, "step": 119165 }, { "epoch": 0.28, "grad_norm": 2.65625, "learning_rate": 0.00019045293367962008, "loss": 2.2015, "step": 119170 }, { "epoch": 0.28, "grad_norm": 1.8359375, "learning_rate": 0.00019045214553162332, "loss": 2.3182, "step": 119175 }, { "epoch": 0.28, "grad_norm": 1.7109375, "learning_rate": 0.00019045135735272647, "loss": 2.1591, "step": 119180 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019045056914292975, "loss": 2.0847, "step": 119185 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.0001904497809022335, "loss": 2.1854, "step": 119190 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019044899263063797, "loss": 2.2194, "step": 119195 }, { "epoch": 0.28, "grad_norm": 2.71875, "learning_rate": 0.00019044820432814342, "loss": 2.0649, "step": 119200 }, { "epoch": 0.28, "grad_norm": 2.34375, "learning_rate": 0.00019044741599475012, "loss": 2.1277, "step": 119205 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019044662763045836, "loss": 2.2121, "step": 119210 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019044583923526835, "loss": 2.2901, "step": 119215 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019044505080918045, "loss": 2.123, "step": 119220 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019044426235219483, "loss": 2.1281, "step": 119225 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.00019044347386431185, "loss": 2.2239, "step": 119230 }, { "epoch": 0.28, "grad_norm": 1.5234375, "learning_rate": 0.0001904426853455317, "loss": 2.0917, "step": 119235 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019044189679585474, "loss": 2.1565, "step": 119240 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.00019044110821528115, "loss": 2.0651, "step": 119245 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019044031960381124, "loss": 2.2192, "step": 119250 }, { "epoch": 0.28, "grad_norm": 1.6171875, "learning_rate": 0.00019043953096144529, "loss": 1.9696, "step": 119255 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019043874228818355, "loss": 2.1119, "step": 119260 }, { "epoch": 0.28, "grad_norm": 2.609375, "learning_rate": 0.00019043795358402628, "loss": 2.2138, "step": 119265 }, { "epoch": 0.28, "grad_norm": 2.8125, "learning_rate": 0.00019043716484897378, "loss": 2.1376, "step": 119270 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019043637608302634, "loss": 2.226, "step": 119275 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019043558728618412, "loss": 2.2424, "step": 119280 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019043479845844754, "loss": 2.1701, "step": 119285 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019043400959981675, "loss": 1.9061, "step": 119290 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019043322071029203, "loss": 2.1747, "step": 119295 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019043243178987375, "loss": 2.1434, "step": 119300 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019043164283856208, "loss": 2.0928, "step": 119305 }, { "epoch": 0.28, "grad_norm": 1.8671875, "learning_rate": 0.00019043085385635731, "loss": 2.0881, "step": 119310 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019043006484325973, "loss": 2.3697, "step": 119315 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019042927579926961, "loss": 2.1731, "step": 119320 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.0001904284867243872, "loss": 2.2174, "step": 119325 }, { "epoch": 0.28, "grad_norm": 1.6953125, "learning_rate": 0.0001904276976186128, "loss": 2.2072, "step": 119330 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.0001904269084819466, "loss": 2.0127, "step": 119335 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019042611931438899, "loss": 2.2732, "step": 119340 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019042533011594012, "loss": 2.0117, "step": 119345 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019042454088660037, "loss": 2.0642, "step": 119350 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019042375162636995, "loss": 2.2779, "step": 119355 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.0001904229623352491, "loss": 2.3289, "step": 119360 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019042217301323814, "loss": 2.1632, "step": 119365 }, { "epoch": 0.28, "grad_norm": 1.6640625, "learning_rate": 0.00019042138366033736, "loss": 2.0392, "step": 119370 }, { "epoch": 0.28, "grad_norm": 1.75, "learning_rate": 0.00019042059427654698, "loss": 2.0951, "step": 119375 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019041980486186728, "loss": 2.1317, "step": 119380 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019041901541629852, "loss": 2.2101, "step": 119385 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.000190418225939841, "loss": 2.2568, "step": 119390 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019041743643249498, "loss": 2.1528, "step": 119395 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.0001904166468942607, "loss": 2.0032, "step": 119400 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019041585732513847, "loss": 2.2813, "step": 119405 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019041506772512855, "loss": 2.2624, "step": 119410 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019041427809423118, "loss": 2.08, "step": 119415 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019041348843244665, "loss": 2.0265, "step": 119420 }, { "epoch": 0.28, "grad_norm": 2.75, "learning_rate": 0.0001904126987397753, "loss": 2.0845, "step": 119425 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019041190901621726, "loss": 2.171, "step": 119430 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019041111926177288, "loss": 2.1377, "step": 119435 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019041032947644243, "loss": 2.1427, "step": 119440 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.0001904095396602262, "loss": 2.1561, "step": 119445 }, { "epoch": 0.28, "grad_norm": 2.796875, "learning_rate": 0.0001904087498131244, "loss": 2.1942, "step": 119450 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019040795993513734, "loss": 2.046, "step": 119455 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019040717002626528, "loss": 1.9541, "step": 119460 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.0001904063800865085, "loss": 2.2187, "step": 119465 }, { "epoch": 0.28, "grad_norm": 1.7890625, "learning_rate": 0.00019040559011586724, "loss": 2.1704, "step": 119470 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.0001904048001143418, "loss": 2.0921, "step": 119475 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019040401008193247, "loss": 2.0037, "step": 119480 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019040322001863947, "loss": 1.8921, "step": 119485 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019040242992446308, "loss": 2.1751, "step": 119490 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019040163979940357, "loss": 2.154, "step": 119495 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019040084964346124, "loss": 2.1334, "step": 119500 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019040005945663636, "loss": 2.326, "step": 119505 }, { "epoch": 0.28, "grad_norm": 2.640625, "learning_rate": 0.00019039926923892914, "loss": 2.0881, "step": 119510 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019039847899033992, "loss": 1.9751, "step": 119515 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.0001903976887108689, "loss": 1.9861, "step": 119520 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.00019039689840051645, "loss": 1.8193, "step": 119525 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019039610805928275, "loss": 2.0668, "step": 119530 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019039531768716807, "loss": 2.2943, "step": 119535 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019039452728417274, "loss": 2.2026, "step": 119540 }, { "epoch": 0.28, "grad_norm": 2.421875, "learning_rate": 0.000190393736850297, "loss": 2.0092, "step": 119545 }, { "epoch": 0.28, "grad_norm": 2.390625, "learning_rate": 0.00019039294638554112, "loss": 2.2016, "step": 119550 }, { "epoch": 0.28, "grad_norm": 2.59375, "learning_rate": 0.00019039215588990537, "loss": 1.9465, "step": 119555 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019039136536339003, "loss": 2.1688, "step": 119560 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019039057480599536, "loss": 2.2356, "step": 119565 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019038978421772161, "loss": 1.9733, "step": 119570 }, { "epoch": 0.28, "grad_norm": 1.6171875, "learning_rate": 0.00019038899359856908, "loss": 2.1131, "step": 119575 }, { "epoch": 0.28, "grad_norm": 1.5703125, "learning_rate": 0.00019038820294853804, "loss": 1.9556, "step": 119580 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019038741226762873, "loss": 2.0991, "step": 119585 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019038662155584145, "loss": 2.0842, "step": 119590 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019038583081317648, "loss": 2.0548, "step": 119595 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019038504003963404, "loss": 2.0819, "step": 119600 }, { "epoch": 0.28, "grad_norm": 1.6796875, "learning_rate": 0.00019038424923521446, "loss": 2.124, "step": 119605 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019038345839991796, "loss": 2.2289, "step": 119610 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019038266753374484, "loss": 2.1024, "step": 119615 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019038187663669533, "loss": 2.2285, "step": 119620 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.00019038108570876977, "loss": 2.2098, "step": 119625 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.0001903802947499684, "loss": 2.2278, "step": 119630 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019037950376029144, "loss": 2.0278, "step": 119635 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019037871273973922, "loss": 2.118, "step": 119640 }, { "epoch": 0.28, "grad_norm": 1.828125, "learning_rate": 0.000190377921688312, "loss": 2.1442, "step": 119645 }, { "epoch": 0.28, "grad_norm": 2.359375, "learning_rate": 0.00019037713060601007, "loss": 2.2415, "step": 119650 }, { "epoch": 0.28, "grad_norm": 2.90625, "learning_rate": 0.00019037633949283363, "loss": 2.0685, "step": 119655 }, { "epoch": 0.28, "grad_norm": 2.53125, "learning_rate": 0.00019037554834878303, "loss": 2.3797, "step": 119660 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019037475717385846, "loss": 2.2493, "step": 119665 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019037396596806026, "loss": 2.0789, "step": 119670 }, { "epoch": 0.28, "grad_norm": 2.359375, "learning_rate": 0.00019037317473138867, "loss": 1.8915, "step": 119675 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019037238346384397, "loss": 2.1918, "step": 119680 }, { "epoch": 0.28, "grad_norm": 1.65625, "learning_rate": 0.00019037159216542642, "loss": 2.2133, "step": 119685 }, { "epoch": 0.28, "grad_norm": 1.6953125, "learning_rate": 0.0001903708008361363, "loss": 2.3002, "step": 119690 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.00019037000947597386, "loss": 2.083, "step": 119695 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019036921808493942, "loss": 2.1581, "step": 119700 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.0001903684266630332, "loss": 1.9501, "step": 119705 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019036763521025548, "loss": 2.2271, "step": 119710 }, { "epoch": 0.28, "grad_norm": 1.96875, "learning_rate": 0.0001903668437266065, "loss": 2.0792, "step": 119715 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.0001903660522120866, "loss": 2.1243, "step": 119720 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019036526066669603, "loss": 2.213, "step": 119725 }, { "epoch": 0.28, "grad_norm": 1.6796875, "learning_rate": 0.00019036446909043506, "loss": 1.9426, "step": 119730 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.00019036367748330392, "loss": 1.9678, "step": 119735 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019036288584530292, "loss": 2.2013, "step": 119740 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019036209417643232, "loss": 1.9825, "step": 119745 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019036130247669238, "loss": 2.1061, "step": 119750 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019036051074608338, "loss": 2.1774, "step": 119755 }, { "epoch": 0.28, "grad_norm": 1.6328125, "learning_rate": 0.00019035971898460557, "loss": 1.9404, "step": 119760 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019035892719225929, "loss": 2.1781, "step": 119765 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019035813536904473, "loss": 2.1796, "step": 119770 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.0001903573435149622, "loss": 2.1, "step": 119775 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.000190356551630012, "loss": 1.9603, "step": 119780 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019035575971419432, "loss": 2.0553, "step": 119785 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019035496776750947, "loss": 2.0913, "step": 119790 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019035417578995773, "loss": 2.1628, "step": 119795 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019035338378153938, "loss": 2.2423, "step": 119800 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.00019035259174225467, "loss": 2.096, "step": 119805 }, { "epoch": 0.28, "grad_norm": 1.71875, "learning_rate": 0.00019035179967210385, "loss": 2.3074, "step": 119810 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019035100757108727, "loss": 2.2425, "step": 119815 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.0001903502154392051, "loss": 2.1072, "step": 119820 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.0001903494232764577, "loss": 2.2126, "step": 119825 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019034863108284523, "loss": 1.9062, "step": 119830 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.0001903478388583681, "loss": 2.2285, "step": 119835 }, { "epoch": 0.28, "grad_norm": 2.53125, "learning_rate": 0.00019034704660302648, "loss": 2.0888, "step": 119840 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019034625431682068, "loss": 2.1505, "step": 119845 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019034546199975095, "loss": 2.1181, "step": 119850 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.00019034466965181756, "loss": 2.2599, "step": 119855 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019034387727302082, "loss": 2.2418, "step": 119860 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019034308486336093, "loss": 2.1041, "step": 119865 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019034229242283825, "loss": 2.2364, "step": 119870 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.000190341499951453, "loss": 2.0716, "step": 119875 }, { "epoch": 0.28, "grad_norm": 2.5, "learning_rate": 0.00019034070744920544, "loss": 2.1754, "step": 119880 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019033991491609586, "loss": 2.1022, "step": 119885 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019033912235212453, "loss": 2.1454, "step": 119890 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019033832975729172, "loss": 2.2217, "step": 119895 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019033753713159767, "loss": 2.1549, "step": 119900 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019033674447504272, "loss": 2.1098, "step": 119905 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.0001903359517876271, "loss": 2.2322, "step": 119910 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019033515906935103, "loss": 1.858, "step": 119915 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.00019033436632021487, "loss": 2.2288, "step": 119920 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019033357354021884, "loss": 2.063, "step": 119925 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.00019033278072936322, "loss": 2.0681, "step": 119930 }, { "epoch": 0.28, "grad_norm": 2.453125, "learning_rate": 0.0001903319878876483, "loss": 2.0687, "step": 119935 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019033119501507434, "loss": 1.9693, "step": 119940 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019033040211164157, "loss": 2.2416, "step": 119945 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019032960917735031, "loss": 2.0932, "step": 119950 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019032881621220083, "loss": 2.1243, "step": 119955 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.0001903280232161934, "loss": 2.0694, "step": 119960 }, { "epoch": 0.28, "grad_norm": 2.734375, "learning_rate": 0.00019032723018932825, "loss": 2.2158, "step": 119965 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.0001903264371316057, "loss": 2.2464, "step": 119970 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019032564404302597, "loss": 2.0692, "step": 119975 }, { "epoch": 0.28, "grad_norm": 1.5859375, "learning_rate": 0.0001903248509235894, "loss": 2.1919, "step": 119980 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019032405777329623, "loss": 2.1719, "step": 119985 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001903232645921467, "loss": 2.0344, "step": 119990 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019032247138014107, "loss": 2.0177, "step": 119995 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.0001903216781372797, "loss": 2.236, "step": 120000 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.0001903208848635628, "loss": 2.107, "step": 120005 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019032009155899062, "loss": 1.9238, "step": 120010 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019031929822356348, "loss": 2.1866, "step": 120015 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019031850485728163, "loss": 2.0205, "step": 120020 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019031771146014531, "loss": 2.2278, "step": 120025 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019031691803215485, "loss": 2.2253, "step": 120030 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.0001903161245733105, "loss": 2.1397, "step": 120035 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019031533108361252, "loss": 1.9838, "step": 120040 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.0001903145375630612, "loss": 2.2737, "step": 120045 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019031374401165676, "loss": 2.1211, "step": 120050 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.0001903129504293995, "loss": 2.348, "step": 120055 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019031215681628973, "loss": 2.0283, "step": 120060 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.0001903113631723277, "loss": 2.0829, "step": 120065 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019031056949751364, "loss": 2.1604, "step": 120070 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019030977579184786, "loss": 2.0746, "step": 120075 }, { "epoch": 0.28, "grad_norm": 2.453125, "learning_rate": 0.00019030898205533062, "loss": 2.1605, "step": 120080 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019030818828796222, "loss": 2.0926, "step": 120085 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.0001903073944897429, "loss": 2.1627, "step": 120090 }, { "epoch": 0.28, "grad_norm": 1.7890625, "learning_rate": 0.0001903066006606729, "loss": 2.07, "step": 120095 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019030580680075255, "loss": 2.1937, "step": 120100 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019030501290998207, "loss": 2.2248, "step": 120105 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019030421898836179, "loss": 2.1315, "step": 120110 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019030342503589196, "loss": 2.2039, "step": 120115 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019030263105257282, "loss": 1.9462, "step": 120120 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.0001903018370384047, "loss": 2.3521, "step": 120125 }, { "epoch": 0.28, "grad_norm": 2.328125, "learning_rate": 0.00019030104299338777, "loss": 2.2826, "step": 120130 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019030024891752244, "loss": 2.1495, "step": 120135 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019029945481080884, "loss": 2.2629, "step": 120140 }, { "epoch": 0.28, "grad_norm": 2.296875, "learning_rate": 0.00019029866067324735, "loss": 2.116, "step": 120145 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019029786650483818, "loss": 2.3068, "step": 120150 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019029707230558166, "loss": 2.3592, "step": 120155 }, { "epoch": 0.28, "grad_norm": 1.6875, "learning_rate": 0.00019029627807547796, "loss": 2.1586, "step": 120160 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019029548381452744, "loss": 1.9063, "step": 120165 }, { "epoch": 0.28, "grad_norm": 1.90625, "learning_rate": 0.00019029468952273034, "loss": 1.7993, "step": 120170 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019029389520008695, "loss": 2.2278, "step": 120175 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019029310084659755, "loss": 2.1071, "step": 120180 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.00019029230646226236, "loss": 2.2056, "step": 120185 }, { "epoch": 0.28, "grad_norm": 1.828125, "learning_rate": 0.0001902915120470817, "loss": 2.1706, "step": 120190 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001902907176010558, "loss": 2.1954, "step": 120195 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019028992312418496, "loss": 2.1968, "step": 120200 }, { "epoch": 0.28, "grad_norm": 2.203125, "learning_rate": 0.00019028912861646945, "loss": 2.1045, "step": 120205 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019028833407790953, "loss": 2.2894, "step": 120210 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019028753950850547, "loss": 2.1413, "step": 120215 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019028674490825757, "loss": 2.1577, "step": 120220 }, { "epoch": 0.28, "grad_norm": 1.8671875, "learning_rate": 0.00019028595027716606, "loss": 2.0238, "step": 120225 }, { "epoch": 0.28, "grad_norm": 1.828125, "learning_rate": 0.00019028515561523123, "loss": 2.2525, "step": 120230 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.0001902843609224534, "loss": 2.1435, "step": 120235 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019028356619883274, "loss": 2.1433, "step": 120240 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.0001902827714443696, "loss": 2.0499, "step": 120245 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.0001902819766590642, "loss": 1.8679, "step": 120250 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019028118184291688, "loss": 1.9093, "step": 120255 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019028038699592784, "loss": 2.2522, "step": 120260 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.0001902795921180974, "loss": 2.1455, "step": 120265 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.0001902787972094258, "loss": 2.3732, "step": 120270 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019027800226991337, "loss": 2.1242, "step": 120275 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019027720729956028, "loss": 1.9833, "step": 120280 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.0001902764122983669, "loss": 1.8642, "step": 120285 }, { "epoch": 0.28, "grad_norm": 1.8359375, "learning_rate": 0.00019027561726633343, "loss": 2.0428, "step": 120290 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019027482220346017, "loss": 2.103, "step": 120295 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019027402710974743, "loss": 2.1119, "step": 120300 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.00019027323198519543, "loss": 2.15, "step": 120305 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019027243682980444, "loss": 2.0287, "step": 120310 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019027164164357475, "loss": 2.1822, "step": 120315 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019027084642650666, "loss": 2.1417, "step": 120320 }, { "epoch": 0.28, "grad_norm": 2.5625, "learning_rate": 0.00019027005117860037, "loss": 2.0405, "step": 120325 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019026925589985623, "loss": 2.2876, "step": 120330 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019026846059027445, "loss": 2.0226, "step": 120335 }, { "epoch": 0.28, "grad_norm": 1.9140625, "learning_rate": 0.00019026766524985532, "loss": 2.1216, "step": 120340 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019026686987859914, "loss": 2.0725, "step": 120345 }, { "epoch": 0.28, "grad_norm": 2.28125, "learning_rate": 0.00019026607447650615, "loss": 2.1403, "step": 120350 }, { "epoch": 0.28, "grad_norm": 1.9375, "learning_rate": 0.00019026527904357664, "loss": 2.1475, "step": 120355 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019026448357981087, "loss": 2.0072, "step": 120360 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019026368808520912, "loss": 2.1401, "step": 120365 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019026289255977163, "loss": 1.8999, "step": 120370 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019026209700349873, "loss": 2.1074, "step": 120375 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019026130141639068, "loss": 2.1996, "step": 120380 }, { "epoch": 0.28, "grad_norm": 1.7265625, "learning_rate": 0.00019026050579844768, "loss": 2.2635, "step": 120385 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019025971014967008, "loss": 2.3835, "step": 120390 }, { "epoch": 0.28, "grad_norm": 2.5, "learning_rate": 0.00019025891447005812, "loss": 2.1619, "step": 120395 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019025811875961207, "loss": 2.1698, "step": 120400 }, { "epoch": 0.28, "grad_norm": 2.421875, "learning_rate": 0.00019025732301833223, "loss": 2.0645, "step": 120405 }, { "epoch": 0.28, "grad_norm": 1.9921875, "learning_rate": 0.00019025652724621885, "loss": 1.9982, "step": 120410 }, { "epoch": 0.28, "grad_norm": 1.7421875, "learning_rate": 0.0001902557314432722, "loss": 2.006, "step": 120415 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019025493560949255, "loss": 2.1859, "step": 120420 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.0001902541397448802, "loss": 2.3312, "step": 120425 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019025334384943534, "loss": 2.2695, "step": 120430 }, { "epoch": 0.28, "grad_norm": 2.359375, "learning_rate": 0.00019025254792315836, "loss": 2.2339, "step": 120435 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019025175196604944, "loss": 2.2467, "step": 120440 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001902509559781089, "loss": 2.1548, "step": 120445 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.000190250159959337, "loss": 2.1399, "step": 120450 }, { "epoch": 0.28, "grad_norm": 1.4453125, "learning_rate": 0.00019024936390973397, "loss": 2.1176, "step": 120455 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.0001902485678293002, "loss": 2.2872, "step": 120460 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.0001902477717180358, "loss": 1.9452, "step": 120465 }, { "epoch": 0.28, "grad_norm": 2.140625, "learning_rate": 0.00019024697557594117, "loss": 2.0592, "step": 120470 }, { "epoch": 0.28, "grad_norm": 1.78125, "learning_rate": 0.0001902461794030165, "loss": 2.1061, "step": 120475 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.00019024538319926216, "loss": 2.2819, "step": 120480 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019024458696467834, "loss": 2.1686, "step": 120485 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.0001902437906992653, "loss": 2.0408, "step": 120490 }, { "epoch": 0.28, "grad_norm": 1.796875, "learning_rate": 0.00019024299440302336, "loss": 2.2187, "step": 120495 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019024219807595277, "loss": 2.067, "step": 120500 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019024140171805382, "loss": 2.4187, "step": 120505 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019024060532932678, "loss": 2.0677, "step": 120510 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019023980890977188, "loss": 2.3079, "step": 120515 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019023901245938946, "loss": 2.109, "step": 120520 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019023821597817972, "loss": 2.1589, "step": 120525 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.000190237419466143, "loss": 2.0191, "step": 120530 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019023662292327954, "loss": 2.0757, "step": 120535 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.00019023582634958958, "loss": 2.1832, "step": 120540 }, { "epoch": 0.28, "grad_norm": 1.84375, "learning_rate": 0.00019023502974507345, "loss": 2.0945, "step": 120545 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.0001902342331097314, "loss": 2.1678, "step": 120550 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.0001902334364435637, "loss": 2.1107, "step": 120555 }, { "epoch": 0.28, "grad_norm": 1.6875, "learning_rate": 0.0001902326397465706, "loss": 2.393, "step": 120560 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019023184301875242, "loss": 2.0084, "step": 120565 }, { "epoch": 0.28, "grad_norm": 2.40625, "learning_rate": 0.0001902310462601094, "loss": 2.1378, "step": 120570 }, { "epoch": 0.28, "grad_norm": 2.25, "learning_rate": 0.00019023024947064184, "loss": 2.0936, "step": 120575 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019022945265034994, "loss": 2.2875, "step": 120580 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.00019022865579923406, "loss": 2.083, "step": 120585 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019022785891729442, "loss": 1.9863, "step": 120590 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019022706200453132, "loss": 2.1119, "step": 120595 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.000190226265060945, "loss": 2.0032, "step": 120600 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019022546808653576, "loss": 2.1574, "step": 120605 }, { "epoch": 0.28, "grad_norm": 1.8671875, "learning_rate": 0.00019022467108130385, "loss": 2.1798, "step": 120610 }, { "epoch": 0.28, "grad_norm": 1.7109375, "learning_rate": 0.0001902238740452496, "loss": 2.1328, "step": 120615 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.0001902230769783732, "loss": 2.283, "step": 120620 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019022227988067496, "loss": 2.2238, "step": 120625 }, { "epoch": 0.28, "grad_norm": 2.34375, "learning_rate": 0.00019022148275215517, "loss": 2.2709, "step": 120630 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.00019022068559281408, "loss": 2.1437, "step": 120635 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019021988840265194, "loss": 2.2228, "step": 120640 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019021909118166908, "loss": 2.0025, "step": 120645 }, { "epoch": 0.28, "grad_norm": 1.640625, "learning_rate": 0.00019021829392986574, "loss": 2.0459, "step": 120650 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.0001902174966472422, "loss": 2.0666, "step": 120655 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019021669933379874, "loss": 2.1557, "step": 120660 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.0001902159019895356, "loss": 2.0917, "step": 120665 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019021510461445304, "loss": 2.2189, "step": 120670 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.0001902143072085514, "loss": 2.3596, "step": 120675 }, { "epoch": 0.28, "grad_norm": 1.859375, "learning_rate": 0.0001902135097718309, "loss": 2.0469, "step": 120680 }, { "epoch": 0.28, "grad_norm": 2.09375, "learning_rate": 0.00019021271230429184, "loss": 2.1029, "step": 120685 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.0001902119148059345, "loss": 2.2475, "step": 120690 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001902111172767591, "loss": 2.1722, "step": 120695 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019021031971676598, "loss": 2.1867, "step": 120700 }, { "epoch": 0.28, "grad_norm": 2.421875, "learning_rate": 0.00019020952212595534, "loss": 2.1826, "step": 120705 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019020872450432755, "loss": 2.0488, "step": 120710 }, { "epoch": 0.28, "grad_norm": 2.515625, "learning_rate": 0.00019020792685188275, "loss": 2.0645, "step": 120715 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019020712916862135, "loss": 2.0864, "step": 120720 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019020633145454353, "loss": 2.0589, "step": 120725 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.00019020553370964954, "loss": 2.2039, "step": 120730 }, { "epoch": 0.28, "grad_norm": 1.703125, "learning_rate": 0.0001902047359339398, "loss": 2.1693, "step": 120735 }, { "epoch": 0.28, "grad_norm": 2.46875, "learning_rate": 0.0001902039381274144, "loss": 2.1571, "step": 120740 }, { "epoch": 0.28, "grad_norm": 2.4375, "learning_rate": 0.00019020314029007378, "loss": 2.1004, "step": 120745 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019020234242191807, "loss": 2.1484, "step": 120750 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019020154452294762, "loss": 2.2782, "step": 120755 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.0001902007465931627, "loss": 2.0172, "step": 120760 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.00019019994863256356, "loss": 2.1354, "step": 120765 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019019915064115049, "loss": 2.2183, "step": 120770 }, { "epoch": 0.28, "grad_norm": 1.9296875, "learning_rate": 0.00019019835261892376, "loss": 2.1049, "step": 120775 }, { "epoch": 0.28, "grad_norm": 1.5859375, "learning_rate": 0.0001901975545658836, "loss": 2.2084, "step": 120780 }, { "epoch": 0.28, "grad_norm": 2.703125, "learning_rate": 0.00019019675648203034, "loss": 2.2548, "step": 120785 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019019595836736425, "loss": 2.1347, "step": 120790 }, { "epoch": 0.28, "grad_norm": 2.234375, "learning_rate": 0.00019019516022188558, "loss": 2.203, "step": 120795 }, { "epoch": 0.28, "grad_norm": 1.953125, "learning_rate": 0.00019019436204559457, "loss": 2.2436, "step": 120800 }, { "epoch": 0.28, "grad_norm": 1.9765625, "learning_rate": 0.00019019356383849156, "loss": 2.232, "step": 120805 }, { "epoch": 0.28, "grad_norm": 1.7734375, "learning_rate": 0.0001901927656005768, "loss": 2.0115, "step": 120810 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019019196733185054, "loss": 2.0872, "step": 120815 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019019116903231309, "loss": 2.1248, "step": 120820 }, { "epoch": 0.28, "grad_norm": 2.734375, "learning_rate": 0.00019019037070196467, "loss": 2.1499, "step": 120825 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019018957234080559, "loss": 2.174, "step": 120830 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019018877394883613, "loss": 2.0153, "step": 120835 }, { "epoch": 0.28, "grad_norm": 1.984375, "learning_rate": 0.00019018797552605653, "loss": 2.2884, "step": 120840 }, { "epoch": 0.28, "grad_norm": 1.7578125, "learning_rate": 0.00019018717707246712, "loss": 2.1948, "step": 120845 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.0001901863785880681, "loss": 2.1225, "step": 120850 }, { "epoch": 0.28, "grad_norm": 1.8046875, "learning_rate": 0.0001901855800728598, "loss": 2.1499, "step": 120855 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019018478152684244, "loss": 2.0984, "step": 120860 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.00019018398295001635, "loss": 2.2681, "step": 120865 }, { "epoch": 0.28, "grad_norm": 1.8203125, "learning_rate": 0.00019018318434238176, "loss": 2.1201, "step": 120870 }, { "epoch": 0.28, "grad_norm": 2.375, "learning_rate": 0.00019018238570393898, "loss": 2.1931, "step": 120875 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019018158703468824, "loss": 2.1184, "step": 120880 }, { "epoch": 0.28, "grad_norm": 1.625, "learning_rate": 0.00019018078833462983, "loss": 2.0591, "step": 120885 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019017998960376407, "loss": 2.2318, "step": 120890 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.00019017919084209114, "loss": 2.1332, "step": 120895 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019017839204961138, "loss": 2.0673, "step": 120900 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019017759322632506, "loss": 2.2188, "step": 120905 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019017679437223243, "loss": 2.1733, "step": 120910 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019017599548733376, "loss": 2.0778, "step": 120915 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.00019017519657162934, "loss": 1.948, "step": 120920 }, { "epoch": 0.28, "grad_norm": 1.890625, "learning_rate": 0.00019017439762511945, "loss": 2.2662, "step": 120925 }, { "epoch": 0.28, "grad_norm": 1.6953125, "learning_rate": 0.00019017359864780434, "loss": 2.1043, "step": 120930 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.0001901727996396843, "loss": 1.9955, "step": 120935 }, { "epoch": 0.28, "grad_norm": 2.0, "learning_rate": 0.0001901720006007596, "loss": 2.2311, "step": 120940 }, { "epoch": 0.28, "grad_norm": 1.9453125, "learning_rate": 0.0001901712015310305, "loss": 2.105, "step": 120945 }, { "epoch": 0.28, "grad_norm": 2.15625, "learning_rate": 0.00019017040243049727, "loss": 2.1411, "step": 120950 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.0001901696032991602, "loss": 2.1996, "step": 120955 }, { "epoch": 0.28, "grad_norm": 2.6875, "learning_rate": 0.00019016880413701956, "loss": 2.1039, "step": 120960 }, { "epoch": 0.28, "grad_norm": 1.7734375, "learning_rate": 0.00019016800494407564, "loss": 1.9897, "step": 120965 }, { "epoch": 0.28, "grad_norm": 1.921875, "learning_rate": 0.00019016720572032867, "loss": 2.1296, "step": 120970 }, { "epoch": 0.28, "grad_norm": 2.109375, "learning_rate": 0.00019016640646577897, "loss": 2.0736, "step": 120975 }, { "epoch": 0.28, "grad_norm": 1.8125, "learning_rate": 0.00019016560718042677, "loss": 2.2834, "step": 120980 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019016480786427238, "loss": 2.3262, "step": 120985 }, { "epoch": 0.28, "grad_norm": 2.40625, "learning_rate": 0.00019016400851731605, "loss": 2.1776, "step": 120990 }, { "epoch": 0.28, "grad_norm": 1.9609375, "learning_rate": 0.00019016320913955805, "loss": 2.2617, "step": 120995 }, { "epoch": 0.28, "grad_norm": 2.0625, "learning_rate": 0.00019016240973099868, "loss": 2.0929, "step": 121000 }, { "epoch": 0.28, "grad_norm": 1.5703125, "learning_rate": 0.00019016161029163816, "loss": 2.272, "step": 121005 }, { "epoch": 0.28, "grad_norm": 2.21875, "learning_rate": 0.00019016081082147684, "loss": 1.9766, "step": 121010 }, { "epoch": 0.28, "grad_norm": 2.171875, "learning_rate": 0.00019016001132051493, "loss": 2.1243, "step": 121015 }, { "epoch": 0.28, "grad_norm": 2.3125, "learning_rate": 0.00019015921178875273, "loss": 2.1491, "step": 121020 }, { "epoch": 0.28, "grad_norm": 2.015625, "learning_rate": 0.00019015841222619052, "loss": 2.0939, "step": 121025 }, { "epoch": 0.28, "grad_norm": 2.125, "learning_rate": 0.00019015761263282857, "loss": 2.0375, "step": 121030 }, { "epoch": 0.28, "grad_norm": 1.828125, "learning_rate": 0.0001901568130086671, "loss": 2.2166, "step": 121035 }, { "epoch": 0.28, "grad_norm": 1.875, "learning_rate": 0.00019015601335370646, "loss": 2.1354, "step": 121040 }, { "epoch": 0.28, "grad_norm": 2.453125, "learning_rate": 0.00019015521366794687, "loss": 2.1533, "step": 121045 }, { "epoch": 0.28, "grad_norm": 2.265625, "learning_rate": 0.00019015441395138865, "loss": 2.3636, "step": 121050 }, { "epoch": 0.28, "grad_norm": 1.71875, "learning_rate": 0.00019015361420403202, "loss": 2.3285, "step": 121055 }, { "epoch": 0.28, "grad_norm": 2.484375, "learning_rate": 0.0001901528144258773, "loss": 2.169, "step": 121060 }, { "epoch": 0.28, "grad_norm": 2.1875, "learning_rate": 0.00019015201461692477, "loss": 1.957, "step": 121065 }, { "epoch": 0.28, "grad_norm": 3.09375, "learning_rate": 0.00019015121477717462, "loss": 2.0879, "step": 121070 }, { "epoch": 0.28, "grad_norm": 2.046875, "learning_rate": 0.0001901504149066272, "loss": 2.1397, "step": 121075 }, { "epoch": 0.28, "grad_norm": 1.8828125, "learning_rate": 0.00019014961500528278, "loss": 2.0494, "step": 121080 }, { "epoch": 0.28, "grad_norm": 1.765625, "learning_rate": 0.00019014881507314159, "loss": 2.0897, "step": 121085 }, { "epoch": 0.28, "grad_norm": 2.03125, "learning_rate": 0.00019014801511020395, "loss": 2.0515, "step": 121090 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019014721511647012, "loss": 2.1854, "step": 121095 }, { "epoch": 0.28, "grad_norm": 2.078125, "learning_rate": 0.00019014641509194034, "loss": 2.1048, "step": 121100 }, { "epoch": 0.28, "grad_norm": 1.8984375, "learning_rate": 0.00019014561503661494, "loss": 2.2127, "step": 121105 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00019014481495049413, "loss": 2.2992, "step": 121110 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.00019014401483357824, "loss": 2.1763, "step": 121115 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001901432146858675, "loss": 2.1162, "step": 121120 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00019014241450736224, "loss": 2.3296, "step": 121125 }, { "epoch": 0.29, "grad_norm": 1.6953125, "learning_rate": 0.00019014161429806268, "loss": 2.1411, "step": 121130 }, { "epoch": 0.29, "grad_norm": 1.6484375, "learning_rate": 0.00019014081405796908, "loss": 1.9856, "step": 121135 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00019014001378708177, "loss": 2.0631, "step": 121140 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.000190139213485401, "loss": 2.0732, "step": 121145 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00019013841315292703, "loss": 2.2481, "step": 121150 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.00019013761278966013, "loss": 2.2216, "step": 121155 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00019013681239560062, "loss": 2.0422, "step": 121160 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001901360119707487, "loss": 2.1518, "step": 121165 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00019013521151510472, "loss": 2.2192, "step": 121170 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00019013441102866892, "loss": 2.0887, "step": 121175 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00019013361051144152, "loss": 2.3076, "step": 121180 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00019013280996342289, "loss": 2.0517, "step": 121185 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00019013200938461323, "loss": 2.0773, "step": 121190 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00019013120877501286, "loss": 2.2314, "step": 121195 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00019013040813462204, "loss": 2.218, "step": 121200 }, { "epoch": 0.29, "grad_norm": 1.78125, "learning_rate": 0.00019012960746344105, "loss": 2.1504, "step": 121205 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00019012880676147012, "loss": 2.2268, "step": 121210 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00019012800602870956, "loss": 2.1393, "step": 121215 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00019012720526515964, "loss": 2.2074, "step": 121220 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00019012640447082065, "loss": 2.0585, "step": 121225 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00019012560364569284, "loss": 2.1397, "step": 121230 }, { "epoch": 0.29, "grad_norm": 2.84375, "learning_rate": 0.00019012480278977645, "loss": 2.2377, "step": 121235 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.00019012400190307184, "loss": 2.3206, "step": 121240 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.0001901232009855792, "loss": 2.2136, "step": 121245 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.0001901224000372989, "loss": 2.2016, "step": 121250 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00019012159905823107, "loss": 2.141, "step": 121255 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00019012079804837614, "loss": 2.2729, "step": 121260 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00019011999700773425, "loss": 2.0532, "step": 121265 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.0001901191959363058, "loss": 2.0106, "step": 121270 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00019011839483409096, "loss": 2.169, "step": 121275 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00019011759370109004, "loss": 2.1862, "step": 121280 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00019011679253730333, "loss": 2.0431, "step": 121285 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00019011599134273105, "loss": 2.0752, "step": 121290 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00019011519011737356, "loss": 2.1429, "step": 121295 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00019011438886123107, "loss": 2.156, "step": 121300 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00019011358757430386, "loss": 2.2133, "step": 121305 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00019011278625659226, "loss": 2.1254, "step": 121310 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00019011198490809646, "loss": 2.2735, "step": 121315 }, { "epoch": 0.29, "grad_norm": 2.625, "learning_rate": 0.00019011118352881676, "loss": 2.1384, "step": 121320 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00019011038211875348, "loss": 2.1488, "step": 121325 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00019010958067790682, "loss": 2.0811, "step": 121330 }, { "epoch": 0.29, "grad_norm": 1.8203125, "learning_rate": 0.00019010877920627711, "loss": 2.2313, "step": 121335 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001901079777038646, "loss": 2.0649, "step": 121340 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.0001901071761706696, "loss": 2.1253, "step": 121345 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00019010637460669234, "loss": 2.2093, "step": 121350 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001901055730119331, "loss": 1.9808, "step": 121355 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00019010477138639215, "loss": 2.2859, "step": 121360 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001901039697300698, "loss": 2.1572, "step": 121365 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.0001901031680429663, "loss": 2.0797, "step": 121370 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001901023663250819, "loss": 2.026, "step": 121375 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00019010156457641692, "loss": 2.1899, "step": 121380 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00019010076279697162, "loss": 1.9117, "step": 121385 }, { "epoch": 0.29, "grad_norm": 2.4375, "learning_rate": 0.00019009996098674626, "loss": 2.0613, "step": 121390 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001900991591457411, "loss": 2.1187, "step": 121395 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00019009835727395644, "loss": 1.9391, "step": 121400 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00019009755537139253, "loss": 2.1951, "step": 121405 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001900967534380497, "loss": 2.0567, "step": 121410 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00019009595147392814, "loss": 2.2027, "step": 121415 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.0001900951494790282, "loss": 2.1881, "step": 121420 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.00019009434745335014, "loss": 2.0588, "step": 121425 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00019009354539689416, "loss": 2.15, "step": 121430 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00019009274330966064, "loss": 2.0243, "step": 121435 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.0001900919411916498, "loss": 2.3108, "step": 121440 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00019009113904286192, "loss": 2.2974, "step": 121445 }, { "epoch": 0.29, "grad_norm": 1.8515625, "learning_rate": 0.00019009033686329726, "loss": 2.3864, "step": 121450 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.00019008953465295608, "loss": 2.1561, "step": 121455 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00019008873241183874, "loss": 2.0699, "step": 121460 }, { "epoch": 0.29, "grad_norm": 2.375, "learning_rate": 0.00019008793013994542, "loss": 2.0091, "step": 121465 }, { "epoch": 0.29, "grad_norm": 2.65625, "learning_rate": 0.00019008712783727643, "loss": 2.0985, "step": 121470 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00019008632550383204, "loss": 2.1702, "step": 121475 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00019008552313961254, "loss": 2.1727, "step": 121480 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00019008472074461818, "loss": 2.1485, "step": 121485 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00019008391831884926, "loss": 2.086, "step": 121490 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00019008311586230604, "loss": 2.1033, "step": 121495 }, { "epoch": 0.29, "grad_norm": 3.5, "learning_rate": 0.00019008231337498877, "loss": 2.064, "step": 121500 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00019008151085689776, "loss": 2.1223, "step": 121505 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001900807083080333, "loss": 2.2287, "step": 121510 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.0001900799057283956, "loss": 2.1461, "step": 121515 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.00019007910311798497, "loss": 2.3481, "step": 121520 }, { "epoch": 0.29, "grad_norm": 1.6953125, "learning_rate": 0.00019007830047680172, "loss": 2.1263, "step": 121525 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00019007749780484607, "loss": 2.1875, "step": 121530 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001900766951021183, "loss": 2.2335, "step": 121535 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.00019007589236861872, "loss": 2.1866, "step": 121540 }, { "epoch": 0.29, "grad_norm": 1.6796875, "learning_rate": 0.00019007508960434754, "loss": 2.0647, "step": 121545 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00019007428680930513, "loss": 2.0375, "step": 121550 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00019007348398349168, "loss": 2.0978, "step": 121555 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00019007268112690749, "loss": 2.1749, "step": 121560 }, { "epoch": 0.29, "grad_norm": 1.7265625, "learning_rate": 0.00019007187823955285, "loss": 2.3108, "step": 121565 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.000190071075321428, "loss": 2.0077, "step": 121570 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00019007027237253327, "loss": 2.2512, "step": 121575 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.0001900694693928689, "loss": 1.9224, "step": 121580 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00019006866638243515, "loss": 2.3352, "step": 121585 }, { "epoch": 0.29, "grad_norm": 1.671875, "learning_rate": 0.00019006786334123233, "loss": 2.1433, "step": 121590 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00019006706026926067, "loss": 2.2082, "step": 121595 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00019006625716652047, "loss": 1.9954, "step": 121600 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.000190065454033012, "loss": 2.0493, "step": 121605 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00019006465086873555, "loss": 2.0434, "step": 121610 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00019006384767369138, "loss": 2.0705, "step": 121615 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00019006304444787978, "loss": 2.1996, "step": 121620 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.000190062241191301, "loss": 2.1346, "step": 121625 }, { "epoch": 0.29, "grad_norm": 1.734375, "learning_rate": 0.0001900614379039553, "loss": 2.084, "step": 121630 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00019006063458584303, "loss": 2.2773, "step": 121635 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.00019005983123696437, "loss": 2.1669, "step": 121640 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00019005902785731966, "loss": 2.1548, "step": 121645 }, { "epoch": 0.29, "grad_norm": 2.40625, "learning_rate": 0.00019005822444690914, "loss": 2.1585, "step": 121650 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001900574210057331, "loss": 2.2347, "step": 121655 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00019005661753379184, "loss": 2.0602, "step": 121660 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00019005581403108558, "loss": 2.2348, "step": 121665 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00019005501049761461, "loss": 2.1529, "step": 121670 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.00019005420693337923, "loss": 2.1394, "step": 121675 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.0001900534033383797, "loss": 2.1179, "step": 121680 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00019005259971261628, "loss": 2.1806, "step": 121685 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001900517960560893, "loss": 1.9578, "step": 121690 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00019005099236879894, "loss": 2.2914, "step": 121695 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.00019005018865074556, "loss": 2.1933, "step": 121700 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00019004938490192942, "loss": 1.9442, "step": 121705 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00019004858112235074, "loss": 2.0019, "step": 121710 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00019004777731200983, "loss": 2.0814, "step": 121715 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.000190046973470907, "loss": 2.0992, "step": 121720 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00019004616959904244, "loss": 2.1416, "step": 121725 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.00019004536569641652, "loss": 2.2083, "step": 121730 }, { "epoch": 0.29, "grad_norm": 2.4375, "learning_rate": 0.00019004456176302946, "loss": 2.1863, "step": 121735 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00019004375779888152, "loss": 2.0224, "step": 121740 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.000190042953803973, "loss": 2.014, "step": 121745 }, { "epoch": 0.29, "grad_norm": 1.7578125, "learning_rate": 0.0001900421497783042, "loss": 2.1607, "step": 121750 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.00019004134572187537, "loss": 2.0304, "step": 121755 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.0001900405416346868, "loss": 2.1287, "step": 121760 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.00019003973751673868, "loss": 2.1646, "step": 121765 }, { "epoch": 0.29, "grad_norm": 1.8828125, "learning_rate": 0.00019003893336803142, "loss": 2.196, "step": 121770 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001900381291885652, "loss": 2.3256, "step": 121775 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00019003732497834032, "loss": 2.1727, "step": 121780 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.00019003652073735708, "loss": 2.1615, "step": 121785 }, { "epoch": 0.29, "grad_norm": 1.9453125, "learning_rate": 0.0001900357164656157, "loss": 2.3458, "step": 121790 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001900349121631165, "loss": 1.9729, "step": 121795 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00019003410782985976, "loss": 1.9916, "step": 121800 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.0001900333034658457, "loss": 1.9896, "step": 121805 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00019003249907107464, "loss": 2.2853, "step": 121810 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00019003169464554685, "loss": 2.2035, "step": 121815 }, { "epoch": 0.29, "grad_norm": 1.7578125, "learning_rate": 0.0001900308901892626, "loss": 2.2848, "step": 121820 }, { "epoch": 0.29, "grad_norm": 1.6796875, "learning_rate": 0.00019003008570222217, "loss": 2.1848, "step": 121825 }, { "epoch": 0.29, "grad_norm": 2.515625, "learning_rate": 0.00019002928118442585, "loss": 2.371, "step": 121830 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00019002847663587385, "loss": 2.1193, "step": 121835 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00019002767205656654, "loss": 2.2389, "step": 121840 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.0001900268674465041, "loss": 2.063, "step": 121845 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00019002606280568688, "loss": 2.0716, "step": 121850 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.0001900252581341151, "loss": 2.1775, "step": 121855 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.0001900244534317891, "loss": 2.0178, "step": 121860 }, { "epoch": 0.29, "grad_norm": 1.640625, "learning_rate": 0.00019002364869870905, "loss": 2.3132, "step": 121865 }, { "epoch": 0.29, "grad_norm": 1.78125, "learning_rate": 0.00019002284393487533, "loss": 2.2693, "step": 121870 }, { "epoch": 0.29, "grad_norm": 1.78125, "learning_rate": 0.00019002203914028818, "loss": 2.0777, "step": 121875 }, { "epoch": 0.29, "grad_norm": 2.59375, "learning_rate": 0.00019002123431494786, "loss": 2.2495, "step": 121880 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00019002042945885463, "loss": 2.192, "step": 121885 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00019001962457200881, "loss": 2.1807, "step": 121890 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00019001881965441064, "loss": 2.1075, "step": 121895 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00019001801470606045, "loss": 2.1639, "step": 121900 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00019001720972695844, "loss": 2.1985, "step": 121905 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.0001900164047171049, "loss": 1.9747, "step": 121910 }, { "epoch": 0.29, "grad_norm": 1.8828125, "learning_rate": 0.00019001559967650016, "loss": 2.1866, "step": 121915 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00019001479460514445, "loss": 2.1572, "step": 121920 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.00019001398950303804, "loss": 2.331, "step": 121925 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001900131843701812, "loss": 2.0721, "step": 121930 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00019001237920657427, "loss": 2.2191, "step": 121935 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.00019001157401221747, "loss": 2.0851, "step": 121940 }, { "epoch": 0.29, "grad_norm": 3.578125, "learning_rate": 0.00019001076878711107, "loss": 2.082, "step": 121945 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00019000996353125537, "loss": 2.1792, "step": 121950 }, { "epoch": 0.29, "grad_norm": 2.578125, "learning_rate": 0.0001900091582446506, "loss": 2.1491, "step": 121955 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00019000835292729712, "loss": 2.1905, "step": 121960 }, { "epoch": 0.29, "grad_norm": 2.8125, "learning_rate": 0.00019000754757919513, "loss": 2.0765, "step": 121965 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001900067422003449, "loss": 2.1499, "step": 121970 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.0001900059367907468, "loss": 2.104, "step": 121975 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.000190005131350401, "loss": 2.1436, "step": 121980 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001900043258793078, "loss": 2.2177, "step": 121985 }, { "epoch": 0.29, "grad_norm": 1.703125, "learning_rate": 0.00019000352037746752, "loss": 2.0997, "step": 121990 }, { "epoch": 0.29, "grad_norm": 2.671875, "learning_rate": 0.00019000271484488038, "loss": 2.2801, "step": 121995 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.0001900019092815467, "loss": 2.2597, "step": 122000 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00019000110368746673, "loss": 1.8958, "step": 122005 }, { "epoch": 0.29, "grad_norm": 1.7265625, "learning_rate": 0.00019000029806264074, "loss": 2.2251, "step": 122010 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00018999949240706903, "loss": 2.2326, "step": 122015 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018999868672075186, "loss": 2.0229, "step": 122020 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00018999788100368948, "loss": 2.0435, "step": 122025 }, { "epoch": 0.29, "grad_norm": 2.40625, "learning_rate": 0.00018999707525588225, "loss": 2.3006, "step": 122030 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018999626947733034, "loss": 2.1831, "step": 122035 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.0001899954636680341, "loss": 2.0852, "step": 122040 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.00018999465782799376, "loss": 2.1361, "step": 122045 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.0001899938519572096, "loss": 2.3064, "step": 122050 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.00018999304605568193, "loss": 2.1672, "step": 122055 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.000189992240123411, "loss": 2.2818, "step": 122060 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001899914341603971, "loss": 2.1354, "step": 122065 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018999062816664046, "loss": 2.1768, "step": 122070 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001899898221421414, "loss": 2.2119, "step": 122075 }, { "epoch": 0.29, "grad_norm": 2.375, "learning_rate": 0.00018998901608690018, "loss": 2.229, "step": 122080 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.0001899882100009171, "loss": 2.1629, "step": 122085 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018998740388419242, "loss": 2.3449, "step": 122090 }, { "epoch": 0.29, "grad_norm": 2.671875, "learning_rate": 0.00018998659773672641, "loss": 2.1448, "step": 122095 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00018998579155851932, "loss": 2.2533, "step": 122100 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018998498534957147, "loss": 2.1913, "step": 122105 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001899841791098831, "loss": 2.0783, "step": 122110 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.00018998337283945453, "loss": 2.2593, "step": 122115 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018998256653828598, "loss": 2.3594, "step": 122120 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00018998176020637776, "loss": 2.1812, "step": 122125 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018998095384373018, "loss": 2.0951, "step": 122130 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018998014745034343, "loss": 2.2236, "step": 122135 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.00018997934102621785, "loss": 2.0682, "step": 122140 }, { "epoch": 0.29, "grad_norm": 1.703125, "learning_rate": 0.00018997853457135367, "loss": 2.0199, "step": 122145 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018997772808575122, "loss": 2.2173, "step": 122150 }, { "epoch": 0.29, "grad_norm": 1.8828125, "learning_rate": 0.00018997692156941072, "loss": 1.9808, "step": 122155 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.0001899761150223325, "loss": 2.0651, "step": 122160 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018997530844451678, "loss": 2.3291, "step": 122165 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.00018997450183596388, "loss": 2.0604, "step": 122170 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.00018997369519667406, "loss": 2.0284, "step": 122175 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.00018997288852664758, "loss": 2.2768, "step": 122180 }, { "epoch": 0.29, "grad_norm": 1.7578125, "learning_rate": 0.00018997208182588472, "loss": 2.1934, "step": 122185 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.0001899712750943858, "loss": 2.1487, "step": 122190 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.00018997046833215105, "loss": 2.1082, "step": 122195 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018996966153918073, "loss": 2.1517, "step": 122200 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018996885471547518, "loss": 2.0173, "step": 122205 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001899680478610346, "loss": 2.2964, "step": 122210 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001899672409758593, "loss": 2.132, "step": 122215 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.0001899664340599496, "loss": 2.0605, "step": 122220 }, { "epoch": 0.29, "grad_norm": 1.7734375, "learning_rate": 0.00018996562711330573, "loss": 2.0212, "step": 122225 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018996482013592796, "loss": 2.1521, "step": 122230 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00018996401312781656, "loss": 2.1971, "step": 122235 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001899632060889718, "loss": 2.0982, "step": 122240 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00018996239901939403, "loss": 2.0728, "step": 122245 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00018996159191908346, "loss": 2.0492, "step": 122250 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018996078478804038, "loss": 2.0181, "step": 122255 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018995997762626506, "loss": 2.1249, "step": 122260 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018995917043375778, "loss": 2.2353, "step": 122265 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.0001899583632105188, "loss": 2.3093, "step": 122270 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018995755595654841, "loss": 2.1346, "step": 122275 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.0001899567486718469, "loss": 2.1719, "step": 122280 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001899559413564145, "loss": 2.322, "step": 122285 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.00018995513401025158, "loss": 2.1526, "step": 122290 }, { "epoch": 0.29, "grad_norm": 1.7265625, "learning_rate": 0.00018995432663335832, "loss": 2.2573, "step": 122295 }, { "epoch": 0.29, "grad_norm": 1.7734375, "learning_rate": 0.00018995351922573505, "loss": 2.1102, "step": 122300 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.000189952711787382, "loss": 2.2648, "step": 122305 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.00018995190431829952, "loss": 2.2658, "step": 122310 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001899510968184878, "loss": 2.1676, "step": 122315 }, { "epoch": 0.29, "grad_norm": 2.59375, "learning_rate": 0.00018995028928794715, "loss": 2.1705, "step": 122320 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018994948172667786, "loss": 2.1122, "step": 122325 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00018994867413468019, "loss": 2.0039, "step": 122330 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001899478665119544, "loss": 2.3265, "step": 122335 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018994705885850083, "loss": 2.1831, "step": 122340 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.0001899462511743197, "loss": 2.1168, "step": 122345 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.00018994544345941132, "loss": 2.1628, "step": 122350 }, { "epoch": 0.29, "grad_norm": 1.734375, "learning_rate": 0.00018994463571377591, "loss": 2.1084, "step": 122355 }, { "epoch": 0.29, "grad_norm": 1.8828125, "learning_rate": 0.00018994382793741378, "loss": 2.0427, "step": 122360 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018994302013032523, "loss": 2.2901, "step": 122365 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001899422122925105, "loss": 2.2227, "step": 122370 }, { "epoch": 0.29, "grad_norm": 2.5, "learning_rate": 0.00018994140442396987, "loss": 2.1693, "step": 122375 }, { "epoch": 0.29, "grad_norm": 2.578125, "learning_rate": 0.00018994059652470364, "loss": 2.1173, "step": 122380 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018993978859471207, "loss": 2.0923, "step": 122385 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018993898063399546, "loss": 2.015, "step": 122390 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018993817264255402, "loss": 2.2069, "step": 122395 }, { "epoch": 0.29, "grad_norm": 1.7578125, "learning_rate": 0.0001899373646203881, "loss": 2.1509, "step": 122400 }, { "epoch": 0.29, "grad_norm": 2.75, "learning_rate": 0.00018993655656749793, "loss": 2.3083, "step": 122405 }, { "epoch": 0.29, "grad_norm": 3.703125, "learning_rate": 0.0001899357484838838, "loss": 2.1907, "step": 122410 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.000189934940369546, "loss": 2.1574, "step": 122415 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.00018993413222448478, "loss": 2.0722, "step": 122420 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.00018993332404870043, "loss": 2.2585, "step": 122425 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018993251584219323, "loss": 2.1787, "step": 122430 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018993170760496346, "loss": 2.1851, "step": 122435 }, { "epoch": 0.29, "grad_norm": 1.828125, "learning_rate": 0.00018993089933701138, "loss": 2.0505, "step": 122440 }, { "epoch": 0.29, "grad_norm": 2.6875, "learning_rate": 0.00018993009103833726, "loss": 1.9763, "step": 122445 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018992928270894138, "loss": 2.1886, "step": 122450 }, { "epoch": 0.29, "grad_norm": 1.8203125, "learning_rate": 0.00018992847434882407, "loss": 2.2048, "step": 122455 }, { "epoch": 0.29, "grad_norm": 3.03125, "learning_rate": 0.00018992766595798553, "loss": 2.2179, "step": 122460 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.0001899268575364261, "loss": 2.2379, "step": 122465 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.000189926049084146, "loss": 2.0503, "step": 122470 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018992524060114553, "loss": 2.1817, "step": 122475 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00018992443208742498, "loss": 2.2607, "step": 122480 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001899236235429846, "loss": 2.1199, "step": 122485 }, { "epoch": 0.29, "grad_norm": 1.640625, "learning_rate": 0.00018992281496782467, "loss": 2.0916, "step": 122490 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018992200636194548, "loss": 2.0738, "step": 122495 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.00018992119772534734, "loss": 2.2159, "step": 122500 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018992038905803045, "loss": 2.1285, "step": 122505 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018991958035999512, "loss": 2.2158, "step": 122510 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00018991877163124164, "loss": 2.1109, "step": 122515 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.0001899179628717703, "loss": 2.1584, "step": 122520 }, { "epoch": 0.29, "grad_norm": 1.84375, "learning_rate": 0.00018991715408158132, "loss": 2.164, "step": 122525 }, { "epoch": 0.29, "grad_norm": 1.8515625, "learning_rate": 0.00018991634526067502, "loss": 2.2859, "step": 122530 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001899155364090517, "loss": 2.2544, "step": 122535 }, { "epoch": 0.29, "grad_norm": 2.6875, "learning_rate": 0.00018991472752671155, "loss": 2.2428, "step": 122540 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018991391861365492, "loss": 2.2222, "step": 122545 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018991310966988209, "loss": 2.247, "step": 122550 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018991230069539326, "loss": 2.0042, "step": 122555 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018991149169018877, "loss": 2.1047, "step": 122560 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018991068265426892, "loss": 2.2405, "step": 122565 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018990987358763391, "loss": 2.0933, "step": 122570 }, { "epoch": 0.29, "grad_norm": 1.625, "learning_rate": 0.00018990906449028409, "loss": 2.1437, "step": 122575 }, { "epoch": 0.29, "grad_norm": 4.03125, "learning_rate": 0.00018990825536221968, "loss": 2.0284, "step": 122580 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018990744620344099, "loss": 2.1645, "step": 122585 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018990663701394826, "loss": 2.1175, "step": 122590 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018990582779374182, "loss": 2.2375, "step": 122595 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018990501854282193, "loss": 2.0737, "step": 122600 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018990420926118882, "loss": 2.0359, "step": 122605 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018990339994884284, "loss": 2.277, "step": 122610 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00018990259060578418, "loss": 2.1481, "step": 122615 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001899017812320132, "loss": 2.1711, "step": 122620 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018990097182753016, "loss": 2.1299, "step": 122625 }, { "epoch": 0.29, "grad_norm": 1.6875, "learning_rate": 0.0001899001623923353, "loss": 2.1242, "step": 122630 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018989935292642888, "loss": 2.0847, "step": 122635 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.00018989854342981125, "loss": 2.3051, "step": 122640 }, { "epoch": 0.29, "grad_norm": 1.7734375, "learning_rate": 0.0001898977339024826, "loss": 2.1827, "step": 122645 }, { "epoch": 0.29, "grad_norm": 1.734375, "learning_rate": 0.0001898969243444433, "loss": 2.1811, "step": 122650 }, { "epoch": 0.29, "grad_norm": 1.5546875, "learning_rate": 0.00018989611475569355, "loss": 2.0512, "step": 122655 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018989530513623367, "loss": 1.9566, "step": 122660 }, { "epoch": 0.29, "grad_norm": 1.75, "learning_rate": 0.00018989449548606396, "loss": 1.9575, "step": 122665 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018989368580518462, "loss": 2.1889, "step": 122670 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.000189892876093596, "loss": 2.1837, "step": 122675 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.00018989206635129828, "loss": 2.2203, "step": 122680 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.00018989125657829184, "loss": 2.023, "step": 122685 }, { "epoch": 0.29, "grad_norm": 2.46875, "learning_rate": 0.00018989044677457693, "loss": 2.1681, "step": 122690 }, { "epoch": 0.29, "grad_norm": 1.734375, "learning_rate": 0.00018988963694015377, "loss": 2.0974, "step": 122695 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001898888270750227, "loss": 2.1431, "step": 122700 }, { "epoch": 0.29, "grad_norm": 1.7109375, "learning_rate": 0.000189888017179184, "loss": 2.2198, "step": 122705 }, { "epoch": 0.29, "grad_norm": 1.3984375, "learning_rate": 0.00018988720725263792, "loss": 2.0341, "step": 122710 }, { "epoch": 0.29, "grad_norm": 1.9453125, "learning_rate": 0.00018988639729538472, "loss": 2.0746, "step": 122715 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.0001898855873074247, "loss": 2.1369, "step": 122720 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018988477728875812, "loss": 2.1452, "step": 122725 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018988396723938528, "loss": 2.0665, "step": 122730 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018988315715930647, "loss": 2.2402, "step": 122735 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018988234704852191, "loss": 2.0129, "step": 122740 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018988153690703192, "loss": 2.1456, "step": 122745 }, { "epoch": 0.29, "grad_norm": 2.75, "learning_rate": 0.0001898807267348368, "loss": 2.0424, "step": 122750 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018987991653193675, "loss": 1.951, "step": 122755 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.0001898791062983321, "loss": 2.0572, "step": 122760 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00018987829603402313, "loss": 2.0254, "step": 122765 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.0001898774857390101, "loss": 2.1981, "step": 122770 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.00018987667541329328, "loss": 2.0824, "step": 122775 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018987586505687297, "loss": 2.1563, "step": 122780 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.0001898750546697494, "loss": 2.1405, "step": 122785 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018987424425192295, "loss": 2.0, "step": 122790 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.0001898734338033938, "loss": 2.1773, "step": 122795 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018987262332416225, "loss": 2.0873, "step": 122800 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00018987181281422853, "loss": 2.1066, "step": 122805 }, { "epoch": 0.29, "grad_norm": 2.640625, "learning_rate": 0.00018987100227359306, "loss": 2.1608, "step": 122810 }, { "epoch": 0.29, "grad_norm": 2.59375, "learning_rate": 0.00018987019170225598, "loss": 2.2247, "step": 122815 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00018986938110021758, "loss": 2.212, "step": 122820 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.0001898685704674782, "loss": 2.1033, "step": 122825 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.0001898677598040381, "loss": 2.1103, "step": 122830 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018986694910989754, "loss": 2.1883, "step": 122835 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.0001898661383850568, "loss": 2.1001, "step": 122840 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.00018986532762951615, "loss": 2.202, "step": 122845 }, { "epoch": 0.29, "grad_norm": 1.703125, "learning_rate": 0.0001898645168432759, "loss": 2.1131, "step": 122850 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018986370602633624, "loss": 2.3152, "step": 122855 }, { "epoch": 0.29, "grad_norm": 1.734375, "learning_rate": 0.00018986289517869756, "loss": 2.0794, "step": 122860 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018986208430036007, "loss": 1.9074, "step": 122865 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00018986127339132406, "loss": 2.0494, "step": 122870 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018986046245158983, "loss": 2.2793, "step": 122875 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001898596514811576, "loss": 2.1553, "step": 122880 }, { "epoch": 0.29, "grad_norm": 1.7734375, "learning_rate": 0.00018985884048002772, "loss": 2.1768, "step": 122885 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001898580294482004, "loss": 2.0784, "step": 122890 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018985721838567595, "loss": 2.2016, "step": 122895 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00018985640729245467, "loss": 2.3081, "step": 122900 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.0001898555961685368, "loss": 2.0552, "step": 122905 }, { "epoch": 0.29, "grad_norm": 1.8203125, "learning_rate": 0.0001898547850139226, "loss": 2.0703, "step": 122910 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018985397382861242, "loss": 2.1339, "step": 122915 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018985316261260646, "loss": 2.2417, "step": 122920 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018985235136590505, "loss": 2.0459, "step": 122925 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018985154008850844, "loss": 2.2564, "step": 122930 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001898507287804169, "loss": 2.2292, "step": 122935 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018984991744163074, "loss": 2.0521, "step": 122940 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018984910607215024, "loss": 2.0502, "step": 122945 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001898482946719756, "loss": 2.0941, "step": 122950 }, { "epoch": 0.29, "grad_norm": 7.125, "learning_rate": 0.0001898474832411072, "loss": 2.2477, "step": 122955 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018984667177954524, "loss": 2.0013, "step": 122960 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018984586028729006, "loss": 2.1203, "step": 122965 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.0001898450487643419, "loss": 2.1421, "step": 122970 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018984423721070103, "loss": 2.1564, "step": 122975 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.00018984342562636773, "loss": 2.1996, "step": 122980 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001898426140113423, "loss": 2.2075, "step": 122985 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018984180236562498, "loss": 2.0095, "step": 122990 }, { "epoch": 0.29, "grad_norm": 1.84375, "learning_rate": 0.0001898409906892161, "loss": 2.3989, "step": 122995 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.0001898401789821159, "loss": 2.2138, "step": 123000 }, { "epoch": 0.29, "grad_norm": 2.40625, "learning_rate": 0.00018983936724432465, "loss": 2.1347, "step": 123005 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018983855547584266, "loss": 2.1949, "step": 123010 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018983774367667018, "loss": 2.2351, "step": 123015 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001898369318468075, "loss": 2.2807, "step": 123020 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001898361199862549, "loss": 2.0952, "step": 123025 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018983530809501265, "loss": 2.1482, "step": 123030 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018983449617308105, "loss": 2.221, "step": 123035 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.0001898336842204603, "loss": 2.2203, "step": 123040 }, { "epoch": 0.29, "grad_norm": 2.984375, "learning_rate": 0.00018983287223715077, "loss": 2.1744, "step": 123045 }, { "epoch": 0.29, "grad_norm": 1.6953125, "learning_rate": 0.0001898320602231527, "loss": 2.0656, "step": 123050 }, { "epoch": 0.29, "grad_norm": 2.515625, "learning_rate": 0.0001898312481784664, "loss": 2.2608, "step": 123055 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018983043610309208, "loss": 2.2437, "step": 123060 }, { "epoch": 0.29, "grad_norm": 2.46875, "learning_rate": 0.00018982962399703005, "loss": 2.1256, "step": 123065 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018982881186028062, "loss": 2.2353, "step": 123070 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018982799969284402, "loss": 2.0998, "step": 123075 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018982718749472054, "loss": 2.1064, "step": 123080 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00018982637526591048, "loss": 2.1196, "step": 123085 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018982556300641408, "loss": 2.1653, "step": 123090 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018982475071623167, "loss": 2.2641, "step": 123095 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018982393839536346, "loss": 2.1532, "step": 123100 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.0001898231260438098, "loss": 2.082, "step": 123105 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001898223136615709, "loss": 2.2127, "step": 123110 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018982150124864707, "loss": 2.0152, "step": 123115 }, { "epoch": 0.29, "grad_norm": 1.703125, "learning_rate": 0.0001898206888050386, "loss": 2.0733, "step": 123120 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018981987633074577, "loss": 2.2608, "step": 123125 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018981906382576881, "loss": 2.1638, "step": 123130 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018981825129010802, "loss": 2.0075, "step": 123135 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018981743872376373, "loss": 2.2032, "step": 123140 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00018981662612673614, "loss": 2.1523, "step": 123145 }, { "epoch": 0.29, "grad_norm": 2.5, "learning_rate": 0.00018981581349902557, "loss": 2.0038, "step": 123150 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001898150008406323, "loss": 1.9857, "step": 123155 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018981418815155657, "loss": 2.1224, "step": 123160 }, { "epoch": 0.29, "grad_norm": 1.8515625, "learning_rate": 0.0001898133754317987, "loss": 2.3307, "step": 123165 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018981256268135898, "loss": 2.1143, "step": 123170 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00018981174990023765, "loss": 2.1212, "step": 123175 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00018981093708843495, "loss": 2.1653, "step": 123180 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00018981012424595126, "loss": 2.1471, "step": 123185 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018980931137278678, "loss": 2.1461, "step": 123190 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.0001898084984689418, "loss": 2.1419, "step": 123195 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018980768553441662, "loss": 2.0464, "step": 123200 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.0001898068725692115, "loss": 2.1262, "step": 123205 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018980605957332673, "loss": 2.2301, "step": 123210 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018980524654676258, "loss": 2.3442, "step": 123215 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018980443348951933, "loss": 2.0194, "step": 123220 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.00018980362040159725, "loss": 2.0592, "step": 123225 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018980280728299662, "loss": 2.2005, "step": 123230 }, { "epoch": 0.29, "grad_norm": 2.609375, "learning_rate": 0.00018980199413371772, "loss": 2.2042, "step": 123235 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018980118095376087, "loss": 2.061, "step": 123240 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.0001898003677431263, "loss": 2.0353, "step": 123245 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018979955450181425, "loss": 2.0123, "step": 123250 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018979874122982508, "loss": 1.9674, "step": 123255 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018979792792715903, "loss": 2.223, "step": 123260 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018979711459381636, "loss": 2.1152, "step": 123265 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.00018979630122979737, "loss": 2.0636, "step": 123270 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00018979548783510235, "loss": 2.3868, "step": 123275 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018979467440973156, "loss": 2.2373, "step": 123280 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018979386095368525, "loss": 2.0707, "step": 123285 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.00018979304746696377, "loss": 2.233, "step": 123290 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00018979223394956733, "loss": 2.0216, "step": 123295 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018979142040149625, "loss": 2.0771, "step": 123300 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001897906068227508, "loss": 2.1798, "step": 123305 }, { "epoch": 0.29, "grad_norm": 1.6875, "learning_rate": 0.00018978979321333122, "loss": 2.0279, "step": 123310 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018978897957323785, "loss": 2.1583, "step": 123315 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018978816590247094, "loss": 2.0633, "step": 123320 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018978735220103073, "loss": 2.1591, "step": 123325 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018978653846891757, "loss": 2.4829, "step": 123330 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00018978572470613165, "loss": 2.2972, "step": 123335 }, { "epoch": 0.29, "grad_norm": 1.6875, "learning_rate": 0.00018978491091267333, "loss": 2.0467, "step": 123340 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00018978409708854285, "loss": 1.9157, "step": 123345 }, { "epoch": 0.29, "grad_norm": 2.515625, "learning_rate": 0.0001897832832337405, "loss": 2.3051, "step": 123350 }, { "epoch": 0.29, "grad_norm": 1.6171875, "learning_rate": 0.00018978246934826654, "loss": 2.0522, "step": 123355 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.0001897816554321213, "loss": 2.1243, "step": 123360 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018978084148530498, "loss": 2.0048, "step": 123365 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001897800275078179, "loss": 2.2519, "step": 123370 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00018977921349966033, "loss": 2.2549, "step": 123375 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.00018977839946083256, "loss": 2.08, "step": 123380 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.00018977758539133488, "loss": 2.2794, "step": 123385 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001897767712911675, "loss": 2.2006, "step": 123390 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.0001897759571603308, "loss": 2.1522, "step": 123395 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018977514299882499, "loss": 2.2292, "step": 123400 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.00018977432880665034, "loss": 2.2016, "step": 123405 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00018977351458380717, "loss": 2.1175, "step": 123410 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.00018977270033029573, "loss": 2.0465, "step": 123415 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018977188604611632, "loss": 2.1297, "step": 123420 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.0001897710717312692, "loss": 2.3153, "step": 123425 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018977025738575463, "loss": 2.0103, "step": 123430 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018976944300957293, "loss": 2.0707, "step": 123435 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.0001897686286027244, "loss": 1.9943, "step": 123440 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018976781416520924, "loss": 1.9719, "step": 123445 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018976699969702775, "loss": 2.1488, "step": 123450 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018976618519818024, "loss": 2.206, "step": 123455 }, { "epoch": 0.29, "grad_norm": 1.9453125, "learning_rate": 0.00018976537066866698, "loss": 1.9752, "step": 123460 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018976455610848823, "loss": 2.1455, "step": 123465 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018976374151764432, "loss": 2.0312, "step": 123470 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018976292689613545, "loss": 2.1034, "step": 123475 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.00018976211224396193, "loss": 2.1309, "step": 123480 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018976129756112403, "loss": 2.212, "step": 123485 }, { "epoch": 0.29, "grad_norm": 1.78125, "learning_rate": 0.0001897604828476221, "loss": 2.2324, "step": 123490 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001897596681034563, "loss": 2.2945, "step": 123495 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018975885332862702, "loss": 2.1984, "step": 123500 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018975803852313447, "loss": 2.1007, "step": 123505 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018975722368697894, "loss": 2.2288, "step": 123510 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001897564088201607, "loss": 1.9886, "step": 123515 }, { "epoch": 0.29, "grad_norm": 1.6875, "learning_rate": 0.0001897555939226801, "loss": 2.1861, "step": 123520 }, { "epoch": 0.29, "grad_norm": 1.65625, "learning_rate": 0.00018975477899453732, "loss": 2.1125, "step": 123525 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.0001897539640357327, "loss": 2.1517, "step": 123530 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018975314904626646, "loss": 2.1977, "step": 123535 }, { "epoch": 0.29, "grad_norm": 1.75, "learning_rate": 0.00018975233402613895, "loss": 2.2077, "step": 123540 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001897515189753504, "loss": 2.1437, "step": 123545 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018975070389390107, "loss": 2.3047, "step": 123550 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018974988878179132, "loss": 2.1355, "step": 123555 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018974907363902136, "loss": 2.2239, "step": 123560 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.0001897482584655915, "loss": 2.2196, "step": 123565 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018974744326150203, "loss": 2.0186, "step": 123570 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018974662802675316, "loss": 2.18, "step": 123575 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018974581276134524, "loss": 2.1567, "step": 123580 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018974499746527852, "loss": 2.1727, "step": 123585 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018974418213855328, "loss": 2.0794, "step": 123590 }, { "epoch": 0.29, "grad_norm": 1.7734375, "learning_rate": 0.00018974336678116978, "loss": 2.0382, "step": 123595 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018974255139312835, "loss": 2.1664, "step": 123600 }, { "epoch": 0.29, "grad_norm": 1.7890625, "learning_rate": 0.0001897417359744292, "loss": 2.1869, "step": 123605 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018974092052507268, "loss": 2.1615, "step": 123610 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00018974010504505903, "loss": 2.1415, "step": 123615 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018973928953438854, "loss": 2.1531, "step": 123620 }, { "epoch": 0.29, "grad_norm": 1.5234375, "learning_rate": 0.00018973847399306147, "loss": 1.9929, "step": 123625 }, { "epoch": 0.29, "grad_norm": 1.9453125, "learning_rate": 0.00018973765842107812, "loss": 2.0045, "step": 123630 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.00018973684281843874, "loss": 2.2642, "step": 123635 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.0001897360271851436, "loss": 1.9904, "step": 123640 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018973521152119304, "loss": 2.1748, "step": 123645 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00018973439582658732, "loss": 2.0757, "step": 123650 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018973358010132668, "loss": 2.1817, "step": 123655 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018973276434541142, "loss": 2.2115, "step": 123660 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018973194855884186, "loss": 2.0787, "step": 123665 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018973113274161818, "loss": 2.1489, "step": 123670 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.00018973031689374073, "loss": 2.1509, "step": 123675 }, { "epoch": 0.29, "grad_norm": 1.7890625, "learning_rate": 0.00018972950101520981, "loss": 2.2181, "step": 123680 }, { "epoch": 0.29, "grad_norm": 2.875, "learning_rate": 0.00018972868510602563, "loss": 2.1272, "step": 123685 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018972786916618853, "loss": 2.0188, "step": 123690 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018972705319569874, "loss": 2.2275, "step": 123695 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.00018972623719455656, "loss": 2.1829, "step": 123700 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001897254211627623, "loss": 2.4372, "step": 123705 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018972460510031618, "loss": 2.2552, "step": 123710 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018972378900721851, "loss": 2.1574, "step": 123715 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018972297288346958, "loss": 2.2348, "step": 123720 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018972215672906966, "loss": 2.0563, "step": 123725 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018972134054401902, "loss": 2.1549, "step": 123730 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018972052432831795, "loss": 2.2642, "step": 123735 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018971970808196668, "loss": 2.2029, "step": 123740 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.00018971889180496558, "loss": 2.2084, "step": 123745 }, { "epoch": 0.29, "grad_norm": 9.625, "learning_rate": 0.00018971807549731485, "loss": 2.1546, "step": 123750 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001897172591590148, "loss": 1.9936, "step": 123755 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018971644279006572, "loss": 2.3014, "step": 123760 }, { "epoch": 0.29, "grad_norm": 2.640625, "learning_rate": 0.00018971562639046786, "loss": 2.1847, "step": 123765 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.00018971480996022154, "loss": 2.2341, "step": 123770 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018971399349932701, "loss": 2.2505, "step": 123775 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018971317700778456, "loss": 2.1119, "step": 123780 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018971236048559442, "loss": 2.1556, "step": 123785 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.00018971154393275695, "loss": 2.2158, "step": 123790 }, { "epoch": 0.29, "grad_norm": 1.6875, "learning_rate": 0.00018971072734927236, "loss": 1.9868, "step": 123795 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018970991073514098, "loss": 2.2746, "step": 123800 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018970909409036306, "loss": 2.118, "step": 123805 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.0001897082774149389, "loss": 2.278, "step": 123810 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.00018970746070886874, "loss": 2.2494, "step": 123815 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018970664397215288, "loss": 2.0771, "step": 123820 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018970582720479162, "loss": 2.1365, "step": 123825 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018970501040678524, "loss": 2.2078, "step": 123830 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018970419357813398, "loss": 2.1513, "step": 123835 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018970337671883814, "loss": 2.1889, "step": 123840 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.000189702559828898, "loss": 1.9739, "step": 123845 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018970174290831385, "loss": 2.1064, "step": 123850 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018970092595708594, "loss": 2.1277, "step": 123855 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018970010897521457, "loss": 2.2744, "step": 123860 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018969929196270004, "loss": 2.2135, "step": 123865 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.0001896984749195426, "loss": 1.9625, "step": 123870 }, { "epoch": 0.29, "grad_norm": 4.90625, "learning_rate": 0.00018969765784574252, "loss": 2.4001, "step": 123875 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.0001896968407413001, "loss": 2.1338, "step": 123880 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.0001896960236062156, "loss": 2.1694, "step": 123885 }, { "epoch": 0.29, "grad_norm": 1.9453125, "learning_rate": 0.00018969520644048933, "loss": 2.2335, "step": 123890 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018969438924412152, "loss": 2.021, "step": 123895 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.0001896935720171125, "loss": 2.1417, "step": 123900 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018969275475946254, "loss": 2.1841, "step": 123905 }, { "epoch": 0.29, "grad_norm": 1.796875, "learning_rate": 0.00018969193747117192, "loss": 1.9327, "step": 123910 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018969112015224086, "loss": 2.1772, "step": 123915 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.0001896903028026697, "loss": 2.183, "step": 123920 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018968948542245876, "loss": 2.0826, "step": 123925 }, { "epoch": 0.29, "grad_norm": 2.4375, "learning_rate": 0.00018968866801160822, "loss": 2.1162, "step": 123930 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001896878505701184, "loss": 2.0927, "step": 123935 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.0001896870330979896, "loss": 2.0873, "step": 123940 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018968621559522207, "loss": 2.0736, "step": 123945 }, { "epoch": 0.29, "grad_norm": 2.8125, "learning_rate": 0.0001896853980618161, "loss": 2.0957, "step": 123950 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.00018968458049777197, "loss": 2.0049, "step": 123955 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.00018968376290309, "loss": 2.0473, "step": 123960 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00018968294527777038, "loss": 2.1397, "step": 123965 }, { "epoch": 0.29, "grad_norm": 1.78125, "learning_rate": 0.00018968212762181344, "loss": 2.1889, "step": 123970 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001896813099352195, "loss": 2.061, "step": 123975 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018968049221798877, "loss": 1.9658, "step": 123980 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018967967447012157, "loss": 1.9768, "step": 123985 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018967885669161815, "loss": 2.0703, "step": 123990 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018967803888247882, "loss": 2.1382, "step": 123995 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00018967722104270386, "loss": 2.179, "step": 124000 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.00018967640317229352, "loss": 2.0421, "step": 124005 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018967558527124807, "loss": 2.0625, "step": 124010 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018967476733956784, "loss": 2.0954, "step": 124015 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018967394937725307, "loss": 2.0599, "step": 124020 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018967313138430408, "loss": 1.9478, "step": 124025 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018967231336072108, "loss": 2.0943, "step": 124030 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.00018967149530650442, "loss": 2.1196, "step": 124035 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.00018967067722165434, "loss": 2.2564, "step": 124040 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018966985910617113, "loss": 2.0273, "step": 124045 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.0001896690409600551, "loss": 2.0349, "step": 124050 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018966822278330647, "loss": 2.19, "step": 124055 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00018966740457592555, "loss": 2.0835, "step": 124060 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.0001896665863379126, "loss": 2.1439, "step": 124065 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018966576806926796, "loss": 2.159, "step": 124070 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018966494976999183, "loss": 2.0464, "step": 124075 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018966413144008457, "loss": 2.2658, "step": 124080 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018966331307954638, "loss": 2.1371, "step": 124085 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.0001896624946883776, "loss": 2.2065, "step": 124090 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018966167626657847, "loss": 1.9842, "step": 124095 }, { "epoch": 0.29, "grad_norm": 2.4375, "learning_rate": 0.00018966085781414927, "loss": 2.0616, "step": 124100 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018966003933109033, "loss": 2.1239, "step": 124105 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00018965922081740186, "loss": 2.1725, "step": 124110 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001896584022730842, "loss": 2.0607, "step": 124115 }, { "epoch": 0.29, "grad_norm": 1.671875, "learning_rate": 0.0001896575836981376, "loss": 2.2388, "step": 124120 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018965676509256235, "loss": 2.1063, "step": 124125 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00018965594645635868, "loss": 2.0778, "step": 124130 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018965512778952695, "loss": 2.1881, "step": 124135 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.0001896543090920674, "loss": 2.3693, "step": 124140 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.0001896534903639803, "loss": 2.1774, "step": 124145 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018965267160526595, "loss": 1.9799, "step": 124150 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018965185281592464, "loss": 2.182, "step": 124155 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001896510339959566, "loss": 2.0259, "step": 124160 }, { "epoch": 0.29, "grad_norm": 1.828125, "learning_rate": 0.00018965021514536217, "loss": 2.1175, "step": 124165 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018964939626414158, "loss": 1.9949, "step": 124170 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018964857735229512, "loss": 2.166, "step": 124175 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018964775840982312, "loss": 2.3794, "step": 124180 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018964693943672578, "loss": 2.2424, "step": 124185 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018964612043300343, "loss": 2.2235, "step": 124190 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018964530139865636, "loss": 2.012, "step": 124195 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018964448233368478, "loss": 2.1413, "step": 124200 }, { "epoch": 0.29, "grad_norm": 1.4140625, "learning_rate": 0.0001896436632380891, "loss": 1.8217, "step": 124205 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.00018964284411186944, "loss": 2.137, "step": 124210 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.00018964202495502618, "loss": 2.1525, "step": 124215 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018964120576755962, "loss": 2.048, "step": 124220 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.00018964038654946993, "loss": 2.0144, "step": 124225 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018963956730075752, "loss": 2.2758, "step": 124230 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.00018963874802142256, "loss": 2.1433, "step": 124235 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001896379287114654, "loss": 2.0535, "step": 124240 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018963710937088633, "loss": 1.9963, "step": 124245 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018963628999968556, "loss": 2.2708, "step": 124250 }, { "epoch": 0.29, "grad_norm": 3.0, "learning_rate": 0.0001896354705978634, "loss": 2.1883, "step": 124255 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.00018963465116542016, "loss": 2.1245, "step": 124260 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018963383170235608, "loss": 2.0322, "step": 124265 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001896330122086715, "loss": 2.1917, "step": 124270 }, { "epoch": 0.29, "grad_norm": 2.46875, "learning_rate": 0.0001896321926843666, "loss": 2.0043, "step": 124275 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018963137312944176, "loss": 2.1481, "step": 124280 }, { "epoch": 0.29, "grad_norm": 2.296875, "learning_rate": 0.00018963055354389717, "loss": 2.0205, "step": 124285 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.0001896297339277332, "loss": 2.1268, "step": 124290 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018962891428095006, "loss": 2.1054, "step": 124295 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001896280946035481, "loss": 2.146, "step": 124300 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018962727489552753, "loss": 2.1602, "step": 124305 }, { "epoch": 0.29, "grad_norm": 1.828125, "learning_rate": 0.00018962645515688864, "loss": 2.1024, "step": 124310 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.00018962563538763174, "loss": 2.1236, "step": 124315 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001896248155877571, "loss": 2.1594, "step": 124320 }, { "epoch": 0.29, "grad_norm": 1.765625, "learning_rate": 0.000189623995757265, "loss": 2.1073, "step": 124325 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018962317589615572, "loss": 2.0728, "step": 124330 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018962235600442954, "loss": 2.2756, "step": 124335 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.00018962153608208671, "loss": 2.2475, "step": 124340 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.0001896207161291276, "loss": 2.1915, "step": 124345 }, { "epoch": 0.29, "grad_norm": 2.546875, "learning_rate": 0.00018961989614555238, "loss": 1.9787, "step": 124350 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.0001896190761313614, "loss": 1.9579, "step": 124355 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.0001896182560865549, "loss": 2.0844, "step": 124360 }, { "epoch": 0.29, "grad_norm": 1.859375, "learning_rate": 0.00018961743601113317, "loss": 2.0844, "step": 124365 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001896166159050965, "loss": 2.1341, "step": 124370 }, { "epoch": 0.29, "grad_norm": 1.8828125, "learning_rate": 0.0001896157957684452, "loss": 2.1851, "step": 124375 }, { "epoch": 0.29, "grad_norm": 1.6640625, "learning_rate": 0.0001896149756011795, "loss": 2.1058, "step": 124380 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.0001896141554032997, "loss": 2.3977, "step": 124385 }, { "epoch": 0.29, "grad_norm": 1.7421875, "learning_rate": 0.00018961333517480603, "loss": 1.9453, "step": 124390 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001896125149156989, "loss": 2.1442, "step": 124395 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018961169462597847, "loss": 2.2339, "step": 124400 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018961087430564505, "loss": 2.1031, "step": 124405 }, { "epoch": 0.29, "grad_norm": 2.453125, "learning_rate": 0.00018961005395469894, "loss": 2.2073, "step": 124410 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018960923357314044, "loss": 2.3512, "step": 124415 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00018960841316096978, "loss": 2.2205, "step": 124420 }, { "epoch": 0.29, "grad_norm": 2.5, "learning_rate": 0.00018960759271818725, "loss": 2.1446, "step": 124425 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018960677224479316, "loss": 1.9949, "step": 124430 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.00018960595174078774, "loss": 2.2079, "step": 124435 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018960513120617133, "loss": 2.0513, "step": 124440 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018960431064094418, "loss": 1.9857, "step": 124445 }, { "epoch": 0.29, "grad_norm": 2.8125, "learning_rate": 0.00018960349004510657, "loss": 2.1671, "step": 124450 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00018960266941865878, "loss": 2.16, "step": 124455 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018960184876160108, "loss": 2.3061, "step": 124460 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.0001896010280739338, "loss": 2.1893, "step": 124465 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018960020735565714, "loss": 2.0655, "step": 124470 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018959938660677146, "loss": 2.2791, "step": 124475 }, { "epoch": 0.29, "grad_norm": 1.8203125, "learning_rate": 0.00018959856582727698, "loss": 2.151, "step": 124480 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018959774501717402, "loss": 2.2279, "step": 124485 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018959692417646283, "loss": 2.358, "step": 124490 }, { "epoch": 0.29, "grad_norm": 2.46875, "learning_rate": 0.00018959610330514373, "loss": 2.0965, "step": 124495 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.00018959528240321694, "loss": 2.1229, "step": 124500 }, { "epoch": 0.29, "grad_norm": 1.703125, "learning_rate": 0.00018959446147068283, "loss": 2.0673, "step": 124505 }, { "epoch": 0.29, "grad_norm": 2.6875, "learning_rate": 0.0001895936405075416, "loss": 2.2779, "step": 124510 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018959281951379353, "loss": 2.1327, "step": 124515 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018959199848943894, "loss": 2.3083, "step": 124520 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018959117743447813, "loss": 2.2167, "step": 124525 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.00018959035634891132, "loss": 2.0619, "step": 124530 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018958953523273882, "loss": 2.2421, "step": 124535 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018958871408596094, "loss": 2.2362, "step": 124540 }, { "epoch": 0.29, "grad_norm": 2.546875, "learning_rate": 0.00018958789290857787, "loss": 2.1682, "step": 124545 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018958707170059, "loss": 2.0592, "step": 124550 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018958625046199757, "loss": 2.2209, "step": 124555 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018958542919280083, "loss": 1.994, "step": 124560 }, { "epoch": 0.29, "grad_norm": 2.546875, "learning_rate": 0.0001895846078930001, "loss": 2.0792, "step": 124565 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001895837865625956, "loss": 2.1843, "step": 124570 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.0001895829652015877, "loss": 2.1691, "step": 124575 }, { "epoch": 0.29, "grad_norm": 1.8515625, "learning_rate": 0.0001895821438099766, "loss": 2.1161, "step": 124580 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018958132238776262, "loss": 2.03, "step": 124585 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018958050093494607, "loss": 2.1663, "step": 124590 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.00018957967945152718, "loss": 2.2857, "step": 124595 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018957885793750623, "loss": 2.0119, "step": 124600 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00018957803639288353, "loss": 2.1532, "step": 124605 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018957721481765934, "loss": 2.0944, "step": 124610 }, { "epoch": 0.29, "grad_norm": 2.234375, "learning_rate": 0.00018957639321183395, "loss": 2.2286, "step": 124615 }, { "epoch": 0.29, "grad_norm": 1.84375, "learning_rate": 0.00018957557157540765, "loss": 2.2092, "step": 124620 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.0001895747499083807, "loss": 2.0114, "step": 124625 }, { "epoch": 0.29, "grad_norm": 1.9453125, "learning_rate": 0.0001895739282107534, "loss": 2.2062, "step": 124630 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018957310648252604, "loss": 2.4152, "step": 124635 }, { "epoch": 0.29, "grad_norm": 1.5703125, "learning_rate": 0.00018957228472369882, "loss": 2.0441, "step": 124640 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018957146293427214, "loss": 2.0615, "step": 124645 }, { "epoch": 0.29, "grad_norm": 1.828125, "learning_rate": 0.0001895706411142462, "loss": 2.0747, "step": 124650 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.00018956981926362133, "loss": 2.1899, "step": 124655 }, { "epoch": 0.29, "grad_norm": 1.578125, "learning_rate": 0.00018956899738239777, "loss": 2.0796, "step": 124660 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018956817547057583, "loss": 2.007, "step": 124665 }, { "epoch": 0.29, "grad_norm": 2.59375, "learning_rate": 0.00018956735352815572, "loss": 2.093, "step": 124670 }, { "epoch": 0.29, "grad_norm": 1.8203125, "learning_rate": 0.00018956653155513783, "loss": 1.9952, "step": 124675 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018956570955152237, "loss": 2.2125, "step": 124680 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00018956488751730965, "loss": 2.1307, "step": 124685 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018956406545249995, "loss": 2.1305, "step": 124690 }, { "epoch": 0.29, "grad_norm": 1.8984375, "learning_rate": 0.00018956324335709352, "loss": 2.1, "step": 124695 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018956242123109067, "loss": 2.0858, "step": 124700 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018956159907449166, "loss": 2.1607, "step": 124705 }, { "epoch": 0.29, "grad_norm": 1.75, "learning_rate": 0.0001895607768872968, "loss": 2.0014, "step": 124710 }, { "epoch": 0.29, "grad_norm": 1.7265625, "learning_rate": 0.00018955995466950632, "loss": 2.1598, "step": 124715 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.0001895591324211206, "loss": 2.0828, "step": 124720 }, { "epoch": 0.29, "grad_norm": 2.5, "learning_rate": 0.0001895583101421398, "loss": 2.1724, "step": 124725 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018955748783256428, "loss": 2.2179, "step": 124730 }, { "epoch": 0.29, "grad_norm": 1.6484375, "learning_rate": 0.0001895566654923943, "loss": 2.2647, "step": 124735 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018955584312163013, "loss": 2.1081, "step": 124740 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018955502072027207, "loss": 2.1261, "step": 124745 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001895541982883204, "loss": 2.1376, "step": 124750 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.00018955337582577536, "loss": 2.1034, "step": 124755 }, { "epoch": 0.29, "grad_norm": 1.8671875, "learning_rate": 0.00018955255333263725, "loss": 2.0585, "step": 124760 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.0001895517308089064, "loss": 1.9847, "step": 124765 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018955090825458306, "loss": 2.2835, "step": 124770 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.00018955008566966749, "loss": 2.0235, "step": 124775 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018954926305415998, "loss": 2.2129, "step": 124780 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018954844040806085, "loss": 2.2505, "step": 124785 }, { "epoch": 0.29, "grad_norm": 1.828125, "learning_rate": 0.00018954761773137032, "loss": 2.2142, "step": 124790 }, { "epoch": 0.29, "grad_norm": 2.40625, "learning_rate": 0.0001895467950240887, "loss": 2.0467, "step": 124795 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018954597228621628, "loss": 2.0892, "step": 124800 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018954514951775332, "loss": 2.2071, "step": 124805 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018954432671870012, "loss": 2.1346, "step": 124810 }, { "epoch": 0.29, "grad_norm": 1.828125, "learning_rate": 0.00018954350388905694, "loss": 2.4072, "step": 124815 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.0001895426810288241, "loss": 2.0631, "step": 124820 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018954185813800187, "loss": 2.152, "step": 124825 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001895410352165905, "loss": 2.313, "step": 124830 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018954021226459027, "loss": 2.3155, "step": 124835 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018953938928200148, "loss": 2.122, "step": 124840 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018953856626882442, "loss": 1.9704, "step": 124845 }, { "epoch": 0.29, "grad_norm": 2.3125, "learning_rate": 0.00018953774322505936, "loss": 1.9171, "step": 124850 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.00018953692015070657, "loss": 2.1418, "step": 124855 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018953609704576638, "loss": 2.111, "step": 124860 }, { "epoch": 0.29, "grad_norm": 2.328125, "learning_rate": 0.000189535273910239, "loss": 2.2717, "step": 124865 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.0001895344507441248, "loss": 2.1375, "step": 124870 }, { "epoch": 0.29, "grad_norm": 2.265625, "learning_rate": 0.00018953362754742394, "loss": 2.106, "step": 124875 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.0001895328043201368, "loss": 2.0271, "step": 124880 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.00018953198106226365, "loss": 2.2057, "step": 124885 }, { "epoch": 0.29, "grad_norm": 1.8046875, "learning_rate": 0.00018953115777380473, "loss": 1.9655, "step": 124890 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.00018953033445476036, "loss": 2.2579, "step": 124895 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.0001895295111051308, "loss": 2.2587, "step": 124900 }, { "epoch": 0.29, "grad_norm": 1.9296875, "learning_rate": 0.0001895286877249163, "loss": 2.0875, "step": 124905 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001895278643141172, "loss": 2.0428, "step": 124910 }, { "epoch": 0.29, "grad_norm": 1.6875, "learning_rate": 0.00018952704087273376, "loss": 2.1869, "step": 124915 }, { "epoch": 0.29, "grad_norm": 1.7890625, "learning_rate": 0.00018952621740076625, "loss": 2.15, "step": 124920 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018952539389821497, "loss": 2.1413, "step": 124925 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018952457036508017, "loss": 2.1744, "step": 124930 }, { "epoch": 0.29, "grad_norm": 1.78125, "learning_rate": 0.0001895237468013622, "loss": 2.049, "step": 124935 }, { "epoch": 0.29, "grad_norm": 2.71875, "learning_rate": 0.00018952292320706127, "loss": 2.0826, "step": 124940 }, { "epoch": 0.29, "grad_norm": 2.40625, "learning_rate": 0.00018952209958217767, "loss": 2.1556, "step": 124945 }, { "epoch": 0.29, "grad_norm": 2.046875, "learning_rate": 0.0001895212759267117, "loss": 2.0335, "step": 124950 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.0001895204522406637, "loss": 2.1018, "step": 124955 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001895196285240338, "loss": 2.0076, "step": 124960 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00018951880477682242, "loss": 2.13, "step": 124965 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.00018951798099902978, "loss": 2.4322, "step": 124970 }, { "epoch": 0.29, "grad_norm": 1.921875, "learning_rate": 0.00018951715719065616, "loss": 2.1585, "step": 124975 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018951633335170188, "loss": 2.0001, "step": 124980 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.0001895155094821672, "loss": 2.3001, "step": 124985 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.0001895146855820524, "loss": 2.1663, "step": 124990 }, { "epoch": 0.29, "grad_norm": 2.140625, "learning_rate": 0.00018951386165135773, "loss": 2.2042, "step": 124995 }, { "epoch": 0.29, "grad_norm": 1.84375, "learning_rate": 0.00018951303769008352, "loss": 2.1598, "step": 125000 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.00018951221369823003, "loss": 2.2022, "step": 125005 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00018951138967579758, "loss": 2.2139, "step": 125010 }, { "epoch": 0.29, "grad_norm": 1.9375, "learning_rate": 0.00018951056562278637, "loss": 2.1295, "step": 125015 }, { "epoch": 0.29, "grad_norm": 1.703125, "learning_rate": 0.00018950974153919675, "loss": 2.236, "step": 125020 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.00018950891742502895, "loss": 2.1742, "step": 125025 }, { "epoch": 0.29, "grad_norm": 1.6953125, "learning_rate": 0.0001895080932802833, "loss": 2.1066, "step": 125030 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018950726910496005, "loss": 2.1022, "step": 125035 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.0001895064448990595, "loss": 2.1879, "step": 125040 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.00018950562066258195, "loss": 2.0801, "step": 125045 }, { "epoch": 0.29, "grad_norm": 2.359375, "learning_rate": 0.00018950479639552766, "loss": 2.1205, "step": 125050 }, { "epoch": 0.29, "grad_norm": 3.171875, "learning_rate": 0.00018950397209789688, "loss": 2.1369, "step": 125055 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018950314776968992, "loss": 2.0991, "step": 125060 }, { "epoch": 0.29, "grad_norm": 2.484375, "learning_rate": 0.00018950232341090705, "loss": 2.3507, "step": 125065 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.0001895014990215486, "loss": 2.173, "step": 125070 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.0001895006746016148, "loss": 2.2031, "step": 125075 }, { "epoch": 0.29, "grad_norm": 2.03125, "learning_rate": 0.0001894998501511059, "loss": 2.2218, "step": 125080 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.0001894990256700223, "loss": 2.0997, "step": 125085 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018949820115836418, "loss": 2.1947, "step": 125090 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00018949737661613184, "loss": 2.1784, "step": 125095 }, { "epoch": 0.29, "grad_norm": 2.015625, "learning_rate": 0.0001894965520433256, "loss": 2.1805, "step": 125100 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018949572743994568, "loss": 2.1625, "step": 125105 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.0001894949028059924, "loss": 2.0843, "step": 125110 }, { "epoch": 0.29, "grad_norm": 1.7578125, "learning_rate": 0.00018949407814146607, "loss": 1.9637, "step": 125115 }, { "epoch": 0.29, "grad_norm": 1.7890625, "learning_rate": 0.00018949325344636692, "loss": 2.1286, "step": 125120 }, { "epoch": 0.29, "grad_norm": 1.75, "learning_rate": 0.00018949242872069526, "loss": 2.0328, "step": 125125 }, { "epoch": 0.29, "grad_norm": 1.8359375, "learning_rate": 0.0001894916039644514, "loss": 2.0643, "step": 125130 }, { "epoch": 0.29, "grad_norm": 2.40625, "learning_rate": 0.0001894907791776355, "loss": 2.0087, "step": 125135 }, { "epoch": 0.29, "grad_norm": 1.9140625, "learning_rate": 0.00018948995436024796, "loss": 2.165, "step": 125140 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018948912951228906, "loss": 2.371, "step": 125145 }, { "epoch": 0.29, "grad_norm": 1.9765625, "learning_rate": 0.00018948830463375903, "loss": 2.2607, "step": 125150 }, { "epoch": 0.29, "grad_norm": 1.984375, "learning_rate": 0.00018948747972465817, "loss": 2.1726, "step": 125155 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.00018948665478498678, "loss": 2.16, "step": 125160 }, { "epoch": 0.29, "grad_norm": 2.09375, "learning_rate": 0.0001894858298147451, "loss": 2.0989, "step": 125165 }, { "epoch": 0.29, "grad_norm": 1.9609375, "learning_rate": 0.00018948500481393344, "loss": 2.098, "step": 125170 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001894841797825521, "loss": 1.9998, "step": 125175 }, { "epoch": 0.29, "grad_norm": 1.8203125, "learning_rate": 0.00018948335472060134, "loss": 2.2794, "step": 125180 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.00018948252962808142, "loss": 2.0393, "step": 125185 }, { "epoch": 0.29, "grad_norm": 2.375, "learning_rate": 0.00018948170450499264, "loss": 2.1936, "step": 125190 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018948087935133533, "loss": 2.2798, "step": 125195 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001894800541671097, "loss": 2.2319, "step": 125200 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018947922895231606, "loss": 2.2409, "step": 125205 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.0001894784037069547, "loss": 2.1347, "step": 125210 }, { "epoch": 0.29, "grad_norm": 1.6484375, "learning_rate": 0.00018947757843102586, "loss": 1.9818, "step": 125215 }, { "epoch": 0.29, "grad_norm": 2.28125, "learning_rate": 0.00018947675312452992, "loss": 2.2608, "step": 125220 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018947592778746706, "loss": 2.0417, "step": 125225 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.0001894751024198376, "loss": 2.214, "step": 125230 }, { "epoch": 0.29, "grad_norm": 1.890625, "learning_rate": 0.00018947427702164181, "loss": 2.1073, "step": 125235 }, { "epoch": 0.29, "grad_norm": 2.078125, "learning_rate": 0.00018947345159288001, "loss": 1.865, "step": 125240 }, { "epoch": 0.29, "grad_norm": 2.421875, "learning_rate": 0.00018947262613355247, "loss": 2.1919, "step": 125245 }, { "epoch": 0.29, "grad_norm": 1.96875, "learning_rate": 0.00018947180064365942, "loss": 2.1936, "step": 125250 }, { "epoch": 0.29, "grad_norm": 1.90625, "learning_rate": 0.0001894709751232012, "loss": 2.0824, "step": 125255 }, { "epoch": 0.29, "grad_norm": 1.8125, "learning_rate": 0.00018947014957217806, "loss": 2.0072, "step": 125260 }, { "epoch": 0.29, "grad_norm": 2.15625, "learning_rate": 0.0001894693239905903, "loss": 2.2999, "step": 125265 }, { "epoch": 0.29, "grad_norm": 2.0, "learning_rate": 0.00018946849837843822, "loss": 2.1813, "step": 125270 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018946767273572202, "loss": 2.1084, "step": 125275 }, { "epoch": 0.29, "grad_norm": 2.0625, "learning_rate": 0.0001894668470624421, "loss": 2.1057, "step": 125280 }, { "epoch": 0.29, "grad_norm": 2.203125, "learning_rate": 0.00018946602135859868, "loss": 2.2456, "step": 125285 }, { "epoch": 0.29, "grad_norm": 1.71875, "learning_rate": 0.000189465195624192, "loss": 2.0776, "step": 125290 }, { "epoch": 0.29, "grad_norm": 2.109375, "learning_rate": 0.00018946436985922243, "loss": 2.2119, "step": 125295 }, { "epoch": 0.29, "grad_norm": 2.1875, "learning_rate": 0.0001894635440636902, "loss": 2.1595, "step": 125300 }, { "epoch": 0.29, "grad_norm": 1.875, "learning_rate": 0.0001894627182375956, "loss": 2.121, "step": 125305 }, { "epoch": 0.29, "grad_norm": 2.125, "learning_rate": 0.0001894618923809389, "loss": 2.1007, "step": 125310 }, { "epoch": 0.29, "grad_norm": 2.34375, "learning_rate": 0.00018946106649372037, "loss": 2.0946, "step": 125315 }, { "epoch": 0.29, "grad_norm": 2.171875, "learning_rate": 0.00018946024057594037, "loss": 2.2195, "step": 125320 }, { "epoch": 0.29, "grad_norm": 2.25, "learning_rate": 0.0001894594146275991, "loss": 2.2912, "step": 125325 }, { "epoch": 0.29, "grad_norm": 1.953125, "learning_rate": 0.00018945858864869689, "loss": 2.2958, "step": 125330 }, { "epoch": 0.29, "grad_norm": 1.9921875, "learning_rate": 0.000189457762639234, "loss": 2.2324, "step": 125335 }, { "epoch": 0.29, "grad_norm": 2.53125, "learning_rate": 0.0001894569365992107, "loss": 2.2028, "step": 125340 }, { "epoch": 0.29, "grad_norm": 2.390625, "learning_rate": 0.00018945611052862728, "loss": 2.1559, "step": 125345 }, { "epoch": 0.29, "grad_norm": 2.21875, "learning_rate": 0.00018945528442748403, "loss": 2.1267, "step": 125350 }, { "epoch": 0.3, "grad_norm": 1.828125, "learning_rate": 0.00018945445829578127, "loss": 2.0232, "step": 125355 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018945363213351924, "loss": 2.1869, "step": 125360 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018945280594069819, "loss": 2.1006, "step": 125365 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018945197971731846, "loss": 1.9238, "step": 125370 }, { "epoch": 0.3, "grad_norm": 1.7890625, "learning_rate": 0.0001894511534633803, "loss": 1.9783, "step": 125375 }, { "epoch": 0.3, "grad_norm": 1.984375, "learning_rate": 0.00018945032717888403, "loss": 2.2124, "step": 125380 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018944950086382988, "loss": 2.1113, "step": 125385 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018944867451821818, "loss": 2.1492, "step": 125390 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.0001894478481420492, "loss": 2.2662, "step": 125395 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018944702173532315, "loss": 2.1095, "step": 125400 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.00018944619529804042, "loss": 2.1121, "step": 125405 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.00018944536883020127, "loss": 2.1196, "step": 125410 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.0001894445423318059, "loss": 2.1281, "step": 125415 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.00018944371580285473, "loss": 2.1559, "step": 125420 }, { "epoch": 0.3, "grad_norm": 2.515625, "learning_rate": 0.0001894428892433479, "loss": 2.0805, "step": 125425 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018944206265328578, "loss": 2.0551, "step": 125430 }, { "epoch": 0.3, "grad_norm": 1.8359375, "learning_rate": 0.00018944123603266863, "loss": 2.2205, "step": 125435 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018944040938149672, "loss": 2.0598, "step": 125440 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018943958269977037, "loss": 2.0022, "step": 125445 }, { "epoch": 0.3, "grad_norm": 1.921875, "learning_rate": 0.0001894387559874898, "loss": 1.9259, "step": 125450 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018943792924465535, "loss": 2.1069, "step": 125455 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001894371024712673, "loss": 2.0268, "step": 125460 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001894362756673259, "loss": 2.2968, "step": 125465 }, { "epoch": 0.3, "grad_norm": 1.78125, "learning_rate": 0.00018943544883283142, "loss": 2.1372, "step": 125470 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018943462196778418, "loss": 2.1367, "step": 125475 }, { "epoch": 0.3, "grad_norm": 2.640625, "learning_rate": 0.00018943379507218447, "loss": 2.2151, "step": 125480 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018943296814603257, "loss": 2.1231, "step": 125485 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.0001894321411893287, "loss": 2.3484, "step": 125490 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.0001894313142020732, "loss": 2.2043, "step": 125495 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018943048718426635, "loss": 2.2585, "step": 125500 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.0001894296601359084, "loss": 2.251, "step": 125505 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.0001894288330569997, "loss": 2.0932, "step": 125510 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.00018942800594754048, "loss": 2.2524, "step": 125515 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.000189427178807531, "loss": 2.1904, "step": 125520 }, { "epoch": 0.3, "grad_norm": 1.6796875, "learning_rate": 0.0001894263516369716, "loss": 1.9537, "step": 125525 }, { "epoch": 0.3, "grad_norm": 2.5, "learning_rate": 0.00018942552443586253, "loss": 2.1051, "step": 125530 }, { "epoch": 0.3, "grad_norm": 1.6796875, "learning_rate": 0.00018942469720420409, "loss": 2.1673, "step": 125535 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.0001894238699419965, "loss": 2.1825, "step": 125540 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018942304264924018, "loss": 2.2646, "step": 125545 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018942221532593527, "loss": 2.1905, "step": 125550 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018942138797208214, "loss": 2.2419, "step": 125555 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.000189420560587681, "loss": 2.0695, "step": 125560 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018941973317273219, "loss": 2.0909, "step": 125565 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.000189418905727236, "loss": 2.1462, "step": 125570 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018941807825119267, "loss": 1.9523, "step": 125575 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001894172507446025, "loss": 2.1296, "step": 125580 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.0001894164232074658, "loss": 2.2395, "step": 125585 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018941559563978282, "loss": 2.1386, "step": 125590 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018941476804155383, "loss": 2.1058, "step": 125595 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018941394041277913, "loss": 2.2537, "step": 125600 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018941311275345903, "loss": 2.1707, "step": 125605 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.0001894122850635938, "loss": 2.3729, "step": 125610 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018941145734318368, "loss": 2.0999, "step": 125615 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.00018941062959222898, "loss": 2.1918, "step": 125620 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018940980181073004, "loss": 2.0816, "step": 125625 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018940897399868706, "loss": 2.3042, "step": 125630 }, { "epoch": 0.3, "grad_norm": 1.7421875, "learning_rate": 0.0001894081461561003, "loss": 2.2408, "step": 125635 }, { "epoch": 0.3, "grad_norm": 1.8125, "learning_rate": 0.00018940731828297018, "loss": 2.1235, "step": 125640 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018940649037929686, "loss": 2.2219, "step": 125645 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018940566244508066, "loss": 2.0826, "step": 125650 }, { "epoch": 0.3, "grad_norm": 2.6875, "learning_rate": 0.00018940483448032187, "loss": 2.1795, "step": 125655 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018940400648502078, "loss": 2.1815, "step": 125660 }, { "epoch": 0.3, "grad_norm": 1.984375, "learning_rate": 0.00018940317845917763, "loss": 1.9572, "step": 125665 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018940235040279275, "loss": 2.1791, "step": 125670 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.0001894015223158664, "loss": 2.0555, "step": 125675 }, { "epoch": 0.3, "grad_norm": 1.71875, "learning_rate": 0.00018940069419839887, "loss": 2.0753, "step": 125680 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.00018939986605039043, "loss": 2.2168, "step": 125685 }, { "epoch": 0.3, "grad_norm": 2.609375, "learning_rate": 0.0001893990378718414, "loss": 2.1134, "step": 125690 }, { "epoch": 0.3, "grad_norm": 1.75, "learning_rate": 0.00018939820966275202, "loss": 2.2637, "step": 125695 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.0001893973814231226, "loss": 2.2404, "step": 125700 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.00018939655315295344, "loss": 2.287, "step": 125705 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018939572485224474, "loss": 2.1934, "step": 125710 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018939489652099688, "loss": 1.9577, "step": 125715 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018939406815921008, "loss": 2.0803, "step": 125720 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018939323976688464, "loss": 2.1946, "step": 125725 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018939241134402086, "loss": 2.2581, "step": 125730 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018939158289061904, "loss": 2.1578, "step": 125735 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.0001893907544066794, "loss": 2.0764, "step": 125740 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018938992589220225, "loss": 2.2527, "step": 125745 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001893890973471879, "loss": 2.1805, "step": 125750 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.0001893882687716366, "loss": 2.0904, "step": 125755 }, { "epoch": 0.3, "grad_norm": 2.71875, "learning_rate": 0.00018938744016554866, "loss": 2.1473, "step": 125760 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001893866115289243, "loss": 2.1911, "step": 125765 }, { "epoch": 0.3, "grad_norm": 2.59375, "learning_rate": 0.00018938578286176392, "loss": 2.1754, "step": 125770 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018938495416406773, "loss": 2.3092, "step": 125775 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.000189384125435836, "loss": 2.095, "step": 125780 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.000189383296677069, "loss": 2.0028, "step": 125785 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.0001893824678877671, "loss": 2.3054, "step": 125790 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.00018938163906793046, "loss": 2.1953, "step": 125795 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.0001893808102175595, "loss": 2.1474, "step": 125800 }, { "epoch": 0.3, "grad_norm": 2.734375, "learning_rate": 0.0001893799813366544, "loss": 2.4246, "step": 125805 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018937915242521548, "loss": 2.1005, "step": 125810 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018937832348324304, "loss": 2.1786, "step": 125815 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018937749451073733, "loss": 2.0497, "step": 125820 }, { "epoch": 0.3, "grad_norm": 1.984375, "learning_rate": 0.00018937666550769864, "loss": 2.1044, "step": 125825 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.00018937583647412728, "loss": 2.2266, "step": 125830 }, { "epoch": 0.3, "grad_norm": 2.578125, "learning_rate": 0.00018937500741002348, "loss": 2.3802, "step": 125835 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.0001893741783153876, "loss": 2.2662, "step": 125840 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018937334919021982, "loss": 2.142, "step": 125845 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018937252003452053, "loss": 2.1155, "step": 125850 }, { "epoch": 0.3, "grad_norm": 2.5, "learning_rate": 0.00018937169084828995, "loss": 2.0647, "step": 125855 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.0001893708616315284, "loss": 2.1553, "step": 125860 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018937003238423615, "loss": 2.2091, "step": 125865 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.0001893692031064134, "loss": 2.1383, "step": 125870 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018936837379806057, "loss": 2.1395, "step": 125875 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.0001893675444591779, "loss": 1.9175, "step": 125880 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018936671508976562, "loss": 2.3203, "step": 125885 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018936588568982407, "loss": 2.1858, "step": 125890 }, { "epoch": 0.3, "grad_norm": 1.578125, "learning_rate": 0.0001893650562593535, "loss": 2.2219, "step": 125895 }, { "epoch": 0.3, "grad_norm": 2.703125, "learning_rate": 0.0001893642267983542, "loss": 2.0859, "step": 125900 }, { "epoch": 0.3, "grad_norm": 1.5625, "learning_rate": 0.00018936339730682647, "loss": 2.1634, "step": 125905 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018936256778477058, "loss": 2.183, "step": 125910 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018936173823218684, "loss": 2.0907, "step": 125915 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018936090864907549, "loss": 2.0961, "step": 125920 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018936007903543682, "loss": 2.2124, "step": 125925 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.00018935924939127114, "loss": 2.1618, "step": 125930 }, { "epoch": 0.3, "grad_norm": 4.09375, "learning_rate": 0.0001893584197165787, "loss": 2.3195, "step": 125935 }, { "epoch": 0.3, "grad_norm": 3.1875, "learning_rate": 0.0001893575900113598, "loss": 2.1277, "step": 125940 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018935676027561477, "loss": 1.941, "step": 125945 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018935593050934383, "loss": 2.1735, "step": 125950 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.0001893551007125473, "loss": 2.0973, "step": 125955 }, { "epoch": 0.3, "grad_norm": 1.8359375, "learning_rate": 0.00018935427088522541, "loss": 2.1955, "step": 125960 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.0001893534410273785, "loss": 2.2343, "step": 125965 }, { "epoch": 0.3, "grad_norm": 4.9375, "learning_rate": 0.00018935261113900683, "loss": 2.1192, "step": 125970 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.0001893517812201107, "loss": 2.0384, "step": 125975 }, { "epoch": 0.3, "grad_norm": 1.84375, "learning_rate": 0.00018935095127069036, "loss": 2.1345, "step": 125980 }, { "epoch": 0.3, "grad_norm": 1.6640625, "learning_rate": 0.00018935012129074613, "loss": 2.1179, "step": 125985 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018934929128027828, "loss": 2.0712, "step": 125990 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018934846123928707, "loss": 2.1404, "step": 125995 }, { "epoch": 0.3, "grad_norm": 1.921875, "learning_rate": 0.00018934763116777283, "loss": 2.0242, "step": 126000 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001893468010657358, "loss": 2.2298, "step": 126005 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018934597093317633, "loss": 2.0891, "step": 126010 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.00018934514077009458, "loss": 2.2642, "step": 126015 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018934431057649097, "loss": 1.9664, "step": 126020 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018934348035236568, "loss": 2.2532, "step": 126025 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018934265009771907, "loss": 2.1778, "step": 126030 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018934181981255136, "loss": 2.1117, "step": 126035 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.0001893409894968629, "loss": 2.1037, "step": 126040 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018934015915065392, "loss": 2.179, "step": 126045 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018933932877392474, "loss": 2.0676, "step": 126050 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.0001893384983666756, "loss": 2.2088, "step": 126055 }, { "epoch": 0.3, "grad_norm": 2.4375, "learning_rate": 0.0001893376679289068, "loss": 2.1074, "step": 126060 }, { "epoch": 0.3, "grad_norm": 1.7109375, "learning_rate": 0.00018933683746061864, "loss": 2.0732, "step": 126065 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.0001893360069618114, "loss": 2.1583, "step": 126070 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018933517643248538, "loss": 1.8923, "step": 126075 }, { "epoch": 0.3, "grad_norm": 2.671875, "learning_rate": 0.00018933434587264082, "loss": 2.2306, "step": 126080 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.00018933351528227803, "loss": 1.9588, "step": 126085 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018933268466139727, "loss": 2.1871, "step": 126090 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.0001893318540099989, "loss": 2.1244, "step": 126095 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.0001893310233280831, "loss": 2.0468, "step": 126100 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018933019261565024, "loss": 2.1667, "step": 126105 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018932936187270052, "loss": 2.1551, "step": 126110 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.0001893285310992343, "loss": 2.2232, "step": 126115 }, { "epoch": 0.3, "grad_norm": 2.734375, "learning_rate": 0.00018932770029525184, "loss": 2.188, "step": 126120 }, { "epoch": 0.3, "grad_norm": 2.453125, "learning_rate": 0.00018932686946075342, "loss": 2.3273, "step": 126125 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.00018932603859573929, "loss": 2.0996, "step": 126130 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001893252077002098, "loss": 2.094, "step": 126135 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018932437677416516, "loss": 2.2091, "step": 126140 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.00018932354581760574, "loss": 2.1917, "step": 126145 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018932271483053175, "loss": 2.1202, "step": 126150 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018932188381294348, "loss": 2.2146, "step": 126155 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.0001893210527648413, "loss": 2.1944, "step": 126160 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018932022168622538, "loss": 2.2003, "step": 126165 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018931939057709605, "loss": 2.1378, "step": 126170 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.0001893185594374536, "loss": 2.0339, "step": 126175 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.0001893177282672983, "loss": 2.2165, "step": 126180 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018931689706663048, "loss": 2.2348, "step": 126185 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018931606583545035, "loss": 2.2319, "step": 126190 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.00018931523457375827, "loss": 2.0642, "step": 126195 }, { "epoch": 0.3, "grad_norm": 2.5, "learning_rate": 0.00018931440328155444, "loss": 2.1815, "step": 126200 }, { "epoch": 0.3, "grad_norm": 1.921875, "learning_rate": 0.00018931357195883924, "loss": 2.1311, "step": 126205 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018931274060561285, "loss": 2.2681, "step": 126210 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018931190922187566, "loss": 2.207, "step": 126215 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018931107780762786, "loss": 2.2373, "step": 126220 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001893102463628698, "loss": 2.2383, "step": 126225 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018930941488760173, "loss": 2.2625, "step": 126230 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018930858338182395, "loss": 2.0526, "step": 126235 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018930775184553675, "loss": 2.0361, "step": 126240 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.0001893069202787404, "loss": 2.0469, "step": 126245 }, { "epoch": 0.3, "grad_norm": 2.484375, "learning_rate": 0.00018930608868143517, "loss": 2.177, "step": 126250 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.00018930525705362138, "loss": 2.0157, "step": 126255 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018930442539529926, "loss": 2.2225, "step": 126260 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018930359370646915, "loss": 2.2557, "step": 126265 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018930276198713132, "loss": 1.9557, "step": 126270 }, { "epoch": 0.3, "grad_norm": 1.578125, "learning_rate": 0.00018930193023728603, "loss": 2.089, "step": 126275 }, { "epoch": 0.3, "grad_norm": 2.484375, "learning_rate": 0.00018930109845693361, "loss": 2.0877, "step": 126280 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.0001893002666460743, "loss": 2.0742, "step": 126285 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001892994348047084, "loss": 2.1278, "step": 126290 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.0001892986029328362, "loss": 2.2412, "step": 126295 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018929777103045793, "loss": 2.0882, "step": 126300 }, { "epoch": 0.3, "grad_norm": 1.7890625, "learning_rate": 0.00018929693909757397, "loss": 2.2299, "step": 126305 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018929610713418457, "loss": 1.9975, "step": 126310 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018929527514028997, "loss": 2.3063, "step": 126315 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018929444311589048, "loss": 2.0298, "step": 126320 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.0001892936110609864, "loss": 2.1489, "step": 126325 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.000189292778975578, "loss": 2.3174, "step": 126330 }, { "epoch": 0.3, "grad_norm": 1.734375, "learning_rate": 0.00018929194685966559, "loss": 2.0384, "step": 126335 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001892911147132494, "loss": 2.1617, "step": 126340 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018929028253632975, "loss": 2.1663, "step": 126345 }, { "epoch": 0.3, "grad_norm": 1.765625, "learning_rate": 0.00018928945032890693, "loss": 1.9937, "step": 126350 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.0001892886180909812, "loss": 2.2551, "step": 126355 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001892877858225529, "loss": 2.2171, "step": 126360 }, { "epoch": 0.3, "grad_norm": 2.8125, "learning_rate": 0.00018928695352362222, "loss": 2.2049, "step": 126365 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018928612119418952, "loss": 2.0003, "step": 126370 }, { "epoch": 0.3, "grad_norm": 2.609375, "learning_rate": 0.00018928528883425507, "loss": 2.2104, "step": 126375 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018928445644381911, "loss": 2.2611, "step": 126380 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.000189283624022882, "loss": 2.1525, "step": 126385 }, { "epoch": 0.3, "grad_norm": 1.8125, "learning_rate": 0.000189282791571444, "loss": 2.1972, "step": 126390 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018928195908950532, "loss": 2.0839, "step": 126395 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018928112657706634, "loss": 2.005, "step": 126400 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.0001892802940341273, "loss": 1.9318, "step": 126405 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018927946146068852, "loss": 2.197, "step": 126410 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018927862885675023, "loss": 2.1322, "step": 126415 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018927779622231273, "loss": 1.9859, "step": 126420 }, { "epoch": 0.3, "grad_norm": 2.515625, "learning_rate": 0.00018927696355737635, "loss": 2.1715, "step": 126425 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.0001892761308619413, "loss": 2.2232, "step": 126430 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018927529813600794, "loss": 2.0799, "step": 126435 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.0001892744653795765, "loss": 2.1881, "step": 126440 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018927363259264727, "loss": 2.0766, "step": 126445 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018927279977522056, "loss": 2.0912, "step": 126450 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.00018927196692729666, "loss": 2.0639, "step": 126455 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018927113404887584, "loss": 2.0713, "step": 126460 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018927030113995836, "loss": 2.0983, "step": 126465 }, { "epoch": 0.3, "grad_norm": 2.4375, "learning_rate": 0.00018926946820054452, "loss": 2.2462, "step": 126470 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018926863523063462, "loss": 2.1953, "step": 126475 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018926780223022896, "loss": 2.0722, "step": 126480 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018926696919932775, "loss": 2.1745, "step": 126485 }, { "epoch": 0.3, "grad_norm": 2.546875, "learning_rate": 0.00018926613613793139, "loss": 2.0667, "step": 126490 }, { "epoch": 0.3, "grad_norm": 3.03125, "learning_rate": 0.00018926530304604007, "loss": 2.1809, "step": 126495 }, { "epoch": 0.3, "grad_norm": 2.484375, "learning_rate": 0.00018926446992365408, "loss": 2.3375, "step": 126500 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018926363677077376, "loss": 2.2663, "step": 126505 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018926280358739933, "loss": 1.9504, "step": 126510 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018926197037353112, "loss": 2.1115, "step": 126515 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.0001892611371291694, "loss": 2.2227, "step": 126520 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018926030385431445, "loss": 2.3085, "step": 126525 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018925947054896657, "loss": 2.0498, "step": 126530 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018925863721312608, "loss": 2.2922, "step": 126535 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018925780384679316, "loss": 2.2205, "step": 126540 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018925697044996817, "loss": 2.0479, "step": 126545 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018925613702265142, "loss": 2.1548, "step": 126550 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.0001892553035648431, "loss": 2.0969, "step": 126555 }, { "epoch": 0.3, "grad_norm": 1.8984375, "learning_rate": 0.00018925447007654356, "loss": 2.0797, "step": 126560 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018925363655775308, "loss": 2.1767, "step": 126565 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018925280300847197, "loss": 2.2378, "step": 126570 }, { "epoch": 0.3, "grad_norm": 2.828125, "learning_rate": 0.00018925196942870044, "loss": 2.1348, "step": 126575 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018925113581843883, "loss": 2.3063, "step": 126580 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018925030217768743, "loss": 2.0713, "step": 126585 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.0001892494685064465, "loss": 2.0651, "step": 126590 }, { "epoch": 0.3, "grad_norm": 1.84375, "learning_rate": 0.00018924863480471632, "loss": 2.1675, "step": 126595 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018924780107249724, "loss": 2.2763, "step": 126600 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.0001892469673097894, "loss": 2.2662, "step": 126605 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018924613351659325, "loss": 2.2029, "step": 126610 }, { "epoch": 0.3, "grad_norm": 2.59375, "learning_rate": 0.000189245299692909, "loss": 2.3258, "step": 126615 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001892444658387369, "loss": 2.2016, "step": 126620 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.0001892436319540773, "loss": 2.0445, "step": 126625 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018924279803893043, "loss": 2.156, "step": 126630 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.0001892419640932966, "loss": 2.0374, "step": 126635 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018924113011717613, "loss": 2.1569, "step": 126640 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018924029611056925, "loss": 2.1123, "step": 126645 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.0001892394620734763, "loss": 2.1662, "step": 126650 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001892386280058975, "loss": 2.0374, "step": 126655 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018923779390783317, "loss": 2.3303, "step": 126660 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018923695977928357, "loss": 2.1162, "step": 126665 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.00018923612562024905, "loss": 1.8408, "step": 126670 }, { "epoch": 0.3, "grad_norm": 2.859375, "learning_rate": 0.0001892352914307298, "loss": 2.1691, "step": 126675 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001892344572107262, "loss": 2.2044, "step": 126680 }, { "epoch": 0.3, "grad_norm": 1.578125, "learning_rate": 0.00018923362296023848, "loss": 2.1461, "step": 126685 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018923278867926695, "loss": 2.0774, "step": 126690 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018923195436781185, "loss": 2.1799, "step": 126695 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018923112002587348, "loss": 2.3162, "step": 126700 }, { "epoch": 0.3, "grad_norm": 2.53125, "learning_rate": 0.0001892302856534522, "loss": 2.1754, "step": 126705 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.0001892294512505482, "loss": 2.2552, "step": 126710 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.0001892286168171618, "loss": 2.0331, "step": 126715 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001892277823532933, "loss": 2.1783, "step": 126720 }, { "epoch": 0.3, "grad_norm": 5.3125, "learning_rate": 0.00018922694785894297, "loss": 2.1535, "step": 126725 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018922611333411108, "loss": 2.2259, "step": 126730 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018922527877879793, "loss": 2.1129, "step": 126735 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018922444419300382, "loss": 1.966, "step": 126740 }, { "epoch": 0.3, "grad_norm": 2.859375, "learning_rate": 0.00018922360957672905, "loss": 2.0915, "step": 126745 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018922277492997385, "loss": 2.1889, "step": 126750 }, { "epoch": 0.3, "grad_norm": 1.6953125, "learning_rate": 0.0001892219402527385, "loss": 2.1943, "step": 126755 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018922110554502337, "loss": 2.1053, "step": 126760 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018922027080682866, "loss": 2.0214, "step": 126765 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001892194360381547, "loss": 2.199, "step": 126770 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018921860123900176, "loss": 2.2088, "step": 126775 }, { "epoch": 0.3, "grad_norm": 2.515625, "learning_rate": 0.00018921776640937012, "loss": 2.1825, "step": 126780 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.0001892169315492601, "loss": 2.325, "step": 126785 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018921609665867192, "loss": 1.9484, "step": 126790 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018921526173760594, "loss": 2.2196, "step": 126795 }, { "epoch": 0.3, "grad_norm": 1.859375, "learning_rate": 0.00018921442678606238, "loss": 2.1626, "step": 126800 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018921359180404157, "loss": 2.2531, "step": 126805 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018921275679154378, "loss": 1.9922, "step": 126810 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018921192174856926, "loss": 2.1942, "step": 126815 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018921108667511836, "loss": 2.1208, "step": 126820 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018921025157119135, "loss": 2.1829, "step": 126825 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018920941643678848, "loss": 2.2126, "step": 126830 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018920858127191006, "loss": 2.1849, "step": 126835 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018920774607655635, "loss": 2.0339, "step": 126840 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.0001892069108507277, "loss": 2.0625, "step": 126845 }, { "epoch": 0.3, "grad_norm": 1.71875, "learning_rate": 0.0001892060755944243, "loss": 2.2423, "step": 126850 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018920524030764652, "loss": 2.0414, "step": 126855 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018920440499039463, "loss": 2.1654, "step": 126860 }, { "epoch": 0.3, "grad_norm": 1.828125, "learning_rate": 0.00018920356964266885, "loss": 2.06, "step": 126865 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.00018920273426446956, "loss": 2.0176, "step": 126870 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.00018920189885579696, "loss": 2.0982, "step": 126875 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018920106341665139, "loss": 2.0892, "step": 126880 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.00018920022794703312, "loss": 2.1858, "step": 126885 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.0001891993924469424, "loss": 2.2952, "step": 126890 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001891985569163796, "loss": 2.1331, "step": 126895 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.00018919772135534492, "loss": 2.0585, "step": 126900 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.0001891968857638387, "loss": 2.1758, "step": 126905 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001891960501418612, "loss": 2.0522, "step": 126910 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.00018919521448941274, "loss": 2.0857, "step": 126915 }, { "epoch": 0.3, "grad_norm": 1.7578125, "learning_rate": 0.00018919437880649353, "loss": 2.2712, "step": 126920 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.0001891935430931039, "loss": 2.0818, "step": 126925 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001891927073492442, "loss": 2.0987, "step": 126930 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.0001891918715749146, "loss": 2.132, "step": 126935 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018919103577011545, "loss": 2.0474, "step": 126940 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018919019993484702, "loss": 2.1393, "step": 126945 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018918936406910964, "loss": 2.11, "step": 126950 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018918852817290352, "loss": 2.2442, "step": 126955 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018918769224622896, "loss": 2.2813, "step": 126960 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.0001891868562890863, "loss": 2.1597, "step": 126965 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.0001891860203014758, "loss": 2.1458, "step": 126970 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.0001891851842833977, "loss": 2.2239, "step": 126975 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018918434823485237, "loss": 2.3655, "step": 126980 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018918351215584003, "loss": 2.4098, "step": 126985 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018918267604636096, "loss": 2.2555, "step": 126990 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018918183990641548, "loss": 2.2025, "step": 126995 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001891810037360039, "loss": 2.1402, "step": 127000 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018918016753512645, "loss": 2.0983, "step": 127005 }, { "epoch": 0.3, "grad_norm": 2.5625, "learning_rate": 0.00018917933130378344, "loss": 2.1908, "step": 127010 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018917849504197513, "loss": 2.2665, "step": 127015 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018917765874970182, "loss": 2.2152, "step": 127020 }, { "epoch": 0.3, "grad_norm": 1.734375, "learning_rate": 0.00018917682242696387, "loss": 2.2133, "step": 127025 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.00018917598607376147, "loss": 2.132, "step": 127030 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001891751496900949, "loss": 2.4794, "step": 127035 }, { "epoch": 0.3, "grad_norm": 2.59375, "learning_rate": 0.0001891743132759645, "loss": 2.2505, "step": 127040 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018917347683137056, "loss": 2.0339, "step": 127045 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018917264035631334, "loss": 2.0417, "step": 127050 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.0001891718038507931, "loss": 2.0183, "step": 127055 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018917096731481018, "loss": 2.303, "step": 127060 }, { "epoch": 0.3, "grad_norm": 1.8671875, "learning_rate": 0.00018917013074836486, "loss": 2.1039, "step": 127065 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018916929415145736, "loss": 2.2729, "step": 127070 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018916845752408806, "loss": 2.248, "step": 127075 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018916762086625716, "loss": 2.103, "step": 127080 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.000189166784177965, "loss": 2.2108, "step": 127085 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018916594745921184, "loss": 2.1446, "step": 127090 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018916511070999795, "loss": 2.2658, "step": 127095 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.0001891642739303237, "loss": 2.163, "step": 127100 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018916343712018928, "loss": 2.1764, "step": 127105 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018916260027959504, "loss": 2.2671, "step": 127110 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001891617634085412, "loss": 2.259, "step": 127115 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.0001891609265070281, "loss": 2.2406, "step": 127120 }, { "epoch": 0.3, "grad_norm": 1.6640625, "learning_rate": 0.000189160089575056, "loss": 2.0305, "step": 127125 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001891592526126252, "loss": 2.0452, "step": 127130 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.000189158415619736, "loss": 1.8185, "step": 127135 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018915757859638865, "loss": 2.2921, "step": 127140 }, { "epoch": 0.3, "grad_norm": 1.921875, "learning_rate": 0.0001891567415425835, "loss": 2.0841, "step": 127145 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.00018915590445832075, "loss": 2.0852, "step": 127150 }, { "epoch": 0.3, "grad_norm": 2.546875, "learning_rate": 0.0001891550673436007, "loss": 2.176, "step": 127155 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018915423019842372, "loss": 2.1658, "step": 127160 }, { "epoch": 0.3, "grad_norm": 2.546875, "learning_rate": 0.00018915339302279, "loss": 2.1492, "step": 127165 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.00018915255581669987, "loss": 2.1049, "step": 127170 }, { "epoch": 0.3, "grad_norm": 1.84375, "learning_rate": 0.0001891517185801536, "loss": 2.1829, "step": 127175 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018915088131315152, "loss": 2.2492, "step": 127180 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018915004401569388, "loss": 2.2814, "step": 127185 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018914920668778093, "loss": 1.8843, "step": 127190 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.000189148369329413, "loss": 2.315, "step": 127195 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.0001891475319405904, "loss": 2.1738, "step": 127200 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.00018914669452131338, "loss": 2.2407, "step": 127205 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001891458570715822, "loss": 2.2471, "step": 127210 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.0001891450195913972, "loss": 2.2529, "step": 127215 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018914418208075868, "loss": 2.1339, "step": 127220 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018914334453966683, "loss": 2.0101, "step": 127225 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018914250696812202, "loss": 2.2084, "step": 127230 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.00018914166936612455, "loss": 1.9236, "step": 127235 }, { "epoch": 0.3, "grad_norm": 2.53125, "learning_rate": 0.0001891408317336746, "loss": 2.1311, "step": 127240 }, { "epoch": 0.3, "grad_norm": 2.640625, "learning_rate": 0.0001891399940707726, "loss": 2.061, "step": 127245 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.0001891391563774187, "loss": 2.1176, "step": 127250 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018913831865361328, "loss": 1.9058, "step": 127255 }, { "epoch": 0.3, "grad_norm": 1.71875, "learning_rate": 0.00018913748089935658, "loss": 2.1168, "step": 127260 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.0001891366431146489, "loss": 2.3105, "step": 127265 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018913580529949053, "loss": 1.9631, "step": 127270 }, { "epoch": 0.3, "grad_norm": 1.8984375, "learning_rate": 0.00018913496745388174, "loss": 2.1156, "step": 127275 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018913412957782287, "loss": 1.9461, "step": 127280 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018913329167131413, "loss": 2.0219, "step": 127285 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018913245373435584, "loss": 2.281, "step": 127290 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018913161576694832, "loss": 1.8896, "step": 127295 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018913077776909178, "loss": 2.1766, "step": 127300 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018912993974078656, "loss": 2.1946, "step": 127305 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.00018912910168203295, "loss": 2.2125, "step": 127310 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018912826359283123, "loss": 2.1447, "step": 127315 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018912742547318165, "loss": 2.2279, "step": 127320 }, { "epoch": 0.3, "grad_norm": 1.8984375, "learning_rate": 0.00018912658732308453, "loss": 2.0507, "step": 127325 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.0001891257491425402, "loss": 2.0563, "step": 127330 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018912491093154882, "loss": 2.1425, "step": 127335 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.0001891240726901108, "loss": 2.2274, "step": 127340 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.00018912323441822637, "loss": 2.1971, "step": 127345 }, { "epoch": 0.3, "grad_norm": 3.6875, "learning_rate": 0.00018912239611589584, "loss": 2.1993, "step": 127350 }, { "epoch": 0.3, "grad_norm": 1.8125, "learning_rate": 0.0001891215577831195, "loss": 2.1397, "step": 127355 }, { "epoch": 0.3, "grad_norm": 2.53125, "learning_rate": 0.00018912071941989757, "loss": 2.2197, "step": 127360 }, { "epoch": 0.3, "grad_norm": 1.7265625, "learning_rate": 0.00018911988102623042, "loss": 2.0029, "step": 127365 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.00018911904260211827, "loss": 2.1475, "step": 127370 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018911820414756146, "loss": 2.1644, "step": 127375 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018911736566256027, "loss": 2.243, "step": 127380 }, { "epoch": 0.3, "grad_norm": 1.859375, "learning_rate": 0.00018911652714711496, "loss": 2.2321, "step": 127385 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018911568860122583, "loss": 2.2605, "step": 127390 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018911485002489316, "loss": 2.1555, "step": 127395 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.00018911401141811723, "loss": 2.1635, "step": 127400 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018911317278089834, "loss": 2.2776, "step": 127405 }, { "epoch": 0.3, "grad_norm": 1.5390625, "learning_rate": 0.0001891123341132368, "loss": 1.9921, "step": 127410 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018911149541513285, "loss": 2.1365, "step": 127415 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.0001891106566865868, "loss": 2.1653, "step": 127420 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.0001891098179275989, "loss": 2.1934, "step": 127425 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018910897913816953, "loss": 2.0986, "step": 127430 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.0001891081403182989, "loss": 2.2197, "step": 127435 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001891073014679873, "loss": 2.2367, "step": 127440 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.000189106462587235, "loss": 2.1197, "step": 127445 }, { "epoch": 0.3, "grad_norm": 2.4375, "learning_rate": 0.00018910562367604236, "loss": 2.1772, "step": 127450 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.0001891047847344096, "loss": 2.1185, "step": 127455 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018910394576233708, "loss": 2.0549, "step": 127460 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.000189103106759825, "loss": 2.2634, "step": 127465 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018910226772687368, "loss": 2.2269, "step": 127470 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001891014286634834, "loss": 2.1676, "step": 127475 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018910058956965445, "loss": 2.0509, "step": 127480 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018909975044538717, "loss": 2.254, "step": 127485 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018909891129068176, "loss": 2.1021, "step": 127490 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018909807210553857, "loss": 2.1798, "step": 127495 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018909723288995783, "loss": 2.062, "step": 127500 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018909639364393988, "loss": 2.4769, "step": 127505 }, { "epoch": 0.3, "grad_norm": 1.8359375, "learning_rate": 0.00018909555436748496, "loss": 1.9573, "step": 127510 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018909471506059342, "loss": 1.8717, "step": 127515 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001890938757232655, "loss": 2.1048, "step": 127520 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001890930363555015, "loss": 2.0869, "step": 127525 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018909219695730168, "loss": 2.2575, "step": 127530 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018909135752866635, "loss": 1.9875, "step": 127535 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018909051806959584, "loss": 2.1148, "step": 127540 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018908967858009034, "loss": 2.1644, "step": 127545 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.00018908883906015023, "loss": 2.1375, "step": 127550 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018908799950977573, "loss": 2.0802, "step": 127555 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018908715992896718, "loss": 2.1336, "step": 127560 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018908632031772483, "loss": 2.0891, "step": 127565 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018908548067604895, "loss": 2.1244, "step": 127570 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018908464100393988, "loss": 1.9673, "step": 127575 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001890838013013979, "loss": 2.0878, "step": 127580 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018908296156842322, "loss": 2.1384, "step": 127585 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018908212180501624, "loss": 1.858, "step": 127590 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018908128201117717, "loss": 2.0931, "step": 127595 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.0001890804421869063, "loss": 2.113, "step": 127600 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018907960233220398, "loss": 2.171, "step": 127605 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.0001890787624470704, "loss": 2.2382, "step": 127610 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018907792253150595, "loss": 1.955, "step": 127615 }, { "epoch": 0.3, "grad_norm": 2.671875, "learning_rate": 0.00018907708258551084, "loss": 2.1169, "step": 127620 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018907624260908537, "loss": 2.2235, "step": 127625 }, { "epoch": 0.3, "grad_norm": 2.515625, "learning_rate": 0.0001890754026022299, "loss": 2.1441, "step": 127630 }, { "epoch": 0.3, "grad_norm": 2.515625, "learning_rate": 0.0001890745625649446, "loss": 2.2677, "step": 127635 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018907372249722982, "loss": 2.1075, "step": 127640 }, { "epoch": 0.3, "grad_norm": 2.484375, "learning_rate": 0.00018907288239908586, "loss": 2.2812, "step": 127645 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018907204227051297, "loss": 2.1891, "step": 127650 }, { "epoch": 0.3, "grad_norm": 2.546875, "learning_rate": 0.00018907120211151144, "loss": 1.9928, "step": 127655 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001890703619220816, "loss": 2.0486, "step": 127660 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018906952170222373, "loss": 2.0492, "step": 127665 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018906868145193806, "loss": 2.1854, "step": 127670 }, { "epoch": 0.3, "grad_norm": 1.828125, "learning_rate": 0.00018906784117122492, "loss": 2.3018, "step": 127675 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018906700086008458, "loss": 2.125, "step": 127680 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018906616051851734, "loss": 2.177, "step": 127685 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.0001890653201465235, "loss": 1.9656, "step": 127690 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018906447974410332, "loss": 2.2003, "step": 127695 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.0001890636393112571, "loss": 2.0737, "step": 127700 }, { "epoch": 0.3, "grad_norm": 1.7578125, "learning_rate": 0.00018906279884798512, "loss": 2.0921, "step": 127705 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.0001890619583542877, "loss": 2.1107, "step": 127710 }, { "epoch": 0.3, "grad_norm": 1.734375, "learning_rate": 0.00018906111783016506, "loss": 2.0822, "step": 127715 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.00018906027727561754, "loss": 1.9977, "step": 127720 }, { "epoch": 0.3, "grad_norm": 1.84375, "learning_rate": 0.00018905943669064544, "loss": 1.8706, "step": 127725 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.000189058596075249, "loss": 2.1978, "step": 127730 }, { "epoch": 0.3, "grad_norm": 1.78125, "learning_rate": 0.00018905775542942853, "loss": 2.4148, "step": 127735 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018905691475318433, "loss": 2.1625, "step": 127740 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018905607404651664, "loss": 2.0133, "step": 127745 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.0001890552333094258, "loss": 2.0891, "step": 127750 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.0001890543925419121, "loss": 2.0631, "step": 127755 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018905355174397576, "loss": 2.191, "step": 127760 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018905271091561714, "loss": 2.0522, "step": 127765 }, { "epoch": 0.3, "grad_norm": 1.734375, "learning_rate": 0.00018905187005683648, "loss": 2.0714, "step": 127770 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018905102916763413, "loss": 2.1455, "step": 127775 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001890501882480103, "loss": 2.2284, "step": 127780 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018904934729796531, "loss": 1.9547, "step": 127785 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018904850631749944, "loss": 2.0841, "step": 127790 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.000189047665306613, "loss": 1.9846, "step": 127795 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018904682426530624, "loss": 1.9838, "step": 127800 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001890459831935795, "loss": 2.224, "step": 127805 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018904514209143304, "loss": 2.1115, "step": 127810 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018904430095886712, "loss": 2.1861, "step": 127815 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018904345979588207, "loss": 1.9631, "step": 127820 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018904261860247813, "loss": 2.2273, "step": 127825 }, { "epoch": 0.3, "grad_norm": 2.734375, "learning_rate": 0.00018904177737865565, "loss": 1.9978, "step": 127830 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018904093612441488, "loss": 2.3254, "step": 127835 }, { "epoch": 0.3, "grad_norm": 1.828125, "learning_rate": 0.00018904009483975608, "loss": 2.1056, "step": 127840 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.0001890392535246796, "loss": 2.1599, "step": 127845 }, { "epoch": 0.3, "grad_norm": 2.921875, "learning_rate": 0.00018903841217918568, "loss": 2.3695, "step": 127850 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018903757080327465, "loss": 2.1533, "step": 127855 }, { "epoch": 0.3, "grad_norm": 2.453125, "learning_rate": 0.00018903672939694674, "loss": 2.0167, "step": 127860 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001890358879602023, "loss": 2.265, "step": 127865 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018903504649304154, "loss": 2.102, "step": 127870 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018903420499546484, "loss": 2.1175, "step": 127875 }, { "epoch": 0.3, "grad_norm": 2.625, "learning_rate": 0.0001890333634674724, "loss": 1.908, "step": 127880 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018903252190906458, "loss": 2.1477, "step": 127885 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.00018903168032024162, "loss": 2.1307, "step": 127890 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018903083870100385, "loss": 2.191, "step": 127895 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018902999705135148, "loss": 2.1975, "step": 127900 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018902915537128488, "loss": 1.9711, "step": 127905 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001890283136608043, "loss": 2.1488, "step": 127910 }, { "epoch": 0.3, "grad_norm": 1.765625, "learning_rate": 0.00018902747191991006, "loss": 2.0656, "step": 127915 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018902663014860237, "loss": 2.1728, "step": 127920 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018902578834688162, "loss": 2.2677, "step": 127925 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.000189024946514748, "loss": 2.157, "step": 127930 }, { "epoch": 0.3, "grad_norm": 2.65625, "learning_rate": 0.00018902410465220187, "loss": 2.0939, "step": 127935 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.0001890232627592435, "loss": 2.0464, "step": 127940 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018902242083587316, "loss": 2.1732, "step": 127945 }, { "epoch": 0.3, "grad_norm": 1.78125, "learning_rate": 0.00018902157888209113, "loss": 2.1137, "step": 127950 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018902073689789774, "loss": 1.9885, "step": 127955 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018901989488329324, "loss": 2.1372, "step": 127960 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018901905283827793, "loss": 2.0218, "step": 127965 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018901821076285208, "loss": 2.1831, "step": 127970 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018901736865701603, "loss": 2.3009, "step": 127975 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018901652652077, "loss": 2.1102, "step": 127980 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.00018901568435411432, "loss": 2.2178, "step": 127985 }, { "epoch": 0.3, "grad_norm": 3.03125, "learning_rate": 0.00018901484215704927, "loss": 2.0729, "step": 127990 }, { "epoch": 0.3, "grad_norm": 1.8671875, "learning_rate": 0.00018901399992957515, "loss": 2.1007, "step": 127995 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018901315767169222, "loss": 2.3055, "step": 128000 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.00018901231538340078, "loss": 1.9772, "step": 128005 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.0001890114730647011, "loss": 2.0778, "step": 128010 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018901063071559353, "loss": 2.1622, "step": 128015 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.00018900978833607827, "loss": 2.2349, "step": 128020 }, { "epoch": 0.3, "grad_norm": 1.78125, "learning_rate": 0.00018900894592615569, "loss": 2.0666, "step": 128025 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.00018900810348582603, "loss": 2.2819, "step": 128030 }, { "epoch": 0.3, "grad_norm": 2.5, "learning_rate": 0.00018900726101508958, "loss": 2.194, "step": 128035 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018900641851394662, "loss": 2.358, "step": 128040 }, { "epoch": 0.3, "grad_norm": 2.5625, "learning_rate": 0.00018900557598239748, "loss": 2.1574, "step": 128045 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.00018900473342044237, "loss": 2.2439, "step": 128050 }, { "epoch": 0.3, "grad_norm": 1.78125, "learning_rate": 0.0001890038908280817, "loss": 2.1464, "step": 128055 }, { "epoch": 0.3, "grad_norm": 1.84375, "learning_rate": 0.00018900304820531564, "loss": 1.9578, "step": 128060 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018900220555214455, "loss": 2.264, "step": 128065 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.0001890013628685687, "loss": 2.2161, "step": 128070 }, { "epoch": 0.3, "grad_norm": 1.8984375, "learning_rate": 0.00018900052015458833, "loss": 2.1479, "step": 128075 }, { "epoch": 0.3, "grad_norm": 1.59375, "learning_rate": 0.0001889996774102038, "loss": 2.117, "step": 128080 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018899883463541534, "loss": 2.1295, "step": 128085 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018899799183022328, "loss": 2.2617, "step": 128090 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018899714899462788, "loss": 2.1803, "step": 128095 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018899630612862948, "loss": 2.0227, "step": 128100 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001889954632322283, "loss": 2.0879, "step": 128105 }, { "epoch": 0.3, "grad_norm": 2.71875, "learning_rate": 0.00018899462030542465, "loss": 2.0386, "step": 128110 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018899377734821885, "loss": 2.1278, "step": 128115 }, { "epoch": 0.3, "grad_norm": 1.7578125, "learning_rate": 0.00018899293436061112, "loss": 2.0245, "step": 128120 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.0001889920913426018, "loss": 2.0779, "step": 128125 }, { "epoch": 0.3, "grad_norm": 3.15625, "learning_rate": 0.00018899124829419118, "loss": 2.135, "step": 128130 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018899040521537955, "loss": 2.1798, "step": 128135 }, { "epoch": 0.3, "grad_norm": 2.65625, "learning_rate": 0.00018898956210616717, "loss": 2.2071, "step": 128140 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018898871896655433, "loss": 2.3484, "step": 128145 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018898787579654133, "loss": 2.1119, "step": 128150 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018898703259612849, "loss": 2.2209, "step": 128155 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018898618936531603, "loss": 2.1347, "step": 128160 }, { "epoch": 0.3, "grad_norm": 1.6328125, "learning_rate": 0.00018898534610410427, "loss": 1.9536, "step": 128165 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.00018898450281249352, "loss": 2.0788, "step": 128170 }, { "epoch": 0.3, "grad_norm": 2.734375, "learning_rate": 0.00018898365949048405, "loss": 2.1312, "step": 128175 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018898281613807615, "loss": 2.1564, "step": 128180 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.0001889819727552701, "loss": 2.1337, "step": 128185 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001889811293420662, "loss": 2.0217, "step": 128190 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018898028589846476, "loss": 2.3395, "step": 128195 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018897944242446597, "loss": 2.1804, "step": 128200 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018897859892007027, "loss": 2.0954, "step": 128205 }, { "epoch": 0.3, "grad_norm": 1.7578125, "learning_rate": 0.0001889777553852778, "loss": 2.0689, "step": 128210 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.00018897691182008893, "loss": 2.2992, "step": 128215 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.00018897606822450394, "loss": 1.9759, "step": 128220 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018897522459852313, "loss": 2.209, "step": 128225 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018897438094214673, "loss": 2.2002, "step": 128230 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.0001889735372553751, "loss": 2.2267, "step": 128235 }, { "epoch": 0.3, "grad_norm": 1.7265625, "learning_rate": 0.0001889726935382085, "loss": 2.1062, "step": 128240 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.0001889718497906472, "loss": 2.1516, "step": 128245 }, { "epoch": 0.3, "grad_norm": 2.4375, "learning_rate": 0.0001889710060126915, "loss": 2.1988, "step": 128250 }, { "epoch": 0.3, "grad_norm": 2.546875, "learning_rate": 0.0001889701622043417, "loss": 2.1351, "step": 128255 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.0001889693183655981, "loss": 2.1441, "step": 128260 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018896847449646092, "loss": 2.1717, "step": 128265 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018896763059693053, "loss": 2.0853, "step": 128270 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018896678666700716, "loss": 2.1625, "step": 128275 }, { "epoch": 0.3, "grad_norm": 1.8359375, "learning_rate": 0.00018896594270669114, "loss": 1.9349, "step": 128280 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018896509871598274, "loss": 2.1733, "step": 128285 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018896425469488225, "loss": 2.184, "step": 128290 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018896341064338995, "loss": 2.0149, "step": 128295 }, { "epoch": 0.3, "grad_norm": 2.453125, "learning_rate": 0.00018896256656150614, "loss": 2.2271, "step": 128300 }, { "epoch": 0.3, "grad_norm": 2.65625, "learning_rate": 0.0001889617224492311, "loss": 1.9259, "step": 128305 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.0001889608783065651, "loss": 2.3857, "step": 128310 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.0001889600341335085, "loss": 2.3253, "step": 128315 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.0001889591899300615, "loss": 2.1213, "step": 128320 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018895834569622444, "loss": 2.1315, "step": 128325 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001889575014319976, "loss": 2.0769, "step": 128330 }, { "epoch": 0.3, "grad_norm": 1.6796875, "learning_rate": 0.00018895665713738127, "loss": 2.1222, "step": 128335 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.0001889558128123757, "loss": 2.1144, "step": 128340 }, { "epoch": 0.3, "grad_norm": 2.671875, "learning_rate": 0.00018895496845698124, "loss": 2.0069, "step": 128345 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018895412407119813, "loss": 1.9234, "step": 128350 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.0001889532796550267, "loss": 2.0486, "step": 128355 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018895243520846718, "loss": 2.0405, "step": 128360 }, { "epoch": 0.3, "grad_norm": 2.5, "learning_rate": 0.00018895159073151996, "loss": 1.8825, "step": 128365 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001889507462241852, "loss": 1.9574, "step": 128370 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018894990168646328, "loss": 2.0613, "step": 128375 }, { "epoch": 0.3, "grad_norm": 2.484375, "learning_rate": 0.00018894905711835448, "loss": 2.1364, "step": 128380 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018894821251985903, "loss": 2.1412, "step": 128385 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018894736789097728, "loss": 2.1773, "step": 128390 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.00018894652323170948, "loss": 2.3106, "step": 128395 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018894567854205595, "loss": 2.0909, "step": 128400 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018894483382201697, "loss": 2.2441, "step": 128405 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001889439890715928, "loss": 2.1286, "step": 128410 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018894314429078376, "loss": 2.1775, "step": 128415 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.00018894229947959013, "loss": 2.1619, "step": 128420 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.0001889414546380122, "loss": 2.1731, "step": 128425 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018894060976605025, "loss": 2.2754, "step": 128430 }, { "epoch": 0.3, "grad_norm": 2.5, "learning_rate": 0.00018893976486370457, "loss": 2.1862, "step": 128435 }, { "epoch": 0.3, "grad_norm": 2.4375, "learning_rate": 0.00018893891993097546, "loss": 2.1181, "step": 128440 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.0001889380749678632, "loss": 2.1088, "step": 128445 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018893722997436808, "loss": 2.223, "step": 128450 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.00018893638495049038, "loss": 2.1808, "step": 128455 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.00018893553989623042, "loss": 2.1603, "step": 128460 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018893469481158848, "loss": 2.0885, "step": 128465 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018893384969656478, "loss": 2.1566, "step": 128470 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.0001889330045511597, "loss": 2.1679, "step": 128475 }, { "epoch": 0.3, "grad_norm": 1.6015625, "learning_rate": 0.0001889321593753735, "loss": 2.1955, "step": 128480 }, { "epoch": 0.3, "grad_norm": 1.921875, "learning_rate": 0.00018893131416920645, "loss": 2.1251, "step": 128485 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018893046893265884, "loss": 2.2311, "step": 128490 }, { "epoch": 0.3, "grad_norm": 1.9765625, "learning_rate": 0.00018892962366573096, "loss": 2.0904, "step": 128495 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018892877836842315, "loss": 2.159, "step": 128500 }, { "epoch": 0.3, "grad_norm": 2.6875, "learning_rate": 0.00018892793304073563, "loss": 2.2446, "step": 128505 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018892708768266872, "loss": 2.2895, "step": 128510 }, { "epoch": 0.3, "grad_norm": 1.5703125, "learning_rate": 0.0001889262422942227, "loss": 2.0361, "step": 128515 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018892539687539786, "loss": 2.0666, "step": 128520 }, { "epoch": 0.3, "grad_norm": 1.59375, "learning_rate": 0.00018892455142619448, "loss": 2.0605, "step": 128525 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.0001889237059466129, "loss": 2.2107, "step": 128530 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018892286043665336, "loss": 2.1065, "step": 128535 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018892201489631615, "loss": 2.1142, "step": 128540 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018892116932560154, "loss": 2.1661, "step": 128545 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018892032372450987, "loss": 2.2401, "step": 128550 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001889194780930414, "loss": 2.1535, "step": 128555 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018891863243119644, "loss": 2.1363, "step": 128560 }, { "epoch": 0.3, "grad_norm": 1.7734375, "learning_rate": 0.00018891778673897524, "loss": 2.1946, "step": 128565 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018891694101637812, "loss": 2.1502, "step": 128570 }, { "epoch": 0.3, "grad_norm": 2.265625, "learning_rate": 0.00018891609526340536, "loss": 2.3037, "step": 128575 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018891524948005725, "loss": 2.1893, "step": 128580 }, { "epoch": 0.3, "grad_norm": 1.796875, "learning_rate": 0.00018891440366633407, "loss": 1.8288, "step": 128585 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018891355782223615, "loss": 2.0636, "step": 128590 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.0001889127119477637, "loss": 2.0401, "step": 128595 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001889118660429171, "loss": 2.1703, "step": 128600 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.00018891102010769654, "loss": 2.0135, "step": 128605 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.0001889101741421024, "loss": 2.0888, "step": 128610 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.00018890932814613493, "loss": 2.0986, "step": 128615 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018890848211979445, "loss": 2.1356, "step": 128620 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018890763606308118, "loss": 2.0778, "step": 128625 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018890678997599545, "loss": 2.1764, "step": 128630 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.00018890594385853753, "loss": 2.1233, "step": 128635 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018890509771070778, "loss": 2.0993, "step": 128640 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001889042515325064, "loss": 2.1704, "step": 128645 }, { "epoch": 0.3, "grad_norm": 2.46875, "learning_rate": 0.0001889034053239337, "loss": 2.0492, "step": 128650 }, { "epoch": 0.3, "grad_norm": 2.328125, "learning_rate": 0.00018890255908499002, "loss": 2.0257, "step": 128655 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.0001889017128156756, "loss": 2.1257, "step": 128660 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018890086651599075, "loss": 2.2113, "step": 128665 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018890002018593575, "loss": 2.2634, "step": 128670 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018889917382551087, "loss": 2.0426, "step": 128675 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018889832743471642, "loss": 2.1251, "step": 128680 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018889748101355274, "loss": 1.9015, "step": 128685 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018889663456202004, "loss": 2.1503, "step": 128690 }, { "epoch": 0.3, "grad_norm": 1.7421875, "learning_rate": 0.00018889578808011862, "loss": 1.9774, "step": 128695 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.0001888949415678488, "loss": 2.1801, "step": 128700 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018889409502521084, "loss": 2.3348, "step": 128705 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.00018889324845220505, "loss": 2.3169, "step": 128710 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018889240184883174, "loss": 2.2606, "step": 128715 }, { "epoch": 0.3, "grad_norm": 1.671875, "learning_rate": 0.00018889155521509114, "loss": 2.1055, "step": 128720 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018889070855098358, "loss": 1.9992, "step": 128725 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018888986185650936, "loss": 2.1571, "step": 128730 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.00018888901513166873, "loss": 2.2063, "step": 128735 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.000188888168376462, "loss": 1.9883, "step": 128740 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018888732159088952, "loss": 2.018, "step": 128745 }, { "epoch": 0.3, "grad_norm": 1.7421875, "learning_rate": 0.00018888647477495143, "loss": 2.4409, "step": 128750 }, { "epoch": 0.3, "grad_norm": 2.640625, "learning_rate": 0.00018888562792864817, "loss": 2.0833, "step": 128755 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018888478105197996, "loss": 2.1252, "step": 128760 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.00018888393414494704, "loss": 2.1027, "step": 128765 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018888308720754982, "loss": 2.245, "step": 128770 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.0001888822402397885, "loss": 2.2059, "step": 128775 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018888139324166342, "loss": 1.9688, "step": 128780 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.0001888805462131748, "loss": 2.1495, "step": 128785 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.000188879699154323, "loss": 2.2253, "step": 128790 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018887885206510827, "loss": 2.2136, "step": 128795 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018887800494553093, "loss": 2.1449, "step": 128800 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018887715779559124, "loss": 2.3048, "step": 128805 }, { "epoch": 0.3, "grad_norm": 1.453125, "learning_rate": 0.0001888763106152895, "loss": 2.1313, "step": 128810 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.000188875463404626, "loss": 1.9898, "step": 128815 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018887461616360103, "loss": 1.968, "step": 128820 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018887376889221492, "loss": 2.0637, "step": 128825 }, { "epoch": 0.3, "grad_norm": 1.625, "learning_rate": 0.00018887292159046784, "loss": 2.1548, "step": 128830 }, { "epoch": 0.3, "grad_norm": 1.8984375, "learning_rate": 0.00018887207425836023, "loss": 2.0401, "step": 128835 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018887122689589228, "loss": 2.133, "step": 128840 }, { "epoch": 0.3, "grad_norm": 1.96875, "learning_rate": 0.00018887037950306428, "loss": 2.2048, "step": 128845 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018886953207987656, "loss": 2.3303, "step": 128850 }, { "epoch": 0.3, "grad_norm": 2.71875, "learning_rate": 0.00018886868462632942, "loss": 2.3662, "step": 128855 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.0001888678371424231, "loss": 1.991, "step": 128860 }, { "epoch": 0.3, "grad_norm": 1.75, "learning_rate": 0.00018886698962815795, "loss": 2.0433, "step": 128865 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018886614208353422, "loss": 2.118, "step": 128870 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018886529450855218, "loss": 2.2779, "step": 128875 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018886444690321215, "loss": 2.1578, "step": 128880 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.0001888635992675144, "loss": 2.0766, "step": 128885 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018886275160145925, "loss": 2.2586, "step": 128890 }, { "epoch": 0.3, "grad_norm": 2.75, "learning_rate": 0.00018886190390504697, "loss": 2.1555, "step": 128895 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018886105617827784, "loss": 2.2655, "step": 128900 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001888602084211522, "loss": 2.1196, "step": 128905 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018885936063367024, "loss": 2.1664, "step": 128910 }, { "epoch": 0.3, "grad_norm": 1.765625, "learning_rate": 0.00018885851281583236, "loss": 2.0916, "step": 128915 }, { "epoch": 0.3, "grad_norm": 1.9921875, "learning_rate": 0.0001888576649676388, "loss": 2.1809, "step": 128920 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018885681708908982, "loss": 1.9525, "step": 128925 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.00018885596918018577, "loss": 2.1171, "step": 128930 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.0001888551212409269, "loss": 2.2311, "step": 128935 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018885427327131347, "loss": 1.9826, "step": 128940 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.00018885342527134586, "loss": 2.1427, "step": 128945 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.0001888525772410243, "loss": 2.1158, "step": 128950 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018885172918034908, "loss": 2.0886, "step": 128955 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.0001888508810893205, "loss": 2.1683, "step": 128960 }, { "epoch": 0.3, "grad_norm": 1.859375, "learning_rate": 0.00018885003296793883, "loss": 2.0154, "step": 128965 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018884918481620438, "loss": 2.0757, "step": 128970 }, { "epoch": 0.3, "grad_norm": 2.609375, "learning_rate": 0.00018884833663411748, "loss": 1.9703, "step": 128975 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018884748842167834, "loss": 2.0384, "step": 128980 }, { "epoch": 0.3, "grad_norm": 2.703125, "learning_rate": 0.0001888466401788873, "loss": 2.0725, "step": 128985 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.0001888457919057446, "loss": 2.1362, "step": 128990 }, { "epoch": 0.3, "grad_norm": 1.984375, "learning_rate": 0.0001888449436022506, "loss": 2.1487, "step": 128995 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.00018884409526840557, "loss": 2.2394, "step": 129000 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018884324690420976, "loss": 2.2122, "step": 129005 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.0001888423985096635, "loss": 2.2019, "step": 129010 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018884155008476707, "loss": 2.0857, "step": 129015 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018884070162952075, "loss": 2.0638, "step": 129020 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018883985314392484, "loss": 2.1419, "step": 129025 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.00018883900462797958, "loss": 2.1442, "step": 129030 }, { "epoch": 0.3, "grad_norm": 2.390625, "learning_rate": 0.00018883815608168535, "loss": 2.1473, "step": 129035 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.0001888373075050424, "loss": 2.1594, "step": 129040 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.000188836458898051, "loss": 2.2537, "step": 129045 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018883561026071145, "loss": 2.1087, "step": 129050 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018883476159302403, "loss": 1.8859, "step": 129055 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018883391289498905, "loss": 2.1316, "step": 129060 }, { "epoch": 0.3, "grad_norm": 2.453125, "learning_rate": 0.00018883306416660683, "loss": 2.0152, "step": 129065 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.0001888322154078776, "loss": 2.189, "step": 129070 }, { "epoch": 0.3, "grad_norm": 1.8125, "learning_rate": 0.00018883136661880165, "loss": 2.1572, "step": 129075 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018883051779937932, "loss": 1.9838, "step": 129080 }, { "epoch": 0.3, "grad_norm": 1.7109375, "learning_rate": 0.00018882966894961088, "loss": 2.1863, "step": 129085 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.0001888288200694966, "loss": 2.1286, "step": 129090 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.0001888279711590368, "loss": 1.998, "step": 129095 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.00018882712221823176, "loss": 2.1614, "step": 129100 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018882627324708174, "loss": 2.0945, "step": 129105 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018882542424558703, "loss": 2.259, "step": 129110 }, { "epoch": 0.3, "grad_norm": 2.375, "learning_rate": 0.00018882457521374798, "loss": 2.3579, "step": 129115 }, { "epoch": 0.3, "grad_norm": 2.4375, "learning_rate": 0.00018882372615156484, "loss": 2.032, "step": 129120 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.00018882287705903795, "loss": 2.2276, "step": 129125 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.0001888220279361675, "loss": 2.2622, "step": 129130 }, { "epoch": 0.3, "grad_norm": 2.09375, "learning_rate": 0.00018882117878295381, "loss": 2.0133, "step": 129135 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.00018882032959939723, "loss": 2.0467, "step": 129140 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.00018881948038549805, "loss": 2.1457, "step": 129145 }, { "epoch": 0.3, "grad_norm": 1.8984375, "learning_rate": 0.00018881863114125647, "loss": 2.0587, "step": 129150 }, { "epoch": 0.3, "grad_norm": 2.171875, "learning_rate": 0.00018881778186667284, "loss": 2.0759, "step": 129155 }, { "epoch": 0.3, "grad_norm": 2.671875, "learning_rate": 0.00018881693256174749, "loss": 2.0507, "step": 129160 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.0001888160832264806, "loss": 2.1533, "step": 129165 }, { "epoch": 0.3, "grad_norm": 2.53125, "learning_rate": 0.00018881523386087256, "loss": 1.9957, "step": 129170 }, { "epoch": 0.3, "grad_norm": 1.828125, "learning_rate": 0.00018881438446492364, "loss": 2.1553, "step": 129175 }, { "epoch": 0.3, "grad_norm": 1.6328125, "learning_rate": 0.0001888135350386341, "loss": 2.2134, "step": 129180 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018881268558200423, "loss": 2.044, "step": 129185 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.00018881183609503434, "loss": 2.317, "step": 129190 }, { "epoch": 0.3, "grad_norm": 2.484375, "learning_rate": 0.00018881098657772474, "loss": 2.1962, "step": 129195 }, { "epoch": 0.3, "grad_norm": 1.890625, "learning_rate": 0.00018881013703007568, "loss": 2.2121, "step": 129200 }, { "epoch": 0.3, "grad_norm": 1.84375, "learning_rate": 0.00018880928745208748, "loss": 2.2488, "step": 129205 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001888084378437604, "loss": 1.9307, "step": 129210 }, { "epoch": 0.3, "grad_norm": 1.859375, "learning_rate": 0.00018880758820509478, "loss": 2.2513, "step": 129215 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.0001888067385360908, "loss": 2.0008, "step": 129220 }, { "epoch": 0.3, "grad_norm": 1.65625, "learning_rate": 0.0001888058888367489, "loss": 2.1086, "step": 129225 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018880503910706926, "loss": 2.0255, "step": 129230 }, { "epoch": 0.3, "grad_norm": 2.03125, "learning_rate": 0.00018880418934705224, "loss": 1.91, "step": 129235 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.00018880333955669805, "loss": 2.0565, "step": 129240 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.0001888024897360071, "loss": 1.9408, "step": 129245 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018880163988497955, "loss": 2.0768, "step": 129250 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018880079000361576, "loss": 2.3829, "step": 129255 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.00018879994009191604, "loss": 2.0707, "step": 129260 }, { "epoch": 0.3, "grad_norm": 2.109375, "learning_rate": 0.0001887990901498806, "loss": 2.0908, "step": 129265 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018879824017750983, "loss": 2.0687, "step": 129270 }, { "epoch": 0.3, "grad_norm": 2.3125, "learning_rate": 0.00018879739017480397, "loss": 2.0185, "step": 129275 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018879654014176327, "loss": 2.0221, "step": 129280 }, { "epoch": 0.3, "grad_norm": 1.671875, "learning_rate": 0.00018879569007838808, "loss": 2.0968, "step": 129285 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.00018879483998467865, "loss": 2.2684, "step": 129290 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018879398986063533, "loss": 2.2552, "step": 129295 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.00018879313970625837, "loss": 2.1515, "step": 129300 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018879228952154806, "loss": 1.9936, "step": 129305 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018879143930650467, "loss": 2.2954, "step": 129310 }, { "epoch": 0.3, "grad_norm": 1.9609375, "learning_rate": 0.00018879058906112853, "loss": 2.1675, "step": 129315 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.0001887897387854199, "loss": 2.2001, "step": 129320 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018878888847937913, "loss": 2.1894, "step": 129325 }, { "epoch": 0.3, "grad_norm": 2.359375, "learning_rate": 0.00018878803814300642, "loss": 2.3286, "step": 129330 }, { "epoch": 0.3, "grad_norm": 2.640625, "learning_rate": 0.00018878718777630212, "loss": 2.1185, "step": 129335 }, { "epoch": 0.3, "grad_norm": 1.4140625, "learning_rate": 0.0001887863373792665, "loss": 1.9578, "step": 129340 }, { "epoch": 0.3, "grad_norm": 1.9375, "learning_rate": 0.00018878548695189988, "loss": 2.1102, "step": 129345 }, { "epoch": 0.3, "grad_norm": 2.28125, "learning_rate": 0.0001887846364942025, "loss": 2.239, "step": 129350 }, { "epoch": 0.3, "grad_norm": 2.40625, "learning_rate": 0.0001887837860061747, "loss": 2.0716, "step": 129355 }, { "epoch": 0.3, "grad_norm": 1.8046875, "learning_rate": 0.00018878293548781674, "loss": 2.2439, "step": 129360 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001887820849391289, "loss": 2.0199, "step": 129365 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.0001887812343601115, "loss": 2.0562, "step": 129370 }, { "epoch": 0.3, "grad_norm": 2.296875, "learning_rate": 0.00018878038375076485, "loss": 2.093, "step": 129375 }, { "epoch": 0.3, "grad_norm": 2.59375, "learning_rate": 0.00018877953311108919, "loss": 2.1839, "step": 129380 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018877868244108483, "loss": 2.1708, "step": 129385 }, { "epoch": 0.3, "grad_norm": 2.140625, "learning_rate": 0.00018877783174075206, "loss": 2.1348, "step": 129390 }, { "epoch": 0.3, "grad_norm": 2.640625, "learning_rate": 0.00018877698101009118, "loss": 2.132, "step": 129395 }, { "epoch": 0.3, "grad_norm": 1.671875, "learning_rate": 0.00018877613024910245, "loss": 2.1932, "step": 129400 }, { "epoch": 0.3, "grad_norm": 2.453125, "learning_rate": 0.00018877527945778622, "loss": 2.22, "step": 129405 }, { "epoch": 0.3, "grad_norm": 2.546875, "learning_rate": 0.00018877442863614273, "loss": 2.1023, "step": 129410 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018877357778417225, "loss": 2.0915, "step": 129415 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.00018877272690187512, "loss": 2.0757, "step": 129420 }, { "epoch": 0.3, "grad_norm": 1.859375, "learning_rate": 0.00018877187598925166, "loss": 2.1997, "step": 129425 }, { "epoch": 0.3, "grad_norm": 2.125, "learning_rate": 0.0001887710250463021, "loss": 2.0885, "step": 129430 }, { "epoch": 0.3, "grad_norm": 1.8203125, "learning_rate": 0.00018877017407302674, "loss": 2.0279, "step": 129435 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018876932306942585, "loss": 2.19, "step": 129440 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018876847203549976, "loss": 2.2905, "step": 129445 }, { "epoch": 0.3, "grad_norm": 1.6875, "learning_rate": 0.0001887676209712488, "loss": 2.032, "step": 129450 }, { "epoch": 0.3, "grad_norm": 1.875, "learning_rate": 0.00018876676987667317, "loss": 2.1128, "step": 129455 }, { "epoch": 0.3, "grad_norm": 2.078125, "learning_rate": 0.0001887659187517732, "loss": 2.155, "step": 129460 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001887650675965492, "loss": 2.2331, "step": 129465 }, { "epoch": 0.3, "grad_norm": 1.9140625, "learning_rate": 0.0001887642164110014, "loss": 2.0934, "step": 129470 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.0001887633651951302, "loss": 2.1506, "step": 129475 }, { "epoch": 0.3, "grad_norm": 1.984375, "learning_rate": 0.00018876251394893577, "loss": 2.2475, "step": 129480 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001887616626724185, "loss": 2.0372, "step": 129485 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.0001887608113655786, "loss": 2.1468, "step": 129490 }, { "epoch": 0.3, "grad_norm": 2.203125, "learning_rate": 0.0001887599600284164, "loss": 2.328, "step": 129495 }, { "epoch": 0.3, "grad_norm": 1.9453125, "learning_rate": 0.0001887591086609322, "loss": 2.1108, "step": 129500 }, { "epoch": 0.3, "grad_norm": 2.421875, "learning_rate": 0.0001887582572631263, "loss": 2.1952, "step": 129505 }, { "epoch": 0.3, "grad_norm": 2.234375, "learning_rate": 0.00018875740583499894, "loss": 2.0288, "step": 129510 }, { "epoch": 0.3, "grad_norm": 2.1875, "learning_rate": 0.00018875655437655044, "loss": 2.1995, "step": 129515 }, { "epoch": 0.3, "grad_norm": 1.9296875, "learning_rate": 0.0001887557028877811, "loss": 2.1745, "step": 129520 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.0001887548513686912, "loss": 2.3104, "step": 129525 }, { "epoch": 0.3, "grad_norm": 2.015625, "learning_rate": 0.00018875399981928102, "loss": 2.1037, "step": 129530 }, { "epoch": 0.3, "grad_norm": 1.8828125, "learning_rate": 0.0001887531482395509, "loss": 2.3013, "step": 129535 }, { "epoch": 0.3, "grad_norm": 1.953125, "learning_rate": 0.00018875229662950106, "loss": 2.1671, "step": 129540 }, { "epoch": 0.3, "grad_norm": 2.046875, "learning_rate": 0.00018875144498913186, "loss": 2.1688, "step": 129545 }, { "epoch": 0.3, "grad_norm": 2.15625, "learning_rate": 0.0001887505933184435, "loss": 2.213, "step": 129550 }, { "epoch": 0.3, "grad_norm": 2.25, "learning_rate": 0.0001887497416174364, "loss": 2.0743, "step": 129555 }, { "epoch": 0.3, "grad_norm": 2.53125, "learning_rate": 0.0001887488898861107, "loss": 2.1522, "step": 129560 }, { "epoch": 0.3, "grad_norm": 2.953125, "learning_rate": 0.00018874803812446683, "loss": 2.1722, "step": 129565 }, { "epoch": 0.3, "grad_norm": 1.8515625, "learning_rate": 0.00018874718633250502, "loss": 2.1533, "step": 129570 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018874633451022553, "loss": 2.2207, "step": 129575 }, { "epoch": 0.3, "grad_norm": 1.90625, "learning_rate": 0.0001887454826576287, "loss": 2.1292, "step": 129580 }, { "epoch": 0.3, "grad_norm": 2.21875, "learning_rate": 0.00018874463077471482, "loss": 2.2383, "step": 129585 }, { "epoch": 0.3, "grad_norm": 2.0, "learning_rate": 0.00018874377886148413, "loss": 1.948, "step": 129590 }, { "epoch": 0.3, "grad_norm": 2.0625, "learning_rate": 0.00018874292691793695, "loss": 2.1258, "step": 129595 }, { "epoch": 0.3, "grad_norm": 2.34375, "learning_rate": 0.00018874207494407364, "loss": 2.1877, "step": 129600 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018874122293989436, "loss": 1.9871, "step": 129605 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001887403709053995, "loss": 2.1583, "step": 129610 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001887395188405893, "loss": 2.1862, "step": 129615 }, { "epoch": 0.31, "grad_norm": 1.96875, "learning_rate": 0.0001887386667454641, "loss": 2.1756, "step": 129620 }, { "epoch": 0.31, "grad_norm": 2.59375, "learning_rate": 0.00018873781462002413, "loss": 1.9554, "step": 129625 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018873696246426972, "loss": 2.2516, "step": 129630 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018873611027820118, "loss": 2.1844, "step": 129635 }, { "epoch": 0.31, "grad_norm": 1.8359375, "learning_rate": 0.00018873525806181878, "loss": 2.0025, "step": 129640 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018873440581512277, "loss": 2.0414, "step": 129645 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.0001887335535381135, "loss": 1.9666, "step": 129650 }, { "epoch": 0.31, "grad_norm": 1.84375, "learning_rate": 0.00018873270123079123, "loss": 2.3094, "step": 129655 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018873184889315624, "loss": 2.0123, "step": 129660 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018873099652520889, "loss": 2.2499, "step": 129665 }, { "epoch": 0.31, "grad_norm": 1.8515625, "learning_rate": 0.00018873014412694938, "loss": 2.122, "step": 129670 }, { "epoch": 0.31, "grad_norm": 1.8828125, "learning_rate": 0.00018872929169837807, "loss": 2.2044, "step": 129675 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.0001887284392394952, "loss": 2.1255, "step": 129680 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.00018872758675030112, "loss": 2.1257, "step": 129685 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018872673423079607, "loss": 1.9732, "step": 129690 }, { "epoch": 0.31, "grad_norm": 1.8828125, "learning_rate": 0.00018872588168098036, "loss": 2.0214, "step": 129695 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018872502910085426, "loss": 2.1723, "step": 129700 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.0001887241764904181, "loss": 2.2282, "step": 129705 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018872332384967214, "loss": 2.2081, "step": 129710 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.0001887224711786167, "loss": 2.1314, "step": 129715 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018872161847725206, "loss": 2.1766, "step": 129720 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.0001887207657455785, "loss": 2.0982, "step": 129725 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.00018871991298359628, "loss": 2.2508, "step": 129730 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018871906019130575, "loss": 2.0954, "step": 129735 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.0001887182073687072, "loss": 2.3006, "step": 129740 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018871735451580091, "loss": 2.1167, "step": 129745 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018871650163258713, "loss": 2.2386, "step": 129750 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018871564871906623, "loss": 2.1642, "step": 129755 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.0001887147957752384, "loss": 2.1773, "step": 129760 }, { "epoch": 0.31, "grad_norm": 2.921875, "learning_rate": 0.00018871394280110403, "loss": 1.997, "step": 129765 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018871308979666334, "loss": 2.1213, "step": 129770 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018871223676191664, "loss": 2.1129, "step": 129775 }, { "epoch": 0.31, "grad_norm": 1.71875, "learning_rate": 0.00018871138369686426, "loss": 1.9711, "step": 129780 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018871053060150644, "loss": 2.1543, "step": 129785 }, { "epoch": 0.31, "grad_norm": 1.8125, "learning_rate": 0.0001887096774758435, "loss": 2.0758, "step": 129790 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018870882431987576, "loss": 2.4668, "step": 129795 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018870797113360345, "loss": 2.346, "step": 129800 }, { "epoch": 0.31, "grad_norm": 2.609375, "learning_rate": 0.00018870711791702687, "loss": 2.086, "step": 129805 }, { "epoch": 0.31, "grad_norm": 2.5625, "learning_rate": 0.00018870626467014633, "loss": 2.1151, "step": 129810 }, { "epoch": 0.31, "grad_norm": 2.90625, "learning_rate": 0.00018870541139296213, "loss": 2.0723, "step": 129815 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018870455808547458, "loss": 2.0479, "step": 129820 }, { "epoch": 0.31, "grad_norm": 1.8515625, "learning_rate": 0.00018870370474768393, "loss": 2.1175, "step": 129825 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.00018870285137959048, "loss": 2.0768, "step": 129830 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018870199798119453, "loss": 2.0372, "step": 129835 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018870114455249635, "loss": 2.374, "step": 129840 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.00018870029109349626, "loss": 2.3198, "step": 129845 }, { "epoch": 0.31, "grad_norm": 1.96875, "learning_rate": 0.00018869943760419457, "loss": 2.2892, "step": 129850 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.0001886985840845915, "loss": 2.131, "step": 129855 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.0001886977305346874, "loss": 2.0455, "step": 129860 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018869687695448258, "loss": 2.1045, "step": 129865 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018869602334397727, "loss": 2.2032, "step": 129870 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018869516970317177, "loss": 2.0408, "step": 129875 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.00018869431603206642, "loss": 2.1537, "step": 129880 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.0001886934623306615, "loss": 2.0742, "step": 129885 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018869260859895724, "loss": 1.9709, "step": 129890 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.000188691754836954, "loss": 2.0677, "step": 129895 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018869090104465204, "loss": 2.1474, "step": 129900 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018869004722205167, "loss": 1.9958, "step": 129905 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018868919336915315, "loss": 2.2112, "step": 129910 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018868833948595682, "loss": 2.0804, "step": 129915 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018868748557246293, "loss": 2.2023, "step": 129920 }, { "epoch": 0.31, "grad_norm": 2.5625, "learning_rate": 0.00018868663162867179, "loss": 2.1791, "step": 129925 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018868577765458367, "loss": 1.943, "step": 129930 }, { "epoch": 0.31, "grad_norm": 1.71875, "learning_rate": 0.0001886849236501989, "loss": 2.0473, "step": 129935 }, { "epoch": 0.31, "grad_norm": 1.8671875, "learning_rate": 0.00018868406961551778, "loss": 2.1225, "step": 129940 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.0001886832155505405, "loss": 2.0291, "step": 129945 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018868236145526748, "loss": 1.9026, "step": 129950 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018868150732969895, "loss": 2.2584, "step": 129955 }, { "epoch": 0.31, "grad_norm": 1.7109375, "learning_rate": 0.0001886806531738352, "loss": 2.219, "step": 129960 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018867979898767656, "loss": 2.2226, "step": 129965 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018867894477122326, "loss": 2.1184, "step": 129970 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.00018867809052447562, "loss": 2.0991, "step": 129975 }, { "epoch": 0.31, "grad_norm": 2.46875, "learning_rate": 0.00018867723624743397, "loss": 2.2559, "step": 129980 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018867638194009853, "loss": 2.1853, "step": 129985 }, { "epoch": 0.31, "grad_norm": 1.6953125, "learning_rate": 0.00018867552760246965, "loss": 2.1241, "step": 129990 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.0001886746732345476, "loss": 2.1436, "step": 129995 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018867381883633266, "loss": 1.9222, "step": 130000 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018867296440782516, "loss": 2.0818, "step": 130005 }, { "epoch": 0.31, "grad_norm": 1.9609375, "learning_rate": 0.00018867210994902536, "loss": 2.139, "step": 130010 }, { "epoch": 0.31, "grad_norm": 1.78125, "learning_rate": 0.00018867125545993354, "loss": 2.1453, "step": 130015 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018867040094055001, "loss": 2.1972, "step": 130020 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.0001886695463908751, "loss": 2.1254, "step": 130025 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.00018866869181090903, "loss": 2.2775, "step": 130030 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018866783720065212, "loss": 2.264, "step": 130035 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018866698256010472, "loss": 2.2091, "step": 130040 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.000188666127889267, "loss": 2.2929, "step": 130045 }, { "epoch": 0.31, "grad_norm": 1.8046875, "learning_rate": 0.00018866527318813937, "loss": 2.1833, "step": 130050 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018866441845672208, "loss": 1.9531, "step": 130055 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.00018866356369501538, "loss": 2.1447, "step": 130060 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018866270890301962, "loss": 2.1594, "step": 130065 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018866185408073508, "loss": 2.0468, "step": 130070 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018866099922816202, "loss": 2.1245, "step": 130075 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018866014434530077, "loss": 2.1735, "step": 130080 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.0001886592894321516, "loss": 2.1775, "step": 130085 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018865843448871483, "loss": 2.0797, "step": 130090 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.0001886575795149907, "loss": 2.3004, "step": 130095 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.00018865672451097955, "loss": 2.0955, "step": 130100 }, { "epoch": 0.31, "grad_norm": 2.875, "learning_rate": 0.00018865586947668165, "loss": 2.2046, "step": 130105 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001886550144120973, "loss": 2.4404, "step": 130110 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.0001886541593172268, "loss": 2.033, "step": 130115 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.0001886533041920704, "loss": 2.1942, "step": 130120 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018865244903662847, "loss": 2.1626, "step": 130125 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.0001886515938509012, "loss": 2.1459, "step": 130130 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.000188650738634889, "loss": 2.2247, "step": 130135 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018864988338859203, "loss": 2.2881, "step": 130140 }, { "epoch": 0.31, "grad_norm": 1.8671875, "learning_rate": 0.0001886490281120107, "loss": 2.2039, "step": 130145 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018864817280514525, "loss": 2.3197, "step": 130150 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.00018864731746799594, "loss": 2.0977, "step": 130155 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018864646210056316, "loss": 2.0992, "step": 130160 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.0001886456067028471, "loss": 2.0551, "step": 130165 }, { "epoch": 0.31, "grad_norm": 1.875, "learning_rate": 0.0001886447512748481, "loss": 2.107, "step": 130170 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018864389581656644, "loss": 2.0546, "step": 130175 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018864304032800244, "loss": 2.1534, "step": 130180 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018864218480915633, "loss": 2.0977, "step": 130185 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018864132926002847, "loss": 2.1828, "step": 130190 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018864047368061913, "loss": 2.2555, "step": 130195 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018863961807092858, "loss": 2.3865, "step": 130200 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018863876243095713, "loss": 2.1733, "step": 130205 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018863790676070509, "loss": 2.1424, "step": 130210 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.0001886370510601727, "loss": 2.1201, "step": 130215 }, { "epoch": 0.31, "grad_norm": 1.7734375, "learning_rate": 0.00018863619532936033, "loss": 2.1347, "step": 130220 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.0001886353395682682, "loss": 1.9198, "step": 130225 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018863448377689663, "loss": 2.2548, "step": 130230 }, { "epoch": 0.31, "grad_norm": 1.6953125, "learning_rate": 0.00018863362795524592, "loss": 2.0383, "step": 130235 }, { "epoch": 0.31, "grad_norm": 2.546875, "learning_rate": 0.00018863277210331636, "loss": 2.1366, "step": 130240 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018863191622110824, "loss": 2.2577, "step": 130245 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.00018863106030862184, "loss": 2.1901, "step": 130250 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018863020436585745, "loss": 2.0995, "step": 130255 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018862934839281538, "loss": 2.2151, "step": 130260 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018862849238949594, "loss": 2.1035, "step": 130265 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018862763635589937, "loss": 2.0356, "step": 130270 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018862678029202604, "loss": 2.375, "step": 130275 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018862592419787615, "loss": 2.1517, "step": 130280 }, { "epoch": 0.31, "grad_norm": 1.8984375, "learning_rate": 0.00018862506807345005, "loss": 2.1628, "step": 130285 }, { "epoch": 0.31, "grad_norm": 2.5625, "learning_rate": 0.00018862421191874801, "loss": 2.1908, "step": 130290 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018862335573377034, "loss": 2.1498, "step": 130295 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001886224995185173, "loss": 1.8665, "step": 130300 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018862164327298925, "loss": 2.1102, "step": 130305 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.0001886207869971864, "loss": 2.1328, "step": 130310 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018861993069110912, "loss": 2.3036, "step": 130315 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.00018861907435475765, "loss": 2.1805, "step": 130320 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001886182179881323, "loss": 2.1331, "step": 130325 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.00018861736159123336, "loss": 2.0131, "step": 130330 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018861650516406112, "loss": 2.0625, "step": 130335 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018861564870661585, "loss": 2.1393, "step": 130340 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018861479221889788, "loss": 2.0424, "step": 130345 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.0001886139357009075, "loss": 2.4073, "step": 130350 }, { "epoch": 0.31, "grad_norm": 1.78125, "learning_rate": 0.000188613079152645, "loss": 2.0746, "step": 130355 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018861222257411067, "loss": 2.0122, "step": 130360 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.0001886113659653048, "loss": 2.1164, "step": 130365 }, { "epoch": 0.31, "grad_norm": 1.984375, "learning_rate": 0.00018861050932622765, "loss": 2.1472, "step": 130370 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018860965265687953, "loss": 2.0923, "step": 130375 }, { "epoch": 0.31, "grad_norm": 2.609375, "learning_rate": 0.00018860879595726076, "loss": 2.1061, "step": 130380 }, { "epoch": 0.31, "grad_norm": 1.78125, "learning_rate": 0.00018860793922737164, "loss": 1.8601, "step": 130385 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018860708246721243, "loss": 1.9305, "step": 130390 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018860622567678346, "loss": 2.179, "step": 130395 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.00018860536885608495, "loss": 1.9377, "step": 130400 }, { "epoch": 0.31, "grad_norm": 1.7265625, "learning_rate": 0.00018860451200511727, "loss": 2.1091, "step": 130405 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.00018860365512388065, "loss": 2.0819, "step": 130410 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018860279821237546, "loss": 2.2992, "step": 130415 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.0001886019412706019, "loss": 2.2663, "step": 130420 }, { "epoch": 0.31, "grad_norm": 1.9609375, "learning_rate": 0.00018860108429856034, "loss": 2.1057, "step": 130425 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018860022729625105, "loss": 2.1375, "step": 130430 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018859937026367428, "loss": 2.1466, "step": 130435 }, { "epoch": 0.31, "grad_norm": 2.796875, "learning_rate": 0.00018859851320083038, "loss": 2.1336, "step": 130440 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018859765610771962, "loss": 2.229, "step": 130445 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001885967989843423, "loss": 2.072, "step": 130450 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001885959418306987, "loss": 2.1189, "step": 130455 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018859508464678912, "loss": 2.1354, "step": 130460 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018859422743261388, "loss": 2.0063, "step": 130465 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.0001885933701881732, "loss": 2.1584, "step": 130470 }, { "epoch": 0.31, "grad_norm": 1.8203125, "learning_rate": 0.00018859251291346744, "loss": 2.152, "step": 130475 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018859165560849688, "loss": 2.3384, "step": 130480 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.0001885907982732618, "loss": 2.0677, "step": 130485 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018858994090776248, "loss": 2.3057, "step": 130490 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018858908351199923, "loss": 2.2906, "step": 130495 }, { "epoch": 0.31, "grad_norm": 1.765625, "learning_rate": 0.00018858822608597235, "loss": 2.286, "step": 130500 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.0001885873686296821, "loss": 2.2183, "step": 130505 }, { "epoch": 0.31, "grad_norm": 2.875, "learning_rate": 0.00018858651114312885, "loss": 2.0626, "step": 130510 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.0001885856536263128, "loss": 2.0908, "step": 130515 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018858479607923432, "loss": 2.1289, "step": 130520 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.0001885839385018936, "loss": 2.2701, "step": 130525 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018858308089429108, "loss": 1.9893, "step": 130530 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018858222325642693, "loss": 2.2158, "step": 130535 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.0001885813655883015, "loss": 1.9794, "step": 130540 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.0001885805078899151, "loss": 2.2995, "step": 130545 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018857965016126792, "loss": 2.1872, "step": 130550 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018857879240236037, "loss": 2.1421, "step": 130555 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018857793461319269, "loss": 2.3859, "step": 130560 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018857707679376516, "loss": 2.0299, "step": 130565 }, { "epoch": 0.31, "grad_norm": 2.46875, "learning_rate": 0.00018857621894407812, "loss": 2.1623, "step": 130570 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018857536106413182, "loss": 2.1434, "step": 130575 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.0001885745031539266, "loss": 2.1558, "step": 130580 }, { "epoch": 0.31, "grad_norm": 1.7265625, "learning_rate": 0.0001885736452134627, "loss": 2.0896, "step": 130585 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.00018857278724274042, "loss": 2.0327, "step": 130590 }, { "epoch": 0.31, "grad_norm": 1.78125, "learning_rate": 0.00018857192924176013, "loss": 2.1139, "step": 130595 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018857107121052202, "loss": 2.0761, "step": 130600 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018857021314902644, "loss": 2.0897, "step": 130605 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018856935505727365, "loss": 2.1396, "step": 130610 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018856849693526395, "loss": 2.128, "step": 130615 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.0001885676387829977, "loss": 2.1943, "step": 130620 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018856678060047508, "loss": 2.1561, "step": 130625 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018856592238769646, "loss": 1.9942, "step": 130630 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018856506414466212, "loss": 2.13, "step": 130635 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018856420587137236, "loss": 2.2258, "step": 130640 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018856334756782746, "loss": 2.124, "step": 130645 }, { "epoch": 0.31, "grad_norm": 1.859375, "learning_rate": 0.0001885624892340277, "loss": 2.0748, "step": 130650 }, { "epoch": 0.31, "grad_norm": 1.78125, "learning_rate": 0.00018856163086997335, "loss": 2.1275, "step": 130655 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018856077247566478, "loss": 2.0597, "step": 130660 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018855991405110226, "loss": 2.071, "step": 130665 }, { "epoch": 0.31, "grad_norm": 1.703125, "learning_rate": 0.00018855905559628607, "loss": 2.2458, "step": 130670 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018855819711121647, "loss": 2.0923, "step": 130675 }, { "epoch": 0.31, "grad_norm": 1.796875, "learning_rate": 0.0001885573385958938, "loss": 1.9676, "step": 130680 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018855648005031833, "loss": 2.2197, "step": 130685 }, { "epoch": 0.31, "grad_norm": 1.96875, "learning_rate": 0.00018855562147449039, "loss": 2.3003, "step": 130690 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018855476286841022, "loss": 2.2819, "step": 130695 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018855390423207813, "loss": 2.1453, "step": 130700 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018855304556549443, "loss": 2.0364, "step": 130705 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.0001885521868686594, "loss": 2.1194, "step": 130710 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.0001885513281415733, "loss": 2.0943, "step": 130715 }, { "epoch": 0.31, "grad_norm": 1.90625, "learning_rate": 0.0001885504693842365, "loss": 2.0881, "step": 130720 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018854961059664926, "loss": 2.205, "step": 130725 }, { "epoch": 0.31, "grad_norm": 1.875, "learning_rate": 0.00018854875177881186, "loss": 2.0729, "step": 130730 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.0001885478929307246, "loss": 2.2133, "step": 130735 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.0001885470340523878, "loss": 2.0933, "step": 130740 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018854617514380166, "loss": 2.2611, "step": 130745 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001885453162049666, "loss": 2.254, "step": 130750 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.0001885444572358828, "loss": 2.0945, "step": 130755 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018854359823655066, "loss": 2.1554, "step": 130760 }, { "epoch": 0.31, "grad_norm": 1.8203125, "learning_rate": 0.0001885427392069704, "loss": 2.2982, "step": 130765 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018854188014714234, "loss": 2.1277, "step": 130770 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018854102105706676, "loss": 2.2557, "step": 130775 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018854016193674397, "loss": 2.1111, "step": 130780 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018853930278617424, "loss": 1.9871, "step": 130785 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.0001885384436053579, "loss": 2.19, "step": 130790 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018853758439429523, "loss": 2.1737, "step": 130795 }, { "epoch": 0.31, "grad_norm": 1.8828125, "learning_rate": 0.00018853672515298649, "loss": 2.0181, "step": 130800 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.000188535865881432, "loss": 2.1808, "step": 130805 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.00018853500657963206, "loss": 2.0137, "step": 130810 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018853414724758695, "loss": 2.2587, "step": 130815 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018853328788529696, "loss": 2.0052, "step": 130820 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018853242849276242, "loss": 2.1715, "step": 130825 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.0001885315690699836, "loss": 2.0994, "step": 130830 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018853070961696075, "loss": 2.0543, "step": 130835 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018852985013369423, "loss": 2.0978, "step": 130840 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018852899062018433, "loss": 2.1413, "step": 130845 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.0001885281310764313, "loss": 1.9782, "step": 130850 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018852727150243544, "loss": 2.1344, "step": 130855 }, { "epoch": 0.31, "grad_norm": 1.7265625, "learning_rate": 0.00018852641189819706, "loss": 2.1151, "step": 130860 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018852555226371646, "loss": 2.284, "step": 130865 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018852469259899394, "loss": 2.1047, "step": 130870 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.00018852383290402975, "loss": 1.9968, "step": 130875 }, { "epoch": 0.31, "grad_norm": 1.796875, "learning_rate": 0.00018852297317882422, "loss": 2.0393, "step": 130880 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018852211342337767, "loss": 2.2246, "step": 130885 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018852125363769037, "loss": 2.145, "step": 130890 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018852039382176255, "loss": 2.152, "step": 130895 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018851953397559456, "loss": 2.0368, "step": 130900 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018851867409918672, "loss": 2.273, "step": 130905 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018851781419253931, "loss": 2.3005, "step": 130910 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001885169542556526, "loss": 2.2081, "step": 130915 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018851609428852687, "loss": 2.0758, "step": 130920 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018851523429116242, "loss": 2.253, "step": 130925 }, { "epoch": 0.31, "grad_norm": 1.96875, "learning_rate": 0.0001885143742635596, "loss": 2.2068, "step": 130930 }, { "epoch": 0.31, "grad_norm": 1.765625, "learning_rate": 0.00018851351420571866, "loss": 2.2585, "step": 130935 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.0001885126541176399, "loss": 2.0399, "step": 130940 }, { "epoch": 0.31, "grad_norm": 1.7421875, "learning_rate": 0.0001885117939993236, "loss": 2.0897, "step": 130945 }, { "epoch": 0.31, "grad_norm": 1.8515625, "learning_rate": 0.00018851093385077006, "loss": 1.9791, "step": 130950 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.0001885100736719796, "loss": 2.0567, "step": 130955 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018850921346295248, "loss": 2.2921, "step": 130960 }, { "epoch": 0.31, "grad_norm": 1.859375, "learning_rate": 0.000188508353223689, "loss": 2.2804, "step": 130965 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.0001885074929541895, "loss": 2.067, "step": 130970 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018850663265445418, "loss": 2.1408, "step": 130975 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018850577232448342, "loss": 2.1568, "step": 130980 }, { "epoch": 0.31, "grad_norm": 1.875, "learning_rate": 0.0001885049119642775, "loss": 1.9701, "step": 130985 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018850405157383663, "loss": 2.1072, "step": 130990 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018850319115316125, "loss": 2.2046, "step": 130995 }, { "epoch": 0.31, "grad_norm": 1.84375, "learning_rate": 0.00018850233070225153, "loss": 2.1231, "step": 131000 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.00018850147022110784, "loss": 2.1579, "step": 131005 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018850060970973043, "loss": 2.0445, "step": 131010 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018849974916811956, "loss": 2.31, "step": 131015 }, { "epoch": 0.31, "grad_norm": 1.875, "learning_rate": 0.00018849888859627563, "loss": 2.1777, "step": 131020 }, { "epoch": 0.31, "grad_norm": 1.859375, "learning_rate": 0.00018849802799419883, "loss": 2.1985, "step": 131025 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018849716736188955, "loss": 2.0543, "step": 131030 }, { "epoch": 0.31, "grad_norm": 1.734375, "learning_rate": 0.000188496306699348, "loss": 2.0901, "step": 131035 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.0001884954460065745, "loss": 2.0455, "step": 131040 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.00018849458528356937, "loss": 2.1253, "step": 131045 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001884937245303329, "loss": 2.0501, "step": 131050 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018849286374686536, "loss": 2.153, "step": 131055 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.000188492002933167, "loss": 2.2332, "step": 131060 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.0001884911420892382, "loss": 2.2826, "step": 131065 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018849028121507925, "loss": 2.1303, "step": 131070 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018848942031069038, "loss": 2.1287, "step": 131075 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018848855937607194, "loss": 2.1089, "step": 131080 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.0001884876984112242, "loss": 2.095, "step": 131085 }, { "epoch": 0.31, "grad_norm": 2.8125, "learning_rate": 0.00018848683741614746, "loss": 2.1357, "step": 131090 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.000188485976390842, "loss": 2.1759, "step": 131095 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018848511533530813, "loss": 2.0448, "step": 131100 }, { "epoch": 0.31, "grad_norm": 1.8125, "learning_rate": 0.00018848425424954613, "loss": 2.2626, "step": 131105 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018848339313355634, "loss": 2.1901, "step": 131110 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.000188482531987339, "loss": 2.2403, "step": 131115 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018848167081089438, "loss": 2.1997, "step": 131120 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018848080960422284, "loss": 2.2419, "step": 131125 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.0001884799483673247, "loss": 1.9981, "step": 131130 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018847908710020015, "loss": 2.1038, "step": 131135 }, { "epoch": 0.31, "grad_norm": 3.046875, "learning_rate": 0.00018847822580284954, "loss": 2.1782, "step": 131140 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001884773644752732, "loss": 2.3205, "step": 131145 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018847650311747138, "loss": 2.1414, "step": 131150 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018847564172944434, "loss": 2.274, "step": 131155 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018847478031119245, "loss": 2.1626, "step": 131160 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018847391886271596, "loss": 2.0683, "step": 131165 }, { "epoch": 0.31, "grad_norm": 2.59375, "learning_rate": 0.0001884730573840152, "loss": 2.1997, "step": 131170 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018847219587509042, "loss": 1.9647, "step": 131175 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018847133433594194, "loss": 2.0139, "step": 131180 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018847047276657005, "loss": 2.1492, "step": 131185 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018846961116697502, "loss": 2.0897, "step": 131190 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018846874953715718, "loss": 2.2448, "step": 131195 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.0001884678878771168, "loss": 2.0721, "step": 131200 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.0001884670261868542, "loss": 2.1442, "step": 131205 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018846616446636968, "loss": 2.0452, "step": 131210 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.00018846530271566345, "loss": 2.1108, "step": 131215 }, { "epoch": 0.31, "grad_norm": 2.46875, "learning_rate": 0.0001884644409347359, "loss": 2.0779, "step": 131220 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018846357912358732, "loss": 2.2157, "step": 131225 }, { "epoch": 0.31, "grad_norm": 1.7890625, "learning_rate": 0.00018846271728221794, "loss": 2.1907, "step": 131230 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001884618554106281, "loss": 2.0897, "step": 131235 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001884609935088181, "loss": 2.1008, "step": 131240 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018846013157678822, "loss": 2.1466, "step": 131245 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018845926961453874, "loss": 2.283, "step": 131250 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018845840762206999, "loss": 1.9413, "step": 131255 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018845754559938222, "loss": 2.0415, "step": 131260 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.00018845668354647576, "loss": 2.2132, "step": 131265 }, { "epoch": 0.31, "grad_norm": 1.4609375, "learning_rate": 0.0001884558214633509, "loss": 2.1042, "step": 131270 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018845495935000792, "loss": 2.2424, "step": 131275 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.0001884540972064471, "loss": 1.8663, "step": 131280 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018845323503266878, "loss": 2.2232, "step": 131285 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018845237282867324, "loss": 1.9067, "step": 131290 }, { "epoch": 0.31, "grad_norm": 2.890625, "learning_rate": 0.00018845151059446074, "loss": 2.2183, "step": 131295 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018845064833003164, "loss": 2.101, "step": 131300 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018844978603538618, "loss": 2.3206, "step": 131305 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018844892371052462, "loss": 2.1557, "step": 131310 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018844806135544736, "loss": 2.1812, "step": 131315 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018844719897015462, "loss": 2.2266, "step": 131320 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.0001884463365546467, "loss": 2.1933, "step": 131325 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018844547410892392, "loss": 2.0395, "step": 131330 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018844461163298654, "loss": 2.1216, "step": 131335 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018844374912683492, "loss": 2.169, "step": 131340 }, { "epoch": 0.31, "grad_norm": 3.296875, "learning_rate": 0.00018844288659046926, "loss": 2.2621, "step": 131345 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.00018844202402388996, "loss": 2.2383, "step": 131350 }, { "epoch": 0.31, "grad_norm": 1.8984375, "learning_rate": 0.00018844116142709724, "loss": 2.1632, "step": 131355 }, { "epoch": 0.31, "grad_norm": 3.046875, "learning_rate": 0.0001884402988000914, "loss": 1.993, "step": 131360 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018843943614287276, "loss": 2.2481, "step": 131365 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.0001884385734554416, "loss": 2.0933, "step": 131370 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018843771073779824, "loss": 1.9608, "step": 131375 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.0001884368479899429, "loss": 2.1044, "step": 131380 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.000188435985211876, "loss": 2.2112, "step": 131385 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.0001884351224035977, "loss": 2.2363, "step": 131390 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.0001884342595651084, "loss": 2.217, "step": 131395 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.00018843339669640834, "loss": 2.1029, "step": 131400 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018843253379749784, "loss": 2.202, "step": 131405 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018843167086837718, "loss": 2.0812, "step": 131410 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018843080790904664, "loss": 2.1661, "step": 131415 }, { "epoch": 0.31, "grad_norm": 5.8125, "learning_rate": 0.00018842994491950654, "loss": 2.1935, "step": 131420 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018842908189975715, "loss": 2.1646, "step": 131425 }, { "epoch": 0.31, "grad_norm": 2.609375, "learning_rate": 0.00018842821884979883, "loss": 2.1733, "step": 131430 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.0001884273557696318, "loss": 2.17, "step": 131435 }, { "epoch": 0.31, "grad_norm": 1.7265625, "learning_rate": 0.00018842649265925636, "loss": 2.1106, "step": 131440 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.00018842562951867285, "loss": 2.2176, "step": 131445 }, { "epoch": 0.31, "grad_norm": 1.6484375, "learning_rate": 0.00018842476634788153, "loss": 2.2637, "step": 131450 }, { "epoch": 0.31, "grad_norm": 1.890625, "learning_rate": 0.00018842390314688273, "loss": 2.1806, "step": 131455 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.0001884230399156767, "loss": 2.1258, "step": 131460 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018842217665426375, "loss": 2.1297, "step": 131465 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.0001884213133626442, "loss": 2.2977, "step": 131470 }, { "epoch": 0.31, "grad_norm": 2.984375, "learning_rate": 0.00018842045004081832, "loss": 2.1187, "step": 131475 }, { "epoch": 0.31, "grad_norm": 1.9609375, "learning_rate": 0.0001884195866887864, "loss": 2.0789, "step": 131480 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018841872330654875, "loss": 2.1226, "step": 131485 }, { "epoch": 0.31, "grad_norm": 1.984375, "learning_rate": 0.00018841785989410568, "loss": 2.0265, "step": 131490 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018841699645145746, "loss": 2.2958, "step": 131495 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018841613297860437, "loss": 2.0638, "step": 131500 }, { "epoch": 0.31, "grad_norm": 1.8203125, "learning_rate": 0.00018841526947554674, "loss": 1.8651, "step": 131505 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001884144059422848, "loss": 2.1434, "step": 131510 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018841354237881895, "loss": 2.351, "step": 131515 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018841267878514942, "loss": 2.1655, "step": 131520 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018841181516127651, "loss": 2.3397, "step": 131525 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018841095150720055, "loss": 2.2482, "step": 131530 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018841008782292178, "loss": 2.094, "step": 131535 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018840922410844052, "loss": 2.189, "step": 131540 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018840836036375705, "loss": 2.1103, "step": 131545 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018840749658887172, "loss": 2.0748, "step": 131550 }, { "epoch": 0.31, "grad_norm": 1.8828125, "learning_rate": 0.00018840663278378477, "loss": 2.4289, "step": 131555 }, { "epoch": 0.31, "grad_norm": 1.8359375, "learning_rate": 0.0001884057689484965, "loss": 2.1656, "step": 131560 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018840490508300722, "loss": 2.137, "step": 131565 }, { "epoch": 0.31, "grad_norm": 2.65625, "learning_rate": 0.00018840404118731724, "loss": 2.1735, "step": 131570 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.0001884031772614268, "loss": 2.2822, "step": 131575 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018840231330533627, "loss": 2.2341, "step": 131580 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.00018840144931904588, "loss": 1.9785, "step": 131585 }, { "epoch": 0.31, "grad_norm": 4.1875, "learning_rate": 0.00018840058530255596, "loss": 2.1363, "step": 131590 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001883997212558668, "loss": 2.0805, "step": 131595 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018839885717897867, "loss": 1.9403, "step": 131600 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001883979930718919, "loss": 2.0432, "step": 131605 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018839712893460678, "loss": 2.0867, "step": 131610 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.0001883962647671236, "loss": 2.2278, "step": 131615 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018839540056944266, "loss": 2.1135, "step": 131620 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018839453634156422, "loss": 2.0897, "step": 131625 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.00018839367208348863, "loss": 2.1594, "step": 131630 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018839280779521614, "loss": 1.9516, "step": 131635 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018839194347674708, "loss": 2.1457, "step": 131640 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.0001883910791280817, "loss": 1.9435, "step": 131645 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018839021474922036, "loss": 2.1588, "step": 131650 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001883893503401633, "loss": 2.1746, "step": 131655 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018838848590091084, "loss": 2.0314, "step": 131660 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018838762143146326, "loss": 2.0742, "step": 131665 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.0001883867569318209, "loss": 2.2025, "step": 131670 }, { "epoch": 0.31, "grad_norm": 1.7890625, "learning_rate": 0.000188385892401984, "loss": 2.1877, "step": 131675 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018838502784195287, "loss": 2.2551, "step": 131680 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.0001883841632517278, "loss": 2.0622, "step": 131685 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018838329863130914, "loss": 2.1392, "step": 131690 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018838243398069707, "loss": 2.0902, "step": 131695 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018838156929989203, "loss": 2.1323, "step": 131700 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.0001883807045888942, "loss": 2.1268, "step": 131705 }, { "epoch": 0.31, "grad_norm": 1.9609375, "learning_rate": 0.00018837983984770393, "loss": 2.0918, "step": 131710 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.0001883789750763215, "loss": 2.1592, "step": 131715 }, { "epoch": 0.31, "grad_norm": 1.8125, "learning_rate": 0.00018837811027474724, "loss": 1.9511, "step": 131720 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018837724544298138, "loss": 2.1022, "step": 131725 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018837638058102427, "loss": 2.0827, "step": 131730 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018837551568887613, "loss": 2.2786, "step": 131735 }, { "epoch": 0.31, "grad_norm": 1.9609375, "learning_rate": 0.0001883746507665374, "loss": 2.0501, "step": 131740 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018837378581400822, "loss": 2.1656, "step": 131745 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018837292083128898, "loss": 2.1708, "step": 131750 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.00018837205581837997, "loss": 1.9986, "step": 131755 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.00018837119077528143, "loss": 2.1244, "step": 131760 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018837032570199366, "loss": 2.142, "step": 131765 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018836946059851703, "loss": 1.9652, "step": 131770 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018836859546485178, "loss": 2.1664, "step": 131775 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018836773030099822, "loss": 2.095, "step": 131780 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.00018836686510695664, "loss": 2.4029, "step": 131785 }, { "epoch": 0.31, "grad_norm": 1.84375, "learning_rate": 0.0001883659998827273, "loss": 2.0034, "step": 131790 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018836513462831058, "loss": 2.1472, "step": 131795 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001883642693437067, "loss": 2.1391, "step": 131800 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.000188363404028916, "loss": 2.2358, "step": 131805 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018836253868393876, "loss": 2.1194, "step": 131810 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018836167330877525, "loss": 2.2146, "step": 131815 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.00018836080790342582, "loss": 2.0531, "step": 131820 }, { "epoch": 0.31, "grad_norm": 2.46875, "learning_rate": 0.0001883599424678907, "loss": 2.0889, "step": 131825 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018835907700217027, "loss": 2.2152, "step": 131830 }, { "epoch": 0.31, "grad_norm": 2.921875, "learning_rate": 0.00018835821150626475, "loss": 2.1312, "step": 131835 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.00018835734598017444, "loss": 2.0819, "step": 131840 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018835648042389967, "loss": 2.2486, "step": 131845 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018835561483744075, "loss": 2.0986, "step": 131850 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018835474922079796, "loss": 2.1616, "step": 131855 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018835388357397155, "loss": 2.1595, "step": 131860 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018835301789696182, "loss": 2.2243, "step": 131865 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018835215218976914, "loss": 2.1404, "step": 131870 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018835128645239377, "loss": 2.1566, "step": 131875 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.000188350420684836, "loss": 2.2199, "step": 131880 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.0001883495548870961, "loss": 2.1695, "step": 131885 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.0001883486890591744, "loss": 2.179, "step": 131890 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018834782320107117, "loss": 2.3344, "step": 131895 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018834695731278676, "loss": 2.0772, "step": 131900 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.0001883460913943214, "loss": 1.9658, "step": 131905 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018834522544567542, "loss": 2.0202, "step": 131910 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001883443594668491, "loss": 1.9959, "step": 131915 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018834349345784275, "loss": 2.2068, "step": 131920 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018834262741865664, "loss": 2.3067, "step": 131925 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018834176134929114, "loss": 2.2754, "step": 131930 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018834089524974644, "loss": 2.0016, "step": 131935 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001883400291200229, "loss": 2.1033, "step": 131940 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.0001883391629601208, "loss": 2.2562, "step": 131945 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018833829677004048, "loss": 2.2329, "step": 131950 }, { "epoch": 0.31, "grad_norm": 2.859375, "learning_rate": 0.00018833743054978217, "loss": 2.1367, "step": 131955 }, { "epoch": 0.31, "grad_norm": 2.875, "learning_rate": 0.00018833656429934615, "loss": 2.1274, "step": 131960 }, { "epoch": 0.31, "grad_norm": 2.65625, "learning_rate": 0.00018833569801873283, "loss": 2.2226, "step": 131965 }, { "epoch": 0.31, "grad_norm": 2.5625, "learning_rate": 0.0001883348317079424, "loss": 2.1688, "step": 131970 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.0001883339653669752, "loss": 2.0047, "step": 131975 }, { "epoch": 0.31, "grad_norm": 2.65625, "learning_rate": 0.0001883330989958315, "loss": 2.1662, "step": 131980 }, { "epoch": 0.31, "grad_norm": 2.71875, "learning_rate": 0.00018833223259451162, "loss": 2.1493, "step": 131985 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.0001883313661630158, "loss": 2.0283, "step": 131990 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018833049970134447, "loss": 2.1407, "step": 131995 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001883296332094978, "loss": 2.2261, "step": 132000 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.0001883287666874761, "loss": 2.1945, "step": 132005 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.0001883279001352797, "loss": 2.3202, "step": 132010 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.0001883270335529089, "loss": 2.1221, "step": 132015 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018832616694036398, "loss": 2.2468, "step": 132020 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018832530029764527, "loss": 2.0768, "step": 132025 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.000188324433624753, "loss": 2.1617, "step": 132030 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001883235669216875, "loss": 2.2157, "step": 132035 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.0001883227001884491, "loss": 2.2845, "step": 132040 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018832183342503802, "loss": 2.2804, "step": 132045 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001883209666314546, "loss": 1.9879, "step": 132050 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018832009980769917, "loss": 2.1729, "step": 132055 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018831923295377202, "loss": 2.2065, "step": 132060 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018831836606967334, "loss": 2.1003, "step": 132065 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018831749915540355, "loss": 2.1955, "step": 132070 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001883166322109629, "loss": 2.1099, "step": 132075 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018831576523635168, "loss": 1.902, "step": 132080 }, { "epoch": 0.31, "grad_norm": 1.84375, "learning_rate": 0.00018831489823157018, "loss": 2.0408, "step": 132085 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018831403119661872, "loss": 2.1537, "step": 132090 }, { "epoch": 0.31, "grad_norm": 2.625, "learning_rate": 0.0001883131641314976, "loss": 1.9056, "step": 132095 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018831229703620707, "loss": 2.2315, "step": 132100 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018831142991074747, "loss": 2.0262, "step": 132105 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001883105627551191, "loss": 2.1534, "step": 132110 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018830969556932225, "loss": 2.069, "step": 132115 }, { "epoch": 0.31, "grad_norm": 1.65625, "learning_rate": 0.00018830882835335717, "loss": 2.0932, "step": 132120 }, { "epoch": 0.31, "grad_norm": 1.890625, "learning_rate": 0.00018830796110722421, "loss": 2.0564, "step": 132125 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018830709383092366, "loss": 2.1099, "step": 132130 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018830622652445578, "loss": 2.2974, "step": 132135 }, { "epoch": 0.31, "grad_norm": 2.546875, "learning_rate": 0.00018830535918782092, "loss": 1.8842, "step": 132140 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.00018830449182101933, "loss": 2.1248, "step": 132145 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018830362442405133, "loss": 2.0014, "step": 132150 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001883027569969172, "loss": 2.0788, "step": 132155 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018830188953961724, "loss": 1.9164, "step": 132160 }, { "epoch": 0.31, "grad_norm": 2.6875, "learning_rate": 0.00018830102205215176, "loss": 2.0948, "step": 132165 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018830015453452105, "loss": 2.3454, "step": 132170 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018829928698672543, "loss": 2.0932, "step": 132175 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018829841940876517, "loss": 2.0774, "step": 132180 }, { "epoch": 0.31, "grad_norm": 1.8125, "learning_rate": 0.00018829755180064056, "loss": 2.0497, "step": 132185 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018829668416235187, "loss": 2.1472, "step": 132190 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018829581649389948, "loss": 2.1245, "step": 132195 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.0001882949487952836, "loss": 2.1802, "step": 132200 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.0001882940810665046, "loss": 2.2424, "step": 132205 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018829321330756272, "loss": 2.1861, "step": 132210 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018829234551845828, "loss": 2.1794, "step": 132215 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018829147769919155, "loss": 2.2505, "step": 132220 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018829060984976287, "loss": 2.0312, "step": 132225 }, { "epoch": 0.31, "grad_norm": 1.9609375, "learning_rate": 0.00018828974197017252, "loss": 2.1569, "step": 132230 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.0001882888740604208, "loss": 2.012, "step": 132235 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.000188288006120508, "loss": 2.1939, "step": 132240 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.0001882871381504344, "loss": 2.0984, "step": 132245 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.0001882862701502003, "loss": 2.0078, "step": 132250 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018828540211980604, "loss": 2.1545, "step": 132255 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.0001882845340592519, "loss": 2.2606, "step": 132260 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018828366596853813, "loss": 2.3561, "step": 132265 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018828279784766506, "loss": 2.0432, "step": 132270 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.000188281929696633, "loss": 2.1867, "step": 132275 }, { "epoch": 0.31, "grad_norm": 1.7578125, "learning_rate": 0.0001882810615154422, "loss": 2.0344, "step": 132280 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.000188280193304093, "loss": 2.061, "step": 132285 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.0001882793250625857, "loss": 2.3408, "step": 132290 }, { "epoch": 0.31, "grad_norm": 3.109375, "learning_rate": 0.00018827845679092057, "loss": 2.013, "step": 132295 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018827758848909793, "loss": 2.1678, "step": 132300 }, { "epoch": 0.31, "grad_norm": 1.8984375, "learning_rate": 0.00018827672015711805, "loss": 2.4468, "step": 132305 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018827585179498127, "loss": 2.1935, "step": 132310 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.00018827498340268783, "loss": 2.279, "step": 132315 }, { "epoch": 0.31, "grad_norm": 1.8828125, "learning_rate": 0.00018827411498023805, "loss": 2.1092, "step": 132320 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018827324652763227, "loss": 2.2066, "step": 132325 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.0001882723780448707, "loss": 2.1172, "step": 132330 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.0001882715095319537, "loss": 2.1141, "step": 132335 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.00018827064098888156, "loss": 2.0392, "step": 132340 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018826977241565457, "loss": 2.1039, "step": 132345 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018826890381227305, "loss": 2.0856, "step": 132350 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018826803517873724, "loss": 2.0571, "step": 132355 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018826716651504746, "loss": 2.0463, "step": 132360 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.000188266297821204, "loss": 1.9485, "step": 132365 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018826542909720724, "loss": 2.014, "step": 132370 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018826456034305738, "loss": 2.2559, "step": 132375 }, { "epoch": 0.31, "grad_norm": 2.6875, "learning_rate": 0.0001882636915587547, "loss": 2.2711, "step": 132380 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018826282274429958, "loss": 2.4446, "step": 132385 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.0001882619538996923, "loss": 2.1647, "step": 132390 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001882610850249331, "loss": 2.1618, "step": 132395 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.0001882602161200223, "loss": 1.9943, "step": 132400 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018825934718496025, "loss": 2.298, "step": 132405 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018825847821974718, "loss": 2.239, "step": 132410 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018825760922438342, "loss": 2.04, "step": 132415 }, { "epoch": 0.31, "grad_norm": 1.7109375, "learning_rate": 0.00018825674019886925, "loss": 2.1532, "step": 132420 }, { "epoch": 0.31, "grad_norm": 2.703125, "learning_rate": 0.000188255871143205, "loss": 2.1234, "step": 132425 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018825500205739095, "loss": 2.0141, "step": 132430 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018825413294142735, "loss": 2.149, "step": 132435 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018825326379531457, "loss": 2.1575, "step": 132440 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018825239461905287, "loss": 2.3447, "step": 132445 }, { "epoch": 0.31, "grad_norm": 1.90625, "learning_rate": 0.00018825152541264255, "loss": 2.2735, "step": 132450 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.0001882506561760839, "loss": 2.2738, "step": 132455 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.0001882497869093772, "loss": 2.1619, "step": 132460 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018824891761252282, "loss": 2.2031, "step": 132465 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.000188248048285521, "loss": 2.0014, "step": 132470 }, { "epoch": 0.31, "grad_norm": 2.984375, "learning_rate": 0.00018824717892837204, "loss": 2.2638, "step": 132475 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.00018824630954107624, "loss": 2.0969, "step": 132480 }, { "epoch": 0.31, "grad_norm": 2.90625, "learning_rate": 0.0001882454401236339, "loss": 2.1501, "step": 132485 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018824457067604533, "loss": 2.1024, "step": 132490 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018824370119831078, "loss": 1.9556, "step": 132495 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018824283169043061, "loss": 2.4745, "step": 132500 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.0001882419621524051, "loss": 1.948, "step": 132505 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.0001882410925842345, "loss": 2.1852, "step": 132510 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018824022298591917, "loss": 2.1035, "step": 132515 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018823935335745938, "loss": 2.237, "step": 132520 }, { "epoch": 0.31, "grad_norm": 2.609375, "learning_rate": 0.0001882384836988554, "loss": 1.9558, "step": 132525 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018823761401010758, "loss": 2.2377, "step": 132530 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.0001882367442912162, "loss": 2.137, "step": 132535 }, { "epoch": 0.31, "grad_norm": 1.796875, "learning_rate": 0.00018823587454218151, "loss": 1.9949, "step": 132540 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018823500476300385, "loss": 1.9814, "step": 132545 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018823413495368355, "loss": 2.0631, "step": 132550 }, { "epoch": 0.31, "grad_norm": 2.546875, "learning_rate": 0.00018823326511422083, "loss": 2.1014, "step": 132555 }, { "epoch": 0.31, "grad_norm": 2.65625, "learning_rate": 0.00018823239524461604, "loss": 2.1734, "step": 132560 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018823152534486946, "loss": 2.135, "step": 132565 }, { "epoch": 0.31, "grad_norm": 1.8359375, "learning_rate": 0.00018823065541498138, "loss": 1.9876, "step": 132570 }, { "epoch": 0.31, "grad_norm": 1.78125, "learning_rate": 0.00018822978545495215, "loss": 1.9525, "step": 132575 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.000188228915464782, "loss": 2.0735, "step": 132580 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018822804544447127, "loss": 1.9717, "step": 132585 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018822717539402023, "loss": 2.2107, "step": 132590 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018822630531342916, "loss": 2.1764, "step": 132595 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001882254352026984, "loss": 2.0981, "step": 132600 }, { "epoch": 0.31, "grad_norm": 1.90625, "learning_rate": 0.0001882245650618282, "loss": 2.1098, "step": 132605 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018822369489081896, "loss": 2.1175, "step": 132610 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018822282468967084, "loss": 2.393, "step": 132615 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018822195445838426, "loss": 2.1055, "step": 132620 }, { "epoch": 0.31, "grad_norm": 1.890625, "learning_rate": 0.00018822108419695944, "loss": 2.0014, "step": 132625 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018822021390539666, "loss": 2.1868, "step": 132630 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.00018821934358369632, "loss": 2.2682, "step": 132635 }, { "epoch": 0.31, "grad_norm": 2.609375, "learning_rate": 0.0001882184732318586, "loss": 2.1534, "step": 132640 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018821760284988385, "loss": 2.246, "step": 132645 }, { "epoch": 0.31, "grad_norm": 1.96875, "learning_rate": 0.00018821673243777238, "loss": 2.1929, "step": 132650 }, { "epoch": 0.31, "grad_norm": 3.0625, "learning_rate": 0.0001882158619955245, "loss": 2.0995, "step": 132655 }, { "epoch": 0.31, "grad_norm": 1.859375, "learning_rate": 0.00018821499152314047, "loss": 2.3017, "step": 132660 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.00018821412102062058, "loss": 2.1844, "step": 132665 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018821325048796515, "loss": 2.0368, "step": 132670 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018821237992517446, "loss": 2.1054, "step": 132675 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018821150933224885, "loss": 2.1124, "step": 132680 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018821063870918855, "loss": 2.1913, "step": 132685 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018820976805599394, "loss": 2.0212, "step": 132690 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018820889737266528, "loss": 2.0102, "step": 132695 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018820802665920282, "loss": 1.9094, "step": 132700 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018820715591560695, "loss": 2.1769, "step": 132705 }, { "epoch": 0.31, "grad_norm": 2.484375, "learning_rate": 0.00018820628514187786, "loss": 2.2486, "step": 132710 }, { "epoch": 0.31, "grad_norm": 1.578125, "learning_rate": 0.0001882054143380159, "loss": 2.0923, "step": 132715 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.0001882045435040214, "loss": 2.109, "step": 132720 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018820367263989462, "loss": 2.2105, "step": 132725 }, { "epoch": 0.31, "grad_norm": 1.8828125, "learning_rate": 0.0001882028017456359, "loss": 2.0257, "step": 132730 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.00018820193082124548, "loss": 2.0758, "step": 132735 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018820105986672365, "loss": 2.1147, "step": 132740 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018820018888207076, "loss": 2.1679, "step": 132745 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.0001881993178672871, "loss": 2.2213, "step": 132750 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018819844682237293, "loss": 2.1725, "step": 132755 }, { "epoch": 0.31, "grad_norm": 1.8515625, "learning_rate": 0.00018819757574732858, "loss": 2.3323, "step": 132760 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018819670464215434, "loss": 2.2007, "step": 132765 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018819583350685053, "loss": 2.0055, "step": 132770 }, { "epoch": 0.31, "grad_norm": 1.8203125, "learning_rate": 0.0001881949623414174, "loss": 2.1213, "step": 132775 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018819409114585528, "loss": 2.1244, "step": 132780 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018819321992016446, "loss": 2.2101, "step": 132785 }, { "epoch": 0.31, "grad_norm": 2.59375, "learning_rate": 0.00018819234866434523, "loss": 2.2571, "step": 132790 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.0001881914773783979, "loss": 1.9917, "step": 132795 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018819060606232272, "loss": 2.0462, "step": 132800 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018818973471612007, "loss": 2.1415, "step": 132805 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.0001881888633397902, "loss": 2.1868, "step": 132810 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.0001881879919333334, "loss": 2.1942, "step": 132815 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018818712049675004, "loss": 2.14, "step": 132820 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018818624903004029, "loss": 2.1647, "step": 132825 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018818537753320457, "loss": 2.0964, "step": 132830 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018818450600624308, "loss": 2.018, "step": 132835 }, { "epoch": 0.31, "grad_norm": 2.5625, "learning_rate": 0.00018818363444915619, "loss": 2.2351, "step": 132840 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.0001881827628619442, "loss": 2.1109, "step": 132845 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018818189124460732, "loss": 2.1471, "step": 132850 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018818101959714595, "loss": 2.2538, "step": 132855 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.0001881801479195603, "loss": 2.1098, "step": 132860 }, { "epoch": 0.31, "grad_norm": 1.8046875, "learning_rate": 0.00018817927621185074, "loss": 2.2482, "step": 132865 }, { "epoch": 0.31, "grad_norm": 2.40625, "learning_rate": 0.00018817840447401757, "loss": 2.1046, "step": 132870 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.000188177532706061, "loss": 2.1166, "step": 132875 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.00018817666090798144, "loss": 2.1307, "step": 132880 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018817578907977912, "loss": 2.2152, "step": 132885 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.0001881749172214543, "loss": 2.0584, "step": 132890 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018817404533300739, "loss": 2.0908, "step": 132895 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018817317341443857, "loss": 2.0659, "step": 132900 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018817230146574824, "loss": 2.1289, "step": 132905 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018817142948693665, "loss": 2.2068, "step": 132910 }, { "epoch": 0.31, "grad_norm": 2.609375, "learning_rate": 0.0001881705574780041, "loss": 2.1606, "step": 132915 }, { "epoch": 0.31, "grad_norm": 1.6640625, "learning_rate": 0.00018816968543895087, "loss": 2.348, "step": 132920 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.0001881688133697773, "loss": 2.2801, "step": 132925 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018816794127048364, "loss": 2.1375, "step": 132930 }, { "epoch": 0.31, "grad_norm": 2.859375, "learning_rate": 0.00018816706914107022, "loss": 2.079, "step": 132935 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.0001881661969815373, "loss": 1.9828, "step": 132940 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.0001881653247918853, "loss": 1.9371, "step": 132945 }, { "epoch": 0.31, "grad_norm": 1.84375, "learning_rate": 0.00018816445257211436, "loss": 2.2034, "step": 132950 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018816358032222485, "loss": 1.9437, "step": 132955 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018816270804221708, "loss": 2.2354, "step": 132960 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.0001881618357320913, "loss": 2.1471, "step": 132965 }, { "epoch": 0.31, "grad_norm": 1.734375, "learning_rate": 0.00018816096339184784, "loss": 2.0755, "step": 132970 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018816009102148702, "loss": 2.1618, "step": 132975 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.0001881592186210091, "loss": 2.1661, "step": 132980 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.0001881583461904144, "loss": 2.1479, "step": 132985 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.0001881574737297032, "loss": 2.1572, "step": 132990 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.0001881566012388758, "loss": 2.2035, "step": 132995 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018815572871793253, "loss": 2.1857, "step": 133000 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018815485616687365, "loss": 2.1747, "step": 133005 }, { "epoch": 0.31, "grad_norm": 2.46875, "learning_rate": 0.0001881539835856995, "loss": 2.2542, "step": 133010 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.0001881531109744103, "loss": 1.9865, "step": 133015 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018815223833300643, "loss": 2.3343, "step": 133020 }, { "epoch": 0.31, "grad_norm": 1.953125, "learning_rate": 0.00018815136566148816, "loss": 2.2375, "step": 133025 }, { "epoch": 0.31, "grad_norm": 1.8125, "learning_rate": 0.0001881504929598558, "loss": 2.095, "step": 133030 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018814962022810957, "loss": 2.1821, "step": 133035 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018814874746624985, "loss": 2.1113, "step": 133040 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018814787467427696, "loss": 2.2582, "step": 133045 }, { "epoch": 0.31, "grad_norm": 2.640625, "learning_rate": 0.00018814700185219114, "loss": 2.0377, "step": 133050 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018814612899999272, "loss": 2.0025, "step": 133055 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018814525611768196, "loss": 2.1386, "step": 133060 }, { "epoch": 0.31, "grad_norm": 1.890625, "learning_rate": 0.00018814438320525917, "loss": 2.2072, "step": 133065 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.0001881435102627247, "loss": 1.9458, "step": 133070 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018814263729007878, "loss": 2.2027, "step": 133075 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018814176428732173, "loss": 2.2022, "step": 133080 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018814089125445386, "loss": 1.9777, "step": 133085 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018814001819147548, "loss": 2.1658, "step": 133090 }, { "epoch": 0.31, "grad_norm": 1.7578125, "learning_rate": 0.00018813914509838687, "loss": 2.1346, "step": 133095 }, { "epoch": 0.31, "grad_norm": 1.8203125, "learning_rate": 0.00018813827197518832, "loss": 2.0647, "step": 133100 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018813739882188015, "loss": 2.3096, "step": 133105 }, { "epoch": 0.31, "grad_norm": 1.84375, "learning_rate": 0.00018813652563846262, "loss": 2.0611, "step": 133110 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018813565242493607, "loss": 2.0077, "step": 133115 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.0001881347791813008, "loss": 2.0575, "step": 133120 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018813390590755705, "loss": 2.0987, "step": 133125 }, { "epoch": 0.31, "grad_norm": 2.515625, "learning_rate": 0.00018813303260370519, "loss": 2.1874, "step": 133130 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.0001881321592697455, "loss": 2.0789, "step": 133135 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018813128590567825, "loss": 2.1396, "step": 133140 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018813041251150372, "loss": 2.092, "step": 133145 }, { "epoch": 0.31, "grad_norm": 2.875, "learning_rate": 0.00018812953908722228, "loss": 1.8566, "step": 133150 }, { "epoch": 0.31, "grad_norm": 2.046875, "learning_rate": 0.00018812866563283417, "loss": 2.1515, "step": 133155 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018812779214833974, "loss": 2.2498, "step": 133160 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018812691863373925, "loss": 2.0828, "step": 133165 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.000188126045089033, "loss": 2.1826, "step": 133170 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.0001881251715142213, "loss": 2.2462, "step": 133175 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018812429790930445, "loss": 2.2159, "step": 133180 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.0001881234242742827, "loss": 2.2348, "step": 133185 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018812255060915643, "loss": 2.1216, "step": 133190 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018812167691392587, "loss": 2.1896, "step": 133195 }, { "epoch": 0.31, "grad_norm": 1.984375, "learning_rate": 0.00018812080318859138, "loss": 2.0416, "step": 133200 }, { "epoch": 0.31, "grad_norm": 1.8046875, "learning_rate": 0.0001881199294331532, "loss": 2.2494, "step": 133205 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.00018811905564761165, "loss": 2.2755, "step": 133210 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018811818183196705, "loss": 1.8191, "step": 133215 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.00018811730798621969, "loss": 2.2418, "step": 133220 }, { "epoch": 0.31, "grad_norm": 2.265625, "learning_rate": 0.00018811643411036983, "loss": 2.1423, "step": 133225 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018811556020441783, "loss": 2.201, "step": 133230 }, { "epoch": 0.31, "grad_norm": 2.546875, "learning_rate": 0.00018811468626836394, "loss": 2.1489, "step": 133235 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018811381230220847, "loss": 2.1321, "step": 133240 }, { "epoch": 0.31, "grad_norm": 1.7109375, "learning_rate": 0.00018811293830595173, "loss": 2.0341, "step": 133245 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018811206427959404, "loss": 2.1742, "step": 133250 }, { "epoch": 0.31, "grad_norm": 1.890625, "learning_rate": 0.0001881111902231356, "loss": 2.1489, "step": 133255 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018811031613657684, "loss": 2.1698, "step": 133260 }, { "epoch": 0.31, "grad_norm": 2.546875, "learning_rate": 0.00018810944201991798, "loss": 2.0227, "step": 133265 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018810856787315934, "loss": 2.0777, "step": 133270 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018810769369630121, "loss": 2.1696, "step": 133275 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.0001881068194893439, "loss": 2.1162, "step": 133280 }, { "epoch": 0.31, "grad_norm": 3.09375, "learning_rate": 0.0001881059452522877, "loss": 2.0888, "step": 133285 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.0001881050709851329, "loss": 2.1572, "step": 133290 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018810419668787982, "loss": 2.2578, "step": 133295 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018810332236052877, "loss": 2.1481, "step": 133300 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018810244800308, "loss": 2.1438, "step": 133305 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018810157361553382, "loss": 2.2392, "step": 133310 }, { "epoch": 0.31, "grad_norm": 1.9375, "learning_rate": 0.0001881006991978906, "loss": 2.223, "step": 133315 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018809982475015053, "loss": 2.0872, "step": 133320 }, { "epoch": 0.31, "grad_norm": 2.59375, "learning_rate": 0.00018809895027231398, "loss": 2.3045, "step": 133325 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.00018809807576438122, "loss": 2.1311, "step": 133330 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018809720122635258, "loss": 2.217, "step": 133335 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018809632665822834, "loss": 2.0053, "step": 133340 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018809545206000878, "loss": 2.1786, "step": 133345 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018809457743169423, "loss": 2.3722, "step": 133350 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018809370277328498, "loss": 2.2305, "step": 133355 }, { "epoch": 0.31, "grad_norm": 1.8671875, "learning_rate": 0.0001880928280847813, "loss": 2.3344, "step": 133360 }, { "epoch": 0.31, "grad_norm": 1.4609375, "learning_rate": 0.00018809195336618354, "loss": 2.0176, "step": 133365 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018809107861749192, "loss": 2.1647, "step": 133370 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018809020383870684, "loss": 2.0457, "step": 133375 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.00018808932902982854, "loss": 2.3366, "step": 133380 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018808845419085733, "loss": 2.2561, "step": 133385 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018808757932179348, "loss": 2.155, "step": 133390 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018808670442263734, "loss": 1.9607, "step": 133395 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.00018808582949338918, "loss": 2.2511, "step": 133400 }, { "epoch": 0.31, "grad_norm": 1.8515625, "learning_rate": 0.00018808495453404928, "loss": 2.1539, "step": 133405 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.000188084079544618, "loss": 2.3585, "step": 133410 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018808320452509558, "loss": 2.0957, "step": 133415 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018808232947548233, "loss": 2.0261, "step": 133420 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.0001880814543957786, "loss": 2.1639, "step": 133425 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018808057928598458, "loss": 2.1646, "step": 133430 }, { "epoch": 0.31, "grad_norm": 1.7890625, "learning_rate": 0.0001880797041461007, "loss": 2.2332, "step": 133435 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.0001880788289761272, "loss": 2.2219, "step": 133440 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.00018807795377606432, "loss": 2.1533, "step": 133445 }, { "epoch": 0.31, "grad_norm": 2.421875, "learning_rate": 0.00018807707854591243, "loss": 2.0539, "step": 133450 }, { "epoch": 0.31, "grad_norm": 2.1875, "learning_rate": 0.00018807620328567184, "loss": 2.0503, "step": 133455 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.0001880753279953428, "loss": 2.259, "step": 133460 }, { "epoch": 0.31, "grad_norm": 2.3125, "learning_rate": 0.00018807445267492564, "loss": 2.1225, "step": 133465 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018807357732442064, "loss": 2.3715, "step": 133470 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001880727019438281, "loss": 2.2005, "step": 133475 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018807182653314834, "loss": 1.9984, "step": 133480 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018807095109238167, "loss": 2.1628, "step": 133485 }, { "epoch": 0.31, "grad_norm": 1.7421875, "learning_rate": 0.00018807007562152833, "loss": 2.1814, "step": 133490 }, { "epoch": 0.31, "grad_norm": 2.4375, "learning_rate": 0.00018806920012058868, "loss": 2.2179, "step": 133495 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018806832458956297, "loss": 2.0951, "step": 133500 }, { "epoch": 0.31, "grad_norm": 6.375, "learning_rate": 0.00018806744902845153, "loss": 1.9825, "step": 133505 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018806657343725466, "loss": 2.1381, "step": 133510 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018806569781597269, "loss": 2.2555, "step": 133515 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018806482216460579, "loss": 2.2167, "step": 133520 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018806394648315442, "loss": 2.2064, "step": 133525 }, { "epoch": 0.31, "grad_norm": 1.8671875, "learning_rate": 0.0001880630707716188, "loss": 2.0801, "step": 133530 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.00018806219502999922, "loss": 2.1313, "step": 133535 }, { "epoch": 0.31, "grad_norm": 2.203125, "learning_rate": 0.000188061319258296, "loss": 2.1387, "step": 133540 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018806044345650943, "loss": 2.0462, "step": 133545 }, { "epoch": 0.31, "grad_norm": 1.9140625, "learning_rate": 0.00018805956762463983, "loss": 2.1021, "step": 133550 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018805869176268748, "loss": 2.2969, "step": 133555 }, { "epoch": 0.31, "grad_norm": 2.390625, "learning_rate": 0.00018805781587065269, "loss": 2.0758, "step": 133560 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.00018805693994853573, "loss": 2.2146, "step": 133565 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018805606399633696, "loss": 2.2292, "step": 133570 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.0001880551880140566, "loss": 2.2397, "step": 133575 }, { "epoch": 0.31, "grad_norm": 2.359375, "learning_rate": 0.00018805431200169501, "loss": 2.1575, "step": 133580 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018805343595925246, "loss": 2.199, "step": 133585 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018805255988672929, "loss": 2.2527, "step": 133590 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.00018805168378412574, "loss": 2.2551, "step": 133595 }, { "epoch": 0.31, "grad_norm": 2.828125, "learning_rate": 0.00018805080765144216, "loss": 2.3108, "step": 133600 }, { "epoch": 0.31, "grad_norm": 2.234375, "learning_rate": 0.0001880499314886788, "loss": 1.9743, "step": 133605 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.000188049055295836, "loss": 2.1693, "step": 133610 }, { "epoch": 0.31, "grad_norm": 2.328125, "learning_rate": 0.00018804817907291406, "loss": 2.1293, "step": 133615 }, { "epoch": 0.31, "grad_norm": 2.453125, "learning_rate": 0.00018804730281991324, "loss": 2.1968, "step": 133620 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.00018804642653683388, "loss": 2.282, "step": 133625 }, { "epoch": 0.31, "grad_norm": 2.109375, "learning_rate": 0.00018804555022367627, "loss": 2.1418, "step": 133630 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.0001880446738804407, "loss": 2.1736, "step": 133635 }, { "epoch": 0.31, "grad_norm": 2.125, "learning_rate": 0.00018804379750712746, "loss": 2.2398, "step": 133640 }, { "epoch": 0.31, "grad_norm": 2.21875, "learning_rate": 0.00018804292110373688, "loss": 2.2347, "step": 133645 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.00018804204467026922, "loss": 2.0856, "step": 133650 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.00018804116820672482, "loss": 2.0992, "step": 133655 }, { "epoch": 0.31, "grad_norm": 1.9765625, "learning_rate": 0.00018804029171310394, "loss": 2.2104, "step": 133660 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.0001880394151894069, "loss": 2.203, "step": 133665 }, { "epoch": 0.31, "grad_norm": 2.5, "learning_rate": 0.00018803853863563402, "loss": 2.131, "step": 133670 }, { "epoch": 0.31, "grad_norm": 2.375, "learning_rate": 0.00018803766205178557, "loss": 2.1529, "step": 133675 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.00018803678543786184, "loss": 2.1388, "step": 133680 }, { "epoch": 0.31, "grad_norm": 1.96875, "learning_rate": 0.0001880359087938632, "loss": 2.1387, "step": 133685 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018803503211978984, "loss": 1.971, "step": 133690 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.00018803415541564214, "loss": 2.1561, "step": 133695 }, { "epoch": 0.31, "grad_norm": 1.875, "learning_rate": 0.00018803327868142038, "loss": 2.2737, "step": 133700 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018803240191712483, "loss": 2.1447, "step": 133705 }, { "epoch": 0.31, "grad_norm": 2.5625, "learning_rate": 0.00018803152512275584, "loss": 2.208, "step": 133710 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018803064829831366, "loss": 2.196, "step": 133715 }, { "epoch": 0.31, "grad_norm": 2.46875, "learning_rate": 0.00018802977144379866, "loss": 1.9143, "step": 133720 }, { "epoch": 0.31, "grad_norm": 2.546875, "learning_rate": 0.00018802889455921107, "loss": 2.1273, "step": 133725 }, { "epoch": 0.31, "grad_norm": 2.25, "learning_rate": 0.0001880280176445512, "loss": 2.3464, "step": 133730 }, { "epoch": 0.31, "grad_norm": 2.28125, "learning_rate": 0.00018802714069981934, "loss": 2.2736, "step": 133735 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.00018802626372501586, "loss": 2.2527, "step": 133740 }, { "epoch": 0.31, "grad_norm": 1.7109375, "learning_rate": 0.000188025386720141, "loss": 1.9092, "step": 133745 }, { "epoch": 0.31, "grad_norm": 2.140625, "learning_rate": 0.00018802450968519508, "loss": 2.1146, "step": 133750 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.00018802363262017838, "loss": 2.2001, "step": 133755 }, { "epoch": 0.31, "grad_norm": 2.703125, "learning_rate": 0.0001880227555250912, "loss": 2.1496, "step": 133760 }, { "epoch": 0.31, "grad_norm": 1.9296875, "learning_rate": 0.00018802187839993387, "loss": 1.8056, "step": 133765 }, { "epoch": 0.31, "grad_norm": 2.03125, "learning_rate": 0.00018802100124470664, "loss": 2.3287, "step": 133770 }, { "epoch": 0.31, "grad_norm": 1.9921875, "learning_rate": 0.00018802012405940986, "loss": 2.1778, "step": 133775 }, { "epoch": 0.31, "grad_norm": 2.15625, "learning_rate": 0.0001880192468440438, "loss": 2.2778, "step": 133780 }, { "epoch": 0.31, "grad_norm": 1.921875, "learning_rate": 0.0001880183695986088, "loss": 2.3417, "step": 133785 }, { "epoch": 0.31, "grad_norm": 1.9453125, "learning_rate": 0.0001880174923231051, "loss": 2.3002, "step": 133790 }, { "epoch": 0.31, "grad_norm": 2.53125, "learning_rate": 0.00018801661501753302, "loss": 2.1277, "step": 133795 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001880157376818929, "loss": 2.1548, "step": 133800 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.00018801486031618498, "loss": 2.0381, "step": 133805 }, { "epoch": 0.31, "grad_norm": 2.0, "learning_rate": 0.0001880139829204096, "loss": 2.1792, "step": 133810 }, { "epoch": 0.31, "grad_norm": 2.34375, "learning_rate": 0.00018801310549456702, "loss": 2.3261, "step": 133815 }, { "epoch": 0.31, "grad_norm": 2.578125, "learning_rate": 0.0001880122280386576, "loss": 2.1934, "step": 133820 }, { "epoch": 0.31, "grad_norm": 2.296875, "learning_rate": 0.0001880113505526816, "loss": 2.0874, "step": 133825 }, { "epoch": 0.31, "grad_norm": 2.09375, "learning_rate": 0.00018801047303663936, "loss": 2.2113, "step": 133830 }, { "epoch": 0.31, "grad_norm": 2.0625, "learning_rate": 0.00018800959549053107, "loss": 2.2061, "step": 133835 }, { "epoch": 0.31, "grad_norm": 2.015625, "learning_rate": 0.00018800871791435717, "loss": 2.1761, "step": 133840 }, { "epoch": 0.31, "grad_norm": 2.078125, "learning_rate": 0.00018800784030811787, "loss": 2.1708, "step": 133845 }, { "epoch": 0.31, "grad_norm": 2.171875, "learning_rate": 0.0001880069626718135, "loss": 2.1473, "step": 133850 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018800608500544434, "loss": 2.3024, "step": 133855 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.0001880052073090107, "loss": 2.2165, "step": 133860 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018800432958251292, "loss": 2.1236, "step": 133865 }, { "epoch": 0.32, "grad_norm": 1.7734375, "learning_rate": 0.00018800345182595126, "loss": 2.3156, "step": 133870 }, { "epoch": 0.32, "grad_norm": 1.8203125, "learning_rate": 0.000188002574039326, "loss": 2.2335, "step": 133875 }, { "epoch": 0.32, "grad_norm": 2.640625, "learning_rate": 0.0001880016962226375, "loss": 2.1527, "step": 133880 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018800081837588598, "loss": 2.1442, "step": 133885 }, { "epoch": 0.32, "grad_norm": 1.8515625, "learning_rate": 0.00018799994049907182, "loss": 2.1633, "step": 133890 }, { "epoch": 0.32, "grad_norm": 1.8203125, "learning_rate": 0.0001879990625921953, "loss": 2.1621, "step": 133895 }, { "epoch": 0.32, "grad_norm": 3.984375, "learning_rate": 0.00018799818465525665, "loss": 2.0839, "step": 133900 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018799730668825625, "loss": 2.0666, "step": 133905 }, { "epoch": 0.32, "grad_norm": 1.75, "learning_rate": 0.0001879964286911944, "loss": 2.1795, "step": 133910 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018799555066407133, "loss": 2.2014, "step": 133915 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.0001879946726068874, "loss": 2.1002, "step": 133920 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.0001879937945196429, "loss": 2.04, "step": 133925 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018799291640233813, "loss": 2.1406, "step": 133930 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018799203825497335, "loss": 2.2599, "step": 133935 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018799116007754894, "loss": 2.082, "step": 133940 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.0001879902818700651, "loss": 2.1993, "step": 133945 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018798940363252221, "loss": 2.0119, "step": 133950 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018798852536492058, "loss": 2.2965, "step": 133955 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018798764706726045, "loss": 2.037, "step": 133960 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018798676873954214, "loss": 2.0543, "step": 133965 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018798589038176595, "loss": 2.0202, "step": 133970 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018798501199393216, "loss": 2.2264, "step": 133975 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018798413357604113, "loss": 2.2801, "step": 133980 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018798325512809312, "loss": 2.1678, "step": 133985 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018798237665008843, "loss": 2.0261, "step": 133990 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018798149814202737, "loss": 2.1768, "step": 133995 }, { "epoch": 0.32, "grad_norm": 1.9453125, "learning_rate": 0.00018798061960391022, "loss": 2.1502, "step": 134000 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018797974103573731, "loss": 2.2255, "step": 134005 }, { "epoch": 0.32, "grad_norm": 1.7734375, "learning_rate": 0.0001879788624375089, "loss": 1.9844, "step": 134010 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018797798380922536, "loss": 2.2069, "step": 134015 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018797710515088692, "loss": 2.1456, "step": 134020 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018797622646249388, "loss": 2.0832, "step": 134025 }, { "epoch": 0.32, "grad_norm": 1.9140625, "learning_rate": 0.00018797534774404659, "loss": 2.22, "step": 134030 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018797446899554535, "loss": 2.0808, "step": 134035 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001879735902169904, "loss": 2.1768, "step": 134040 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018797271140838206, "loss": 2.2116, "step": 134045 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001879718325697207, "loss": 2.1547, "step": 134050 }, { "epoch": 0.32, "grad_norm": 1.9765625, "learning_rate": 0.00018797095370100652, "loss": 2.3988, "step": 134055 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001879700748022399, "loss": 2.2683, "step": 134060 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001879691958734211, "loss": 2.2382, "step": 134065 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.0001879683169145504, "loss": 2.2866, "step": 134070 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018796743792562817, "loss": 2.1056, "step": 134075 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018796655890665465, "loss": 2.4056, "step": 134080 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018796567985763013, "loss": 2.1918, "step": 134085 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018796480077855495, "loss": 2.2102, "step": 134090 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018796392166942942, "loss": 2.199, "step": 134095 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018796304253025382, "loss": 2.2314, "step": 134100 }, { "epoch": 0.32, "grad_norm": 1.875, "learning_rate": 0.00018796216336102844, "loss": 2.0611, "step": 134105 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001879612841617536, "loss": 2.0135, "step": 134110 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018796040493242956, "loss": 2.1309, "step": 134115 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018795952567305667, "loss": 2.1294, "step": 134120 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018795864638363523, "loss": 2.1414, "step": 134125 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001879577670641655, "loss": 2.1632, "step": 134130 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018795688771464778, "loss": 2.0691, "step": 134135 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018795600833508244, "loss": 2.1648, "step": 134140 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001879551289254697, "loss": 2.2522, "step": 134145 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018795424948580988, "loss": 2.1183, "step": 134150 }, { "epoch": 0.32, "grad_norm": 2.71875, "learning_rate": 0.00018795337001610332, "loss": 2.1154, "step": 134155 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001879524905163503, "loss": 2.0426, "step": 134160 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.0001879516109865511, "loss": 2.0994, "step": 134165 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018795073142670603, "loss": 2.2408, "step": 134170 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.0001879498518368154, "loss": 2.205, "step": 134175 }, { "epoch": 0.32, "grad_norm": 2.703125, "learning_rate": 0.0001879489722168795, "loss": 2.1856, "step": 134180 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018794809256689866, "loss": 2.2302, "step": 134185 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018794721288687314, "loss": 2.1367, "step": 134190 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018794633317680326, "loss": 2.1779, "step": 134195 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001879454534366893, "loss": 2.1859, "step": 134200 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001879445736665316, "loss": 2.2486, "step": 134205 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018794369386633043, "loss": 2.0717, "step": 134210 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018794281403608612, "loss": 2.2383, "step": 134215 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001879419341757989, "loss": 2.3102, "step": 134220 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018794105428546914, "loss": 2.2833, "step": 134225 }, { "epoch": 0.32, "grad_norm": 1.828125, "learning_rate": 0.00018794017436509717, "loss": 2.1447, "step": 134230 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018793929441468318, "loss": 2.1742, "step": 134235 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018793841443422754, "loss": 2.3303, "step": 134240 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018793753442373058, "loss": 2.1972, "step": 134245 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018793665438319256, "loss": 2.152, "step": 134250 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018793577431261372, "loss": 2.0462, "step": 134255 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.0001879348942119945, "loss": 1.8933, "step": 134260 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.0001879340140813351, "loss": 2.2317, "step": 134265 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.00018793313392063583, "loss": 1.9873, "step": 134270 }, { "epoch": 0.32, "grad_norm": 1.84375, "learning_rate": 0.000187932253729897, "loss": 2.1608, "step": 134275 }, { "epoch": 0.32, "grad_norm": 2.703125, "learning_rate": 0.00018793137350911897, "loss": 2.1986, "step": 134280 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018793049325830193, "loss": 2.1093, "step": 134285 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018792961297744626, "loss": 2.2455, "step": 134290 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018792873266655223, "loss": 2.0227, "step": 134295 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001879278523256202, "loss": 2.1243, "step": 134300 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018792697195465038, "loss": 2.1445, "step": 134305 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.0001879260915536431, "loss": 2.2226, "step": 134310 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018792521112259868, "loss": 2.2058, "step": 134315 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018792433066151744, "loss": 2.1273, "step": 134320 }, { "epoch": 0.32, "grad_norm": 2.53125, "learning_rate": 0.0001879234501703996, "loss": 2.2338, "step": 134325 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018792256964924556, "loss": 2.1655, "step": 134330 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018792168909805557, "loss": 2.2454, "step": 134335 }, { "epoch": 0.32, "grad_norm": 1.9765625, "learning_rate": 0.00018792080851682993, "loss": 2.217, "step": 134340 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018791992790556897, "loss": 2.3378, "step": 134345 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018791904726427294, "loss": 1.9345, "step": 134350 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018791816659294216, "loss": 2.021, "step": 134355 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.00018791728589157697, "loss": 2.1671, "step": 134360 }, { "epoch": 0.32, "grad_norm": 1.9453125, "learning_rate": 0.00018791640516017762, "loss": 2.2665, "step": 134365 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018791552439874446, "loss": 2.2243, "step": 134370 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001879146436072777, "loss": 2.1695, "step": 134375 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018791376278577777, "loss": 1.9299, "step": 134380 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018791288193424489, "loss": 2.0503, "step": 134385 }, { "epoch": 0.32, "grad_norm": 2.71875, "learning_rate": 0.00018791200105267933, "loss": 2.2501, "step": 134390 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001879111201410815, "loss": 2.0009, "step": 134395 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001879102391994516, "loss": 2.0837, "step": 134400 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018790935822778995, "loss": 2.2465, "step": 134405 }, { "epoch": 0.32, "grad_norm": 1.7890625, "learning_rate": 0.00018790847722609691, "loss": 2.0509, "step": 134410 }, { "epoch": 0.32, "grad_norm": 2.546875, "learning_rate": 0.00018790759619437272, "loss": 2.1563, "step": 134415 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001879067151326177, "loss": 1.9905, "step": 134420 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018790583404083218, "loss": 2.2031, "step": 134425 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001879049529190164, "loss": 2.0669, "step": 134430 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018790407176717068, "loss": 1.9813, "step": 134435 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018790319058529535, "loss": 2.2848, "step": 134440 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.00018790230937339072, "loss": 1.9499, "step": 134445 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018790142813145705, "loss": 2.0088, "step": 134450 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018790054685949466, "loss": 2.1816, "step": 134455 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018789966555750386, "loss": 1.9604, "step": 134460 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.0001878987842254849, "loss": 2.0911, "step": 134465 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018789790286343817, "loss": 2.2366, "step": 134470 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.0001878970214713639, "loss": 2.2659, "step": 134475 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001878961400492624, "loss": 2.0306, "step": 134480 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018789525859713398, "loss": 1.9942, "step": 134485 }, { "epoch": 0.32, "grad_norm": 2.71875, "learning_rate": 0.000187894377114979, "loss": 2.1597, "step": 134490 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.00018789349560279766, "loss": 2.1947, "step": 134495 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018789261406059032, "loss": 2.0947, "step": 134500 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018789173248835725, "loss": 2.172, "step": 134505 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018789085088609882, "loss": 2.1159, "step": 134510 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018788996925381523, "loss": 2.0465, "step": 134515 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018788908759150686, "loss": 2.1467, "step": 134520 }, { "epoch": 0.32, "grad_norm": 1.640625, "learning_rate": 0.00018788820589917397, "loss": 2.2617, "step": 134525 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018788732417681687, "loss": 2.1766, "step": 134530 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018788644242443588, "loss": 2.1664, "step": 134535 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.0001878855606420313, "loss": 2.0722, "step": 134540 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.0001878846788296034, "loss": 2.2322, "step": 134545 }, { "epoch": 0.32, "grad_norm": 2.53125, "learning_rate": 0.00018788379698715248, "loss": 2.2318, "step": 134550 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018788291511467891, "loss": 2.1761, "step": 134555 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.0001878820332121829, "loss": 2.1367, "step": 134560 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.0001878811512796648, "loss": 2.0118, "step": 134565 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.0001878802693171249, "loss": 2.1537, "step": 134570 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018787938732456354, "loss": 2.0835, "step": 134575 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018787850530198097, "loss": 2.2139, "step": 134580 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.0001878776232493775, "loss": 2.2441, "step": 134585 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018787674116675347, "loss": 2.3096, "step": 134590 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018787585905410912, "loss": 2.0462, "step": 134595 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.0001878749769114448, "loss": 2.2047, "step": 134600 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018787409473876077, "loss": 2.3034, "step": 134605 }, { "epoch": 0.32, "grad_norm": 4.4375, "learning_rate": 0.00018787321253605738, "loss": 2.2346, "step": 134610 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018787233030333487, "loss": 2.1572, "step": 134615 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018787144804059363, "loss": 2.1578, "step": 134620 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001878705657478339, "loss": 1.8083, "step": 134625 }, { "epoch": 0.32, "grad_norm": 1.9140625, "learning_rate": 0.00018786968342505595, "loss": 2.2015, "step": 134630 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018786880107226015, "loss": 2.0946, "step": 134635 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.0001878679186894468, "loss": 2.1949, "step": 134640 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018786703627661613, "loss": 2.1977, "step": 134645 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001878661538337685, "loss": 2.1196, "step": 134650 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001878652713609042, "loss": 2.3229, "step": 134655 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018786438885802354, "loss": 2.0986, "step": 134660 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018786350632512682, "loss": 2.1747, "step": 134665 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.0001878626237622143, "loss": 2.0391, "step": 134670 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018786174116928634, "loss": 2.0649, "step": 134675 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.0001878608585463432, "loss": 2.0825, "step": 134680 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018785997589338522, "loss": 2.1655, "step": 134685 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.00018785909321041266, "loss": 2.2971, "step": 134690 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018785821049742583, "loss": 2.1854, "step": 134695 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018785732775442507, "loss": 2.1276, "step": 134700 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018785644498141063, "loss": 2.2076, "step": 134705 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018785556217838285, "loss": 2.1975, "step": 134710 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018785467934534202, "loss": 2.2736, "step": 134715 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.0001878537964822884, "loss": 2.3345, "step": 134720 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018785291358922238, "loss": 2.1184, "step": 134725 }, { "epoch": 0.32, "grad_norm": 2.71875, "learning_rate": 0.0001878520306661442, "loss": 2.167, "step": 134730 }, { "epoch": 0.32, "grad_norm": 1.8828125, "learning_rate": 0.00018785114771305414, "loss": 2.0722, "step": 134735 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.00018785026472995257, "loss": 2.0587, "step": 134740 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018784938171683977, "loss": 2.3382, "step": 134745 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.000187848498673716, "loss": 2.1552, "step": 134750 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.0001878476156005816, "loss": 1.852, "step": 134755 }, { "epoch": 0.32, "grad_norm": 1.859375, "learning_rate": 0.00018784673249743684, "loss": 2.0558, "step": 134760 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018784584936428205, "loss": 2.2412, "step": 134765 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018784496620111754, "loss": 2.2151, "step": 134770 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.0001878440830079436, "loss": 2.0765, "step": 134775 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.0001878431997847605, "loss": 2.0951, "step": 134780 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018784231653156858, "loss": 2.1756, "step": 134785 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018784143324836815, "loss": 2.2557, "step": 134790 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018784054993515947, "loss": 2.2555, "step": 134795 }, { "epoch": 0.32, "grad_norm": 1.9375, "learning_rate": 0.0001878396665919429, "loss": 2.0628, "step": 134800 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018783878321871863, "loss": 2.0618, "step": 134805 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018783789981548712, "loss": 1.9862, "step": 134810 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018783701638224856, "loss": 2.1936, "step": 134815 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.0001878361329190033, "loss": 2.1646, "step": 134820 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018783524942575158, "loss": 2.0906, "step": 134825 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001878343659024938, "loss": 2.168, "step": 134830 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018783348234923018, "loss": 2.0998, "step": 134835 }, { "epoch": 0.32, "grad_norm": 1.9453125, "learning_rate": 0.00018783259876596105, "loss": 2.1913, "step": 134840 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.0001878317151526867, "loss": 2.0829, "step": 134845 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018783083150940747, "loss": 2.1496, "step": 134850 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.0001878299478361236, "loss": 2.2138, "step": 134855 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018782906413283544, "loss": 2.2311, "step": 134860 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018782818039954328, "loss": 2.2277, "step": 134865 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018782729663624744, "loss": 2.1876, "step": 134870 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.0001878264128429482, "loss": 2.3041, "step": 134875 }, { "epoch": 0.32, "grad_norm": 1.8671875, "learning_rate": 0.00018782552901964585, "loss": 2.1711, "step": 134880 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018782464516634072, "loss": 2.0646, "step": 134885 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018782376128303308, "loss": 2.0873, "step": 134890 }, { "epoch": 0.32, "grad_norm": 1.859375, "learning_rate": 0.00018782287736972327, "loss": 2.0602, "step": 134895 }, { "epoch": 0.32, "grad_norm": 1.78125, "learning_rate": 0.00018782199342641157, "loss": 2.1718, "step": 134900 }, { "epoch": 0.32, "grad_norm": 1.828125, "learning_rate": 0.00018782110945309827, "loss": 2.1028, "step": 134905 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018782022544978365, "loss": 2.0935, "step": 134910 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018781934141646813, "loss": 2.0837, "step": 134915 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.0001878184573531519, "loss": 2.2881, "step": 134920 }, { "epoch": 0.32, "grad_norm": 1.8125, "learning_rate": 0.00018781757325983525, "loss": 2.2269, "step": 134925 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018781668913651854, "loss": 2.2048, "step": 134930 }, { "epoch": 0.32, "grad_norm": 2.796875, "learning_rate": 0.00018781580498320212, "loss": 2.1186, "step": 134935 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018781492079988617, "loss": 2.1914, "step": 134940 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018781403658657105, "loss": 2.2409, "step": 134945 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.0001878131523432571, "loss": 2.0973, "step": 134950 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018781226806994454, "loss": 2.1214, "step": 134955 }, { "epoch": 0.32, "grad_norm": 1.9765625, "learning_rate": 0.00018781138376663375, "loss": 2.1341, "step": 134960 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018781049943332497, "loss": 2.2565, "step": 134965 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018780961507001856, "loss": 2.2542, "step": 134970 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018780873067671477, "loss": 2.2276, "step": 134975 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.00018780784625341396, "loss": 2.0523, "step": 134980 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018780696180011635, "loss": 2.2194, "step": 134985 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001878060773168223, "loss": 2.2376, "step": 134990 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018780519280353213, "loss": 2.1246, "step": 134995 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.0001878043082602461, "loss": 2.1333, "step": 135000 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018780342368696448, "loss": 2.1216, "step": 135005 }, { "epoch": 0.32, "grad_norm": 3.046875, "learning_rate": 0.00018780253908368767, "loss": 2.1941, "step": 135010 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018780165445041592, "loss": 2.1507, "step": 135015 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001878007697871495, "loss": 2.1346, "step": 135020 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.0001877998850938888, "loss": 2.108, "step": 135025 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018779900037063402, "loss": 2.1667, "step": 135030 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018779811561738555, "loss": 2.0671, "step": 135035 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018779723083414362, "loss": 2.3301, "step": 135040 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018779634602090854, "loss": 2.1612, "step": 135045 }, { "epoch": 0.32, "grad_norm": 1.828125, "learning_rate": 0.00018779546117768067, "loss": 2.1666, "step": 135050 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.00018779457630446026, "loss": 2.0328, "step": 135055 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018779369140124765, "loss": 2.1672, "step": 135060 }, { "epoch": 0.32, "grad_norm": 2.859375, "learning_rate": 0.00018779280646804314, "loss": 2.3398, "step": 135065 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018779192150484696, "loss": 2.2692, "step": 135070 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018779103651165953, "loss": 2.028, "step": 135075 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018779015148848104, "loss": 2.3338, "step": 135080 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018778926643531186, "loss": 1.9841, "step": 135085 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001877883813521523, "loss": 2.2815, "step": 135090 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018778749623900258, "loss": 2.24, "step": 135095 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.0001877866110958631, "loss": 2.1568, "step": 135100 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.0001877857259227341, "loss": 2.1402, "step": 135105 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018778484071961592, "loss": 2.1921, "step": 135110 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018778395548650886, "loss": 2.2063, "step": 135115 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.00018778307022341317, "loss": 2.073, "step": 135120 }, { "epoch": 0.32, "grad_norm": 1.8828125, "learning_rate": 0.00018778218493032921, "loss": 2.3375, "step": 135125 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.0001877812996072573, "loss": 2.1465, "step": 135130 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018778041425419764, "loss": 2.12, "step": 135135 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018777952887115066, "loss": 1.9449, "step": 135140 }, { "epoch": 0.32, "grad_norm": 2.59375, "learning_rate": 0.00018777864345811659, "loss": 2.234, "step": 135145 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.0001877777580150957, "loss": 2.2737, "step": 135150 }, { "epoch": 0.32, "grad_norm": 2.5, "learning_rate": 0.00018777687254208834, "loss": 2.1187, "step": 135155 }, { "epoch": 0.32, "grad_norm": 1.7890625, "learning_rate": 0.00018777598703909485, "loss": 2.2222, "step": 135160 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018777510150611547, "loss": 2.1584, "step": 135165 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018777421594315052, "loss": 2.1044, "step": 135170 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.0001877733303502003, "loss": 2.0408, "step": 135175 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018777244472726514, "loss": 2.1655, "step": 135180 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018777155907434532, "loss": 2.3378, "step": 135185 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018777067339144113, "loss": 2.052, "step": 135190 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018776978767855287, "loss": 2.0302, "step": 135195 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001877689019356809, "loss": 2.0588, "step": 135200 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018776801616282544, "loss": 2.1079, "step": 135205 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018776713035998686, "loss": 2.1514, "step": 135210 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018776624452716543, "loss": 2.0032, "step": 135215 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018776535866436148, "loss": 2.1992, "step": 135220 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018776447277157524, "loss": 2.1275, "step": 135225 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001877635868488071, "loss": 2.1497, "step": 135230 }, { "epoch": 0.32, "grad_norm": 1.7265625, "learning_rate": 0.00018776270089605732, "loss": 1.9922, "step": 135235 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018776181491332624, "loss": 2.1913, "step": 135240 }, { "epoch": 0.32, "grad_norm": 2.765625, "learning_rate": 0.00018776092890061407, "loss": 1.9482, "step": 135245 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001877600428579212, "loss": 2.1939, "step": 135250 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018775915678524794, "loss": 2.1309, "step": 135255 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018775827068259453, "loss": 2.0569, "step": 135260 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001877573845499613, "loss": 2.0089, "step": 135265 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018775649838734859, "loss": 2.0985, "step": 135270 }, { "epoch": 0.32, "grad_norm": 1.65625, "learning_rate": 0.00018775561219475665, "loss": 2.1031, "step": 135275 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.0001877547259721858, "loss": 2.3244, "step": 135280 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.0001877538397196363, "loss": 2.0455, "step": 135285 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018775295343710858, "loss": 2.2406, "step": 135290 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.0001877520671246028, "loss": 2.1708, "step": 135295 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018775118078211933, "loss": 2.0901, "step": 135300 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018775029440965847, "loss": 2.1153, "step": 135305 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018774940800722053, "loss": 2.1057, "step": 135310 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018774852157480578, "loss": 2.1209, "step": 135315 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018774763511241458, "loss": 2.3008, "step": 135320 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018774674862004717, "loss": 2.1989, "step": 135325 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018774586209770387, "loss": 2.0789, "step": 135330 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.000187744975545385, "loss": 2.2488, "step": 135335 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018774408896309085, "loss": 2.0708, "step": 135340 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018774320235082174, "loss": 2.1968, "step": 135345 }, { "epoch": 0.32, "grad_norm": 1.796875, "learning_rate": 0.00018774231570857798, "loss": 2.1589, "step": 135350 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018774142903635983, "loss": 2.0314, "step": 135355 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001877405423341676, "loss": 2.1827, "step": 135360 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018773965560200164, "loss": 2.2097, "step": 135365 }, { "epoch": 0.32, "grad_norm": 1.9765625, "learning_rate": 0.0001877387688398622, "loss": 2.1349, "step": 135370 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018773788204774964, "loss": 2.2793, "step": 135375 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.0001877369952256642, "loss": 2.0966, "step": 135380 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.0001877361083736062, "loss": 2.3206, "step": 135385 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.000187735221491576, "loss": 2.0461, "step": 135390 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018773433457957382, "loss": 2.1985, "step": 135395 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001877334476376, "loss": 2.1045, "step": 135400 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018773256066565485, "loss": 2.2521, "step": 135405 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018773167366373868, "loss": 2.0963, "step": 135410 }, { "epoch": 0.32, "grad_norm": 2.703125, "learning_rate": 0.00018773078663185175, "loss": 2.145, "step": 135415 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.0001877298995699944, "loss": 2.1888, "step": 135420 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018772901247816694, "loss": 2.1696, "step": 135425 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018772812535636964, "loss": 2.2671, "step": 135430 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018772723820460283, "loss": 2.2452, "step": 135435 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018772635102286682, "loss": 2.1901, "step": 135440 }, { "epoch": 0.32, "grad_norm": 1.859375, "learning_rate": 0.0001877254638111619, "loss": 2.2138, "step": 135445 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018772457656948834, "loss": 2.0003, "step": 135450 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.0001877236892978465, "loss": 2.4051, "step": 135455 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.00018772280199623662, "loss": 1.9792, "step": 135460 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018772191466465908, "loss": 2.2834, "step": 135465 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018772102730311413, "loss": 2.251, "step": 135470 }, { "epoch": 0.32, "grad_norm": 3.15625, "learning_rate": 0.00018772013991160208, "loss": 2.0751, "step": 135475 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018771925249012322, "loss": 1.8931, "step": 135480 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018771836503867794, "loss": 2.1862, "step": 135485 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018771747755726642, "loss": 2.268, "step": 135490 }, { "epoch": 0.32, "grad_norm": 1.734375, "learning_rate": 0.000187716590045889, "loss": 2.1233, "step": 135495 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018771570250454605, "loss": 2.1833, "step": 135500 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018771481493323782, "loss": 2.1487, "step": 135505 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001877139273319646, "loss": 2.1207, "step": 135510 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018771303970072673, "loss": 2.092, "step": 135515 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.00018771215203952445, "loss": 2.0931, "step": 135520 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018771126434835816, "loss": 2.1552, "step": 135525 }, { "epoch": 0.32, "grad_norm": 2.5625, "learning_rate": 0.00018771037662722808, "loss": 2.1202, "step": 135530 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018770948887613458, "loss": 2.0675, "step": 135535 }, { "epoch": 0.32, "grad_norm": 3.15625, "learning_rate": 0.00018770860109507787, "loss": 2.2079, "step": 135540 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018770771328405837, "loss": 2.0871, "step": 135545 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018770682544307633, "loss": 1.9317, "step": 135550 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.000187705937572132, "loss": 2.1546, "step": 135555 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018770504967122574, "loss": 2.1519, "step": 135560 }, { "epoch": 0.32, "grad_norm": 1.8828125, "learning_rate": 0.00018770416174035786, "loss": 2.0795, "step": 135565 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018770327377952864, "loss": 2.1059, "step": 135570 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018770238578873837, "loss": 2.1724, "step": 135575 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.0001877014977679874, "loss": 2.0763, "step": 135580 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018770060971727603, "loss": 2.1034, "step": 135585 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001876997216366045, "loss": 2.1185, "step": 135590 }, { "epoch": 0.32, "grad_norm": 3.265625, "learning_rate": 0.00018769883352597318, "loss": 2.3563, "step": 135595 }, { "epoch": 0.32, "grad_norm": 1.9453125, "learning_rate": 0.00018769794538538235, "loss": 2.3112, "step": 135600 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018769705721483228, "loss": 2.1322, "step": 135605 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018769616901432334, "loss": 2.3147, "step": 135610 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.00018769528078385576, "loss": 2.0755, "step": 135615 }, { "epoch": 0.32, "grad_norm": 3.109375, "learning_rate": 0.00018769439252342993, "loss": 2.2209, "step": 135620 }, { "epoch": 0.32, "grad_norm": 2.546875, "learning_rate": 0.00018769350423304606, "loss": 2.2839, "step": 135625 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018769261591270454, "loss": 2.0866, "step": 135630 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001876917275624056, "loss": 2.1313, "step": 135635 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001876908391821496, "loss": 2.0871, "step": 135640 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.0001876899507719368, "loss": 2.1182, "step": 135645 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018768906233176754, "loss": 2.0288, "step": 135650 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018768817386164205, "loss": 2.1936, "step": 135655 }, { "epoch": 0.32, "grad_norm": 2.671875, "learning_rate": 0.00018768728536156074, "loss": 2.1189, "step": 135660 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018768639683152386, "loss": 2.207, "step": 135665 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018768550827153173, "loss": 1.9801, "step": 135670 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018768461968158458, "loss": 2.2877, "step": 135675 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018768373106168286, "loss": 2.2146, "step": 135680 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018768284241182673, "loss": 2.0296, "step": 135685 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018768195373201655, "loss": 2.0957, "step": 135690 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018768106502225265, "loss": 2.06, "step": 135695 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.0001876801762825353, "loss": 2.2097, "step": 135700 }, { "epoch": 0.32, "grad_norm": 1.8671875, "learning_rate": 0.0001876792875128648, "loss": 2.1768, "step": 135705 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.0001876783987132415, "loss": 2.2443, "step": 135710 }, { "epoch": 0.32, "grad_norm": 3.71875, "learning_rate": 0.00018767750988366562, "loss": 2.1003, "step": 135715 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018767662102413753, "loss": 2.1712, "step": 135720 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018767573213465753, "loss": 2.3103, "step": 135725 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.0001876748432152259, "loss": 2.3212, "step": 135730 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018767395426584296, "loss": 2.2343, "step": 135735 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.000187673065286509, "loss": 2.0473, "step": 135740 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018767217627722432, "loss": 2.1107, "step": 135745 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018767128723798927, "loss": 2.1915, "step": 135750 }, { "epoch": 0.32, "grad_norm": 2.90625, "learning_rate": 0.00018767039816880408, "loss": 2.1207, "step": 135755 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.0001876695090696691, "loss": 2.1505, "step": 135760 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018766861994058465, "loss": 2.143, "step": 135765 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.000187667730781551, "loss": 2.0845, "step": 135770 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018766684159256844, "loss": 2.2085, "step": 135775 }, { "epoch": 0.32, "grad_norm": 1.71875, "learning_rate": 0.0001876659523736373, "loss": 2.1668, "step": 135780 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018766506312475792, "loss": 2.1583, "step": 135785 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001876641738459305, "loss": 2.0199, "step": 135790 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018766328453715547, "loss": 2.2766, "step": 135795 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018766239519843304, "loss": 1.9902, "step": 135800 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.0001876615058297636, "loss": 2.1995, "step": 135805 }, { "epoch": 0.32, "grad_norm": 1.8671875, "learning_rate": 0.0001876606164311473, "loss": 2.2117, "step": 135810 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001876597270025846, "loss": 2.0897, "step": 135815 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018765883754407576, "loss": 2.1507, "step": 135820 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018765794805562105, "loss": 2.1795, "step": 135825 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018765705853722082, "loss": 2.0266, "step": 135830 }, { "epoch": 0.32, "grad_norm": 2.75, "learning_rate": 0.0001876561689888753, "loss": 2.0484, "step": 135835 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001876552794105849, "loss": 2.2589, "step": 135840 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018765438980234984, "loss": 2.1704, "step": 135845 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018765350016417044, "loss": 2.2238, "step": 135850 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018765261049604704, "loss": 2.1255, "step": 135855 }, { "epoch": 0.32, "grad_norm": 1.8203125, "learning_rate": 0.0001876517207979799, "loss": 2.119, "step": 135860 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018765083106996933, "loss": 2.0874, "step": 135865 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018764994131201568, "loss": 2.2679, "step": 135870 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.0001876490515241192, "loss": 1.93, "step": 135875 }, { "epoch": 0.32, "grad_norm": 1.75, "learning_rate": 0.00018764816170628023, "loss": 2.1648, "step": 135880 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018764727185849905, "loss": 2.0286, "step": 135885 }, { "epoch": 0.32, "grad_norm": 3.125, "learning_rate": 0.00018764638198077595, "loss": 2.0689, "step": 135890 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018764549207311127, "loss": 2.1482, "step": 135895 }, { "epoch": 0.32, "grad_norm": 1.7890625, "learning_rate": 0.0001876446021355053, "loss": 2.2175, "step": 135900 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018764371216795835, "loss": 1.9968, "step": 135905 }, { "epoch": 0.32, "grad_norm": 1.828125, "learning_rate": 0.00018764282217047075, "loss": 2.034, "step": 135910 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.0001876419321430427, "loss": 2.0626, "step": 135915 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018764104208567463, "loss": 2.1567, "step": 135920 }, { "epoch": 0.32, "grad_norm": 1.75, "learning_rate": 0.0001876401519983668, "loss": 2.4114, "step": 135925 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.0001876392618811195, "loss": 2.1824, "step": 135930 }, { "epoch": 0.32, "grad_norm": 2.546875, "learning_rate": 0.000187638371733933, "loss": 1.9863, "step": 135935 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018763748155680766, "loss": 2.1558, "step": 135940 }, { "epoch": 0.32, "grad_norm": 1.8828125, "learning_rate": 0.00018763659134974375, "loss": 1.999, "step": 135945 }, { "epoch": 0.32, "grad_norm": 1.9453125, "learning_rate": 0.00018763570111274164, "loss": 2.1659, "step": 135950 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018763481084580158, "loss": 2.1441, "step": 135955 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018763392054892383, "loss": 2.0472, "step": 135960 }, { "epoch": 0.32, "grad_norm": 2.796875, "learning_rate": 0.00018763303022210876, "loss": 2.1664, "step": 135965 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018763213986535665, "loss": 2.1014, "step": 135970 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018763124947866785, "loss": 2.2054, "step": 135975 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.0001876303590620426, "loss": 2.1634, "step": 135980 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018762946861548122, "loss": 2.1383, "step": 135985 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018762857813898407, "loss": 2.2626, "step": 135990 }, { "epoch": 0.32, "grad_norm": 1.84375, "learning_rate": 0.00018762768763255137, "loss": 2.1232, "step": 135995 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.0001876267970961835, "loss": 2.2035, "step": 136000 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018762590652988067, "loss": 2.19, "step": 136005 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001876250159336433, "loss": 2.1515, "step": 136010 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018762412530747157, "loss": 2.0279, "step": 136015 }, { "epoch": 0.32, "grad_norm": 1.8828125, "learning_rate": 0.0001876232346513659, "loss": 2.1777, "step": 136020 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018762234396532652, "loss": 1.9958, "step": 136025 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018762145324935375, "loss": 2.0654, "step": 136030 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018762056250344795, "loss": 2.319, "step": 136035 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018761967172760933, "loss": 2.0256, "step": 136040 }, { "epoch": 0.32, "grad_norm": 1.9375, "learning_rate": 0.00018761878092183825, "loss": 2.0333, "step": 136045 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.000187617890086135, "loss": 2.264, "step": 136050 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018761699922049992, "loss": 2.2865, "step": 136055 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018761610832493327, "loss": 2.1333, "step": 136060 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018761521739943534, "loss": 2.1376, "step": 136065 }, { "epoch": 0.32, "grad_norm": 2.5625, "learning_rate": 0.0001876143264440065, "loss": 2.0021, "step": 136070 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.000187613435458647, "loss": 2.1692, "step": 136075 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.00018761254444335718, "loss": 2.2013, "step": 136080 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018761165339813732, "loss": 2.2482, "step": 136085 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018761076232298772, "loss": 2.1865, "step": 136090 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018760987121790867, "loss": 2.0963, "step": 136095 }, { "epoch": 0.32, "grad_norm": 2.71875, "learning_rate": 0.00018760898008290052, "loss": 2.1134, "step": 136100 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018760808891796357, "loss": 1.9177, "step": 136105 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.00018760719772309813, "loss": 2.1411, "step": 136110 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018760630649830443, "loss": 2.0723, "step": 136115 }, { "epoch": 0.32, "grad_norm": 2.6875, "learning_rate": 0.00018760541524358285, "loss": 2.1521, "step": 136120 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.00018760452395893366, "loss": 2.3548, "step": 136125 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.00018760363264435718, "loss": 2.0124, "step": 136130 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001876027412998537, "loss": 1.9118, "step": 136135 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018760184992542355, "loss": 1.9677, "step": 136140 }, { "epoch": 0.32, "grad_norm": 2.875, "learning_rate": 0.00018760095852106702, "loss": 2.2191, "step": 136145 }, { "epoch": 0.32, "grad_norm": 2.671875, "learning_rate": 0.00018760006708678443, "loss": 2.0405, "step": 136150 }, { "epoch": 0.32, "grad_norm": 2.578125, "learning_rate": 0.00018759917562257602, "loss": 2.1222, "step": 136155 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018759828412844218, "loss": 2.1466, "step": 136160 }, { "epoch": 0.32, "grad_norm": 3.046875, "learning_rate": 0.00018759739260438318, "loss": 2.2709, "step": 136165 }, { "epoch": 0.32, "grad_norm": 2.625, "learning_rate": 0.00018759650105039933, "loss": 2.3441, "step": 136170 }, { "epoch": 0.32, "grad_norm": 2.703125, "learning_rate": 0.00018759560946649087, "loss": 2.2721, "step": 136175 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018759471785265822, "loss": 2.1678, "step": 136180 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001875938262089016, "loss": 2.1001, "step": 136185 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001875929345352214, "loss": 2.2465, "step": 136190 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.00018759204283161774, "loss": 2.2953, "step": 136195 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018759115109809112, "loss": 2.1084, "step": 136200 }, { "epoch": 0.32, "grad_norm": 2.640625, "learning_rate": 0.0001875902593346418, "loss": 2.089, "step": 136205 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.00018758936754127004, "loss": 2.1495, "step": 136210 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018758847571797614, "loss": 2.0947, "step": 136215 }, { "epoch": 0.32, "grad_norm": 1.9375, "learning_rate": 0.00018758758386476046, "loss": 2.093, "step": 136220 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018758669198162326, "loss": 2.1015, "step": 136225 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018758580006856485, "loss": 1.9977, "step": 136230 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018758490812558555, "loss": 2.0151, "step": 136235 }, { "epoch": 0.32, "grad_norm": 1.921875, "learning_rate": 0.00018758401615268566, "loss": 2.0548, "step": 136240 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018758312414986547, "loss": 2.2479, "step": 136245 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018758223211712532, "loss": 2.1149, "step": 136250 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018758134005446547, "loss": 2.1249, "step": 136255 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018758044796188624, "loss": 1.9821, "step": 136260 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.00018757955583938795, "loss": 2.1657, "step": 136265 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.0001875786636869709, "loss": 2.0026, "step": 136270 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.0001875777715046354, "loss": 2.1879, "step": 136275 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018757687929238173, "loss": 2.2109, "step": 136280 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001875759870502102, "loss": 2.1951, "step": 136285 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018757509477812113, "loss": 2.2119, "step": 136290 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018757420247611484, "loss": 2.0092, "step": 136295 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001875733101441916, "loss": 2.2293, "step": 136300 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018757241778235174, "loss": 2.3383, "step": 136305 }, { "epoch": 0.32, "grad_norm": 1.8515625, "learning_rate": 0.00018757152539059551, "loss": 2.009, "step": 136310 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001875706329689233, "loss": 2.1484, "step": 136315 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001875697405173354, "loss": 2.0926, "step": 136320 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.000187568848035832, "loss": 2.2309, "step": 136325 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018756795552441356, "loss": 2.1892, "step": 136330 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018756706298308031, "loss": 2.298, "step": 136335 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018756617041183255, "loss": 1.9792, "step": 136340 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018756527781067062, "loss": 1.9982, "step": 136345 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018756438517959477, "loss": 2.0277, "step": 136350 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018756349251860537, "loss": 2.1127, "step": 136355 }, { "epoch": 0.32, "grad_norm": 1.9140625, "learning_rate": 0.00018756259982770268, "loss": 2.1469, "step": 136360 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.000187561707106887, "loss": 2.1227, "step": 136365 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018756081435615868, "loss": 2.2048, "step": 136370 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018755992157551798, "loss": 2.1126, "step": 136375 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018755902876496525, "loss": 2.0958, "step": 136380 }, { "epoch": 0.32, "grad_norm": 1.84375, "learning_rate": 0.00018755813592450076, "loss": 2.0501, "step": 136385 }, { "epoch": 0.32, "grad_norm": 1.734375, "learning_rate": 0.00018755724305412482, "loss": 2.1227, "step": 136390 }, { "epoch": 0.32, "grad_norm": 1.9140625, "learning_rate": 0.0001875563501538377, "loss": 2.0074, "step": 136395 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018755545722363976, "loss": 2.1179, "step": 136400 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018755456426353134, "loss": 2.1278, "step": 136405 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018755367127351263, "loss": 2.1715, "step": 136410 }, { "epoch": 0.32, "grad_norm": 1.859375, "learning_rate": 0.000187552778253584, "loss": 2.0848, "step": 136415 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001875518852037458, "loss": 2.0321, "step": 136420 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.00018755099212399828, "loss": 2.1048, "step": 136425 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018755009901434173, "loss": 2.2037, "step": 136430 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018754920587477647, "loss": 2.1397, "step": 136435 }, { "epoch": 0.32, "grad_norm": 2.6875, "learning_rate": 0.00018754831270530281, "loss": 2.1409, "step": 136440 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001875474195059211, "loss": 2.1046, "step": 136445 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018754652627663156, "loss": 1.9549, "step": 136450 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018754563301743457, "loss": 2.1917, "step": 136455 }, { "epoch": 0.32, "grad_norm": 1.8203125, "learning_rate": 0.0001875447397283304, "loss": 2.3196, "step": 136460 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018754384640931935, "loss": 1.9177, "step": 136465 }, { "epoch": 0.32, "grad_norm": 1.9296875, "learning_rate": 0.00018754295306040172, "loss": 2.0788, "step": 136470 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018754205968157786, "loss": 2.1198, "step": 136475 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018754116627284803, "loss": 2.242, "step": 136480 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018754027283421256, "loss": 2.1679, "step": 136485 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001875393793656717, "loss": 2.3808, "step": 136490 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018753848586722585, "loss": 2.0489, "step": 136495 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018753759233887526, "loss": 2.296, "step": 136500 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018753669878062023, "loss": 1.8837, "step": 136505 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018753580519246109, "loss": 2.0569, "step": 136510 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.0001875349115743981, "loss": 2.2162, "step": 136515 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018753401792643163, "loss": 2.2764, "step": 136520 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018753312424856194, "loss": 2.1006, "step": 136525 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018753223054078933, "loss": 2.1605, "step": 136530 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018753133680311416, "loss": 2.036, "step": 136535 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018753044303553668, "loss": 2.0965, "step": 136540 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018752954923805722, "loss": 2.0984, "step": 136545 }, { "epoch": 0.32, "grad_norm": 1.7109375, "learning_rate": 0.00018752865541067605, "loss": 2.094, "step": 136550 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018752776155339353, "loss": 2.1353, "step": 136555 }, { "epoch": 0.32, "grad_norm": 1.875, "learning_rate": 0.00018752686766620994, "loss": 2.3362, "step": 136560 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018752597374912557, "loss": 2.2253, "step": 136565 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018752507980214074, "loss": 2.2296, "step": 136570 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018752418582525574, "loss": 2.1412, "step": 136575 }, { "epoch": 0.32, "grad_norm": 2.5, "learning_rate": 0.00018752329181847093, "loss": 2.1422, "step": 136580 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018752239778178654, "loss": 2.1278, "step": 136585 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018752150371520294, "loss": 2.2726, "step": 136590 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.0001875206096187204, "loss": 2.2269, "step": 136595 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.00018751971549233924, "loss": 2.1202, "step": 136600 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018751882133605973, "loss": 2.0465, "step": 136605 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018751792714988223, "loss": 2.1415, "step": 136610 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018751703293380702, "loss": 2.1906, "step": 136615 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001875161386878344, "loss": 2.1629, "step": 136620 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018751524441196468, "loss": 2.1694, "step": 136625 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018751435010619814, "loss": 2.1888, "step": 136630 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018751345577053513, "loss": 2.1007, "step": 136635 }, { "epoch": 0.32, "grad_norm": 1.78125, "learning_rate": 0.00018751256140497592, "loss": 2.117, "step": 136640 }, { "epoch": 0.32, "grad_norm": 2.953125, "learning_rate": 0.00018751166700952087, "loss": 2.3812, "step": 136645 }, { "epoch": 0.32, "grad_norm": 2.578125, "learning_rate": 0.00018751077258417022, "loss": 2.0514, "step": 136650 }, { "epoch": 0.32, "grad_norm": 3.03125, "learning_rate": 0.00018750987812892432, "loss": 2.1064, "step": 136655 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018750898364378347, "loss": 2.1151, "step": 136660 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018750808912874791, "loss": 2.2591, "step": 136665 }, { "epoch": 0.32, "grad_norm": 1.7890625, "learning_rate": 0.00018750719458381806, "loss": 2.016, "step": 136670 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018750630000899412, "loss": 2.2982, "step": 136675 }, { "epoch": 0.32, "grad_norm": 1.9375, "learning_rate": 0.00018750540540427645, "loss": 2.2219, "step": 136680 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018750451076966535, "loss": 2.2606, "step": 136685 }, { "epoch": 0.32, "grad_norm": 1.8359375, "learning_rate": 0.00018750361610516113, "loss": 2.139, "step": 136690 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.0001875027214107641, "loss": 2.0536, "step": 136695 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018750182668647453, "loss": 2.1782, "step": 136700 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018750093193229276, "loss": 2.1538, "step": 136705 }, { "epoch": 0.32, "grad_norm": 2.5625, "learning_rate": 0.00018750003714821908, "loss": 2.2511, "step": 136710 }, { "epoch": 0.32, "grad_norm": 2.96875, "learning_rate": 0.0001874991423342538, "loss": 2.0943, "step": 136715 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.0001874982474903972, "loss": 2.0935, "step": 136720 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018749735261664968, "loss": 2.2243, "step": 136725 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018749645771301142, "loss": 1.9555, "step": 136730 }, { "epoch": 0.32, "grad_norm": 2.75, "learning_rate": 0.00018749556277948283, "loss": 2.1511, "step": 136735 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.0001874946678160641, "loss": 2.1318, "step": 136740 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.00018749377282275566, "loss": 2.1857, "step": 136745 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018749287779955775, "loss": 2.0509, "step": 136750 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018749198274647068, "loss": 2.18, "step": 136755 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018749108766349473, "loss": 2.0788, "step": 136760 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.0001874901925506303, "loss": 2.1056, "step": 136765 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001874892974078776, "loss": 2.1353, "step": 136770 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018748840223523696, "loss": 2.1416, "step": 136775 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.00018748750703270872, "loss": 1.89, "step": 136780 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018748661180029315, "loss": 2.0174, "step": 136785 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018748571653799057, "loss": 1.9107, "step": 136790 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018748482124580126, "loss": 2.062, "step": 136795 }, { "epoch": 0.32, "grad_norm": 1.671875, "learning_rate": 0.00018748392592372556, "loss": 1.9392, "step": 136800 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018748303057176377, "loss": 2.1651, "step": 136805 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018748213518991622, "loss": 2.204, "step": 136810 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018748123977818315, "loss": 2.3075, "step": 136815 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018748034433656492, "loss": 2.2313, "step": 136820 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001874794488650618, "loss": 1.9988, "step": 136825 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018747855336367412, "loss": 1.8241, "step": 136830 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018747765783240218, "loss": 2.2912, "step": 136835 }, { "epoch": 0.32, "grad_norm": 1.8515625, "learning_rate": 0.0001874767622712463, "loss": 2.1286, "step": 136840 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018747586668020673, "loss": 2.2653, "step": 136845 }, { "epoch": 0.32, "grad_norm": 1.8671875, "learning_rate": 0.00018747497105928385, "loss": 2.1814, "step": 136850 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.0001874740754084779, "loss": 2.1839, "step": 136855 }, { "epoch": 0.32, "grad_norm": 2.703125, "learning_rate": 0.00018747317972778928, "loss": 1.9274, "step": 136860 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001874722840172182, "loss": 1.9086, "step": 136865 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.000187471388276765, "loss": 2.142, "step": 136870 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.00018747049250642997, "loss": 2.0703, "step": 136875 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001874695967062135, "loss": 2.098, "step": 136880 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018746870087611576, "loss": 2.3132, "step": 136885 }, { "epoch": 0.32, "grad_norm": 1.8203125, "learning_rate": 0.00018746780501613716, "loss": 2.1317, "step": 136890 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.00018746690912627796, "loss": 2.0789, "step": 136895 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001874660132065385, "loss": 2.1869, "step": 136900 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.000187465117256919, "loss": 2.1179, "step": 136905 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001874642212774199, "loss": 2.1662, "step": 136910 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018746332526804141, "loss": 2.1431, "step": 136915 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.00018746242922878387, "loss": 1.9501, "step": 136920 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018746153315964757, "loss": 2.0993, "step": 136925 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.0001874606370606328, "loss": 2.0922, "step": 136930 }, { "epoch": 0.32, "grad_norm": 2.5625, "learning_rate": 0.00018745974093173993, "loss": 2.1525, "step": 136935 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001874588447729692, "loss": 2.3194, "step": 136940 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018745794858432096, "loss": 2.1541, "step": 136945 }, { "epoch": 0.32, "grad_norm": 1.890625, "learning_rate": 0.00018745705236579546, "loss": 2.1633, "step": 136950 }, { "epoch": 0.32, "grad_norm": 1.84375, "learning_rate": 0.0001874561561173931, "loss": 2.2462, "step": 136955 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.0001874552598391141, "loss": 1.9637, "step": 136960 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001874543635309588, "loss": 2.1722, "step": 136965 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018745346719292752, "loss": 2.1907, "step": 136970 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018745257082502053, "loss": 2.2088, "step": 136975 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018745167442723814, "loss": 2.1793, "step": 136980 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.0001874507779995807, "loss": 2.1346, "step": 136985 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.0001874498815420485, "loss": 2.2552, "step": 136990 }, { "epoch": 0.32, "grad_norm": 1.7109375, "learning_rate": 0.0001874489850546418, "loss": 2.1005, "step": 136995 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018744808853736098, "loss": 2.1616, "step": 137000 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018744719199020627, "loss": 2.0869, "step": 137005 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018744629541317802, "loss": 2.2092, "step": 137010 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018744539880627653, "loss": 1.9671, "step": 137015 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018744450216950212, "loss": 2.0605, "step": 137020 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018744360550285508, "loss": 2.1607, "step": 137025 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.0001874427088063357, "loss": 2.1464, "step": 137030 }, { "epoch": 0.32, "grad_norm": 3.0, "learning_rate": 0.00018744181207994433, "loss": 2.2643, "step": 137035 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018744091532368125, "loss": 2.1879, "step": 137040 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018744001853754675, "loss": 2.2808, "step": 137045 }, { "epoch": 0.32, "grad_norm": 1.734375, "learning_rate": 0.00018743912172154118, "loss": 2.0322, "step": 137050 }, { "epoch": 0.32, "grad_norm": 14.3125, "learning_rate": 0.00018743822487566478, "loss": 2.3788, "step": 137055 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001874373279999179, "loss": 2.2564, "step": 137060 }, { "epoch": 0.32, "grad_norm": 3.28125, "learning_rate": 0.00018743643109430088, "loss": 2.0919, "step": 137065 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018743553415881395, "loss": 2.1618, "step": 137070 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.0001874346371934575, "loss": 2.3054, "step": 137075 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.00018743374019823173, "loss": 2.0792, "step": 137080 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018743284317313707, "loss": 2.2472, "step": 137085 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018743194611817377, "loss": 2.0342, "step": 137090 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018743104903334209, "loss": 2.0595, "step": 137095 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018743015191864237, "loss": 2.0652, "step": 137100 }, { "epoch": 0.32, "grad_norm": 1.9375, "learning_rate": 0.00018742925477407496, "loss": 2.1187, "step": 137105 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.0001874283575996401, "loss": 1.9875, "step": 137110 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018742746039533817, "loss": 2.0908, "step": 137115 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001874265631611694, "loss": 2.3263, "step": 137120 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018742566589713416, "loss": 2.0618, "step": 137125 }, { "epoch": 0.32, "grad_norm": 2.8125, "learning_rate": 0.00018742476860323268, "loss": 2.232, "step": 137130 }, { "epoch": 0.32, "grad_norm": 1.9296875, "learning_rate": 0.00018742387127946533, "loss": 2.2612, "step": 137135 }, { "epoch": 0.32, "grad_norm": 1.9765625, "learning_rate": 0.00018742297392583242, "loss": 1.9379, "step": 137140 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018742207654233423, "loss": 2.2124, "step": 137145 }, { "epoch": 0.32, "grad_norm": 1.84375, "learning_rate": 0.00018742117912897105, "loss": 2.0679, "step": 137150 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018742028168574324, "loss": 2.3294, "step": 137155 }, { "epoch": 0.32, "grad_norm": 1.8515625, "learning_rate": 0.00018741938421265106, "loss": 2.0998, "step": 137160 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018741848670969483, "loss": 2.0659, "step": 137165 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018741758917687487, "loss": 2.1894, "step": 137170 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.0001874166916141915, "loss": 2.0295, "step": 137175 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018741579402164495, "loss": 2.1392, "step": 137180 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.0001874148963992356, "loss": 2.1199, "step": 137185 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018741399874696378, "loss": 2.0895, "step": 137190 }, { "epoch": 0.32, "grad_norm": 3.328125, "learning_rate": 0.0001874131010648297, "loss": 2.1737, "step": 137195 }, { "epoch": 0.32, "grad_norm": 2.5, "learning_rate": 0.00018741220335283373, "loss": 1.9095, "step": 137200 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018741130561097618, "loss": 1.9837, "step": 137205 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018741040783925732, "loss": 2.1243, "step": 137210 }, { "epoch": 0.32, "grad_norm": 2.6875, "learning_rate": 0.0001874095100376775, "loss": 2.0083, "step": 137215 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018740861220623698, "loss": 1.9686, "step": 137220 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018740771434493613, "loss": 2.2146, "step": 137225 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018740681645377518, "loss": 2.1211, "step": 137230 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018740591853275452, "loss": 2.249, "step": 137235 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.0001874050205818744, "loss": 2.1905, "step": 137240 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018740412260113512, "loss": 2.2717, "step": 137245 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.000187403224590537, "loss": 1.9514, "step": 137250 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018740232655008036, "loss": 2.1817, "step": 137255 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018740142847976552, "loss": 2.1032, "step": 137260 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018740053037959276, "loss": 1.9159, "step": 137265 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001873996322495624, "loss": 1.9896, "step": 137270 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018739873408967473, "loss": 2.3054, "step": 137275 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018739783589993007, "loss": 2.2815, "step": 137280 }, { "epoch": 0.32, "grad_norm": 2.75, "learning_rate": 0.00018739693768032872, "loss": 2.3231, "step": 137285 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.000187396039430871, "loss": 2.1045, "step": 137290 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001873951411515572, "loss": 2.2793, "step": 137295 }, { "epoch": 0.32, "grad_norm": 3.40625, "learning_rate": 0.00018739424284238763, "loss": 2.0527, "step": 137300 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.0001873933445033626, "loss": 2.1588, "step": 137305 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018739244613448242, "loss": 1.993, "step": 137310 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.0001873915477357474, "loss": 2.2007, "step": 137315 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018739064930715785, "loss": 2.1182, "step": 137320 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018738975084871406, "loss": 2.046, "step": 137325 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.00018738885236041635, "loss": 2.2612, "step": 137330 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.000187387953842265, "loss": 2.2923, "step": 137335 }, { "epoch": 0.32, "grad_norm": 2.578125, "learning_rate": 0.0001873870552942604, "loss": 2.2338, "step": 137340 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018738615671640277, "loss": 2.0274, "step": 137345 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018738525810869244, "loss": 2.1574, "step": 137350 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001873843594711297, "loss": 2.0907, "step": 137355 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018738346080371488, "loss": 2.0451, "step": 137360 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018738256210644832, "loss": 2.0333, "step": 137365 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018738166337933026, "loss": 2.1945, "step": 137370 }, { "epoch": 0.32, "grad_norm": 2.5, "learning_rate": 0.00018738076462236104, "loss": 2.235, "step": 137375 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018737986583554096, "loss": 2.12, "step": 137380 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018737896701887037, "loss": 2.1957, "step": 137385 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.0001873780681723495, "loss": 1.8589, "step": 137390 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018737716929597873, "loss": 2.1627, "step": 137395 }, { "epoch": 0.32, "grad_norm": 1.8359375, "learning_rate": 0.0001873762703897583, "loss": 2.0347, "step": 137400 }, { "epoch": 0.32, "grad_norm": 2.5625, "learning_rate": 0.0001873753714536886, "loss": 2.1431, "step": 137405 }, { "epoch": 0.32, "grad_norm": 2.53125, "learning_rate": 0.00018737447248776986, "loss": 2.3819, "step": 137410 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001873735734920024, "loss": 2.2071, "step": 137415 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018737267446638657, "loss": 2.1979, "step": 137420 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018737177541092262, "loss": 1.9184, "step": 137425 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.00018737087632561092, "loss": 1.9167, "step": 137430 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018736997721045172, "loss": 2.1993, "step": 137435 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018736907806544536, "loss": 1.957, "step": 137440 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018736817889059216, "loss": 2.1412, "step": 137445 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.0001873672796858924, "loss": 2.1971, "step": 137450 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018736638045134635, "loss": 2.0544, "step": 137455 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.0001873654811869544, "loss": 2.0499, "step": 137460 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018736458189271677, "loss": 2.1697, "step": 137465 }, { "epoch": 0.32, "grad_norm": 2.53125, "learning_rate": 0.00018736368256863385, "loss": 2.1179, "step": 137470 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018736278321470593, "loss": 2.0978, "step": 137475 }, { "epoch": 0.32, "grad_norm": 2.953125, "learning_rate": 0.00018736188383093328, "loss": 2.2403, "step": 137480 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.0001873609844173162, "loss": 2.2557, "step": 137485 }, { "epoch": 0.32, "grad_norm": 2.046875, "learning_rate": 0.00018736008497385507, "loss": 2.1742, "step": 137490 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018735918550055015, "loss": 1.9389, "step": 137495 }, { "epoch": 0.32, "grad_norm": 2.546875, "learning_rate": 0.0001873582859974017, "loss": 2.1373, "step": 137500 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.0001873573864644101, "loss": 2.1419, "step": 137505 }, { "epoch": 0.32, "grad_norm": 2.53125, "learning_rate": 0.00018735648690157563, "loss": 2.2713, "step": 137510 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018735558730889861, "loss": 2.2899, "step": 137515 }, { "epoch": 0.32, "grad_norm": 1.90625, "learning_rate": 0.00018735468768637934, "loss": 2.0236, "step": 137520 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.0001873537880340181, "loss": 2.0029, "step": 137525 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018735288835181524, "loss": 2.2449, "step": 137530 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018735198863977103, "loss": 2.063, "step": 137535 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018735108889788584, "loss": 2.0044, "step": 137540 }, { "epoch": 0.32, "grad_norm": 2.515625, "learning_rate": 0.00018735018912615988, "loss": 2.0886, "step": 137545 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018734928932459356, "loss": 2.0938, "step": 137550 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.0001873483894931871, "loss": 2.0537, "step": 137555 }, { "epoch": 0.32, "grad_norm": 2.609375, "learning_rate": 0.00018734748963194087, "loss": 2.0453, "step": 137560 }, { "epoch": 0.32, "grad_norm": 2.390625, "learning_rate": 0.00018734658974085515, "loss": 2.0191, "step": 137565 }, { "epoch": 0.32, "grad_norm": 1.8125, "learning_rate": 0.00018734568981993027, "loss": 1.9876, "step": 137570 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018734478986916648, "loss": 2.0783, "step": 137575 }, { "epoch": 0.32, "grad_norm": 1.96875, "learning_rate": 0.00018734388988856412, "loss": 2.158, "step": 137580 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018734298987812354, "loss": 2.0512, "step": 137585 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.000187342089837845, "loss": 2.0666, "step": 137590 }, { "epoch": 0.32, "grad_norm": 2.40625, "learning_rate": 0.00018734118976772881, "loss": 2.1625, "step": 137595 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001873402896677753, "loss": 2.2319, "step": 137600 }, { "epoch": 0.32, "grad_norm": 2.75, "learning_rate": 0.00018733938953798473, "loss": 2.2335, "step": 137605 }, { "epoch": 0.32, "grad_norm": 2.0625, "learning_rate": 0.00018733848937835747, "loss": 2.2957, "step": 137610 }, { "epoch": 0.32, "grad_norm": 1.984375, "learning_rate": 0.00018733758918889379, "loss": 2.0386, "step": 137615 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.000187336688969594, "loss": 2.0353, "step": 137620 }, { "epoch": 0.32, "grad_norm": 1.7578125, "learning_rate": 0.00018733578872045842, "loss": 2.0868, "step": 137625 }, { "epoch": 0.32, "grad_norm": 1.84375, "learning_rate": 0.00018733488844148734, "loss": 2.2608, "step": 137630 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.0001873339881326811, "loss": 2.2242, "step": 137635 }, { "epoch": 0.32, "grad_norm": 2.484375, "learning_rate": 0.00018733308779403997, "loss": 2.1437, "step": 137640 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.0001873321874255643, "loss": 2.1425, "step": 137645 }, { "epoch": 0.32, "grad_norm": 2.03125, "learning_rate": 0.00018733128702725434, "loss": 2.1491, "step": 137650 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018733038659911043, "loss": 2.057, "step": 137655 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018732948614113288, "loss": 2.0131, "step": 137660 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018732858565332201, "loss": 2.0473, "step": 137665 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018732768513567808, "loss": 2.3925, "step": 137670 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018732678458820146, "loss": 2.2371, "step": 137675 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018732588401089241, "loss": 2.2137, "step": 137680 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018732498340375125, "loss": 2.1834, "step": 137685 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.0001873240827667783, "loss": 2.104, "step": 137690 }, { "epoch": 0.32, "grad_norm": 2.546875, "learning_rate": 0.00018732318209997386, "loss": 2.2356, "step": 137695 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018732228140333823, "loss": 2.2352, "step": 137700 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018732138067687176, "loss": 2.2671, "step": 137705 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018732047992057468, "loss": 2.3584, "step": 137710 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.00018731957913444735, "loss": 2.1573, "step": 137715 }, { "epoch": 0.32, "grad_norm": 1.734375, "learning_rate": 0.00018731867831849007, "loss": 2.0708, "step": 137720 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018731777747270316, "loss": 2.1882, "step": 137725 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018731687659708686, "loss": 2.1676, "step": 137730 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018731597569164162, "loss": 2.067, "step": 137735 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001873150747563676, "loss": 2.016, "step": 137740 }, { "epoch": 0.32, "grad_norm": 2.453125, "learning_rate": 0.00018731417379126518, "loss": 1.9941, "step": 137745 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018731327279633464, "loss": 1.9241, "step": 137750 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.00018731237177157633, "loss": 2.0299, "step": 137755 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018731147071699052, "loss": 2.0796, "step": 137760 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.0001873105696325775, "loss": 1.9424, "step": 137765 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018730966851833764, "loss": 2.1352, "step": 137770 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.0001873087673742712, "loss": 2.2867, "step": 137775 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018730786620037851, "loss": 2.1731, "step": 137780 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018730696499665988, "loss": 2.0949, "step": 137785 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018730606376311557, "loss": 2.1892, "step": 137790 }, { "epoch": 0.32, "grad_norm": 2.375, "learning_rate": 0.00018730516249974597, "loss": 2.1196, "step": 137795 }, { "epoch": 0.32, "grad_norm": 2.578125, "learning_rate": 0.00018730426120655132, "loss": 2.1998, "step": 137800 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018730335988353196, "loss": 2.1371, "step": 137805 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018730245853068817, "loss": 1.9849, "step": 137810 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018730155714802028, "loss": 2.2774, "step": 137815 }, { "epoch": 0.32, "grad_norm": 2.5, "learning_rate": 0.00018730065573552862, "loss": 2.2741, "step": 137820 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018729975429321346, "loss": 1.9947, "step": 137825 }, { "epoch": 0.32, "grad_norm": 1.875, "learning_rate": 0.00018729885282107512, "loss": 2.2361, "step": 137830 }, { "epoch": 0.32, "grad_norm": 2.15625, "learning_rate": 0.0001872979513191139, "loss": 1.903, "step": 137835 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.0001872970497873301, "loss": 2.042, "step": 137840 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.00018729614822572408, "loss": 2.025, "step": 137845 }, { "epoch": 0.32, "grad_norm": 2.296875, "learning_rate": 0.0001872952466342961, "loss": 2.1696, "step": 137850 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.0001872943450130465, "loss": 1.9406, "step": 137855 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018729344336197552, "loss": 2.1429, "step": 137860 }, { "epoch": 0.32, "grad_norm": 2.3125, "learning_rate": 0.00018729254168108354, "loss": 1.8703, "step": 137865 }, { "epoch": 0.32, "grad_norm": 2.84375, "learning_rate": 0.00018729163997037085, "loss": 2.3917, "step": 137870 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018729073822983775, "loss": 2.154, "step": 137875 }, { "epoch": 0.32, "grad_norm": 2.015625, "learning_rate": 0.00018728983645948456, "loss": 2.1293, "step": 137880 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.00018728893465931156, "loss": 2.2177, "step": 137885 }, { "epoch": 0.32, "grad_norm": 2.25, "learning_rate": 0.0001872880328293191, "loss": 2.1295, "step": 137890 }, { "epoch": 0.32, "grad_norm": 2.140625, "learning_rate": 0.00018728713096950744, "loss": 2.1318, "step": 137895 }, { "epoch": 0.32, "grad_norm": 2.4375, "learning_rate": 0.00018728622907987694, "loss": 2.1657, "step": 137900 }, { "epoch": 0.32, "grad_norm": 1.9609375, "learning_rate": 0.00018728532716042788, "loss": 2.2599, "step": 137905 }, { "epoch": 0.32, "grad_norm": 2.90625, "learning_rate": 0.00018728442521116054, "loss": 2.2387, "step": 137910 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018728352323207526, "loss": 2.1605, "step": 137915 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.00018728262122317236, "loss": 2.0937, "step": 137920 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018728171918445217, "loss": 2.1449, "step": 137925 }, { "epoch": 0.32, "grad_norm": 2.21875, "learning_rate": 0.00018728081711591492, "loss": 1.933, "step": 137930 }, { "epoch": 0.32, "grad_norm": 1.8984375, "learning_rate": 0.00018727991501756095, "loss": 2.0374, "step": 137935 }, { "epoch": 0.32, "grad_norm": 1.8359375, "learning_rate": 0.0001872790128893906, "loss": 2.23, "step": 137940 }, { "epoch": 0.32, "grad_norm": 2.421875, "learning_rate": 0.00018727811073140417, "loss": 2.1561, "step": 137945 }, { "epoch": 0.32, "grad_norm": 2.28125, "learning_rate": 0.00018727720854360192, "loss": 2.0835, "step": 137950 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.0001872763063259842, "loss": 2.2917, "step": 137955 }, { "epoch": 0.32, "grad_norm": 2.109375, "learning_rate": 0.0001872754040785513, "loss": 2.1216, "step": 137960 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.00018727450180130358, "loss": 2.0809, "step": 137965 }, { "epoch": 0.32, "grad_norm": 3.703125, "learning_rate": 0.00018727359949424126, "loss": 2.2435, "step": 137970 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018727269715736473, "loss": 2.041, "step": 137975 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018727179479067425, "loss": 2.0256, "step": 137980 }, { "epoch": 0.32, "grad_norm": 2.5625, "learning_rate": 0.00018727089239417016, "loss": 2.2014, "step": 137985 }, { "epoch": 0.32, "grad_norm": 2.546875, "learning_rate": 0.00018726998996785273, "loss": 2.1917, "step": 137990 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.0001872690875117223, "loss": 2.0325, "step": 137995 }, { "epoch": 0.32, "grad_norm": 2.46875, "learning_rate": 0.00018726818502577913, "loss": 2.1872, "step": 138000 }, { "epoch": 0.32, "grad_norm": 2.265625, "learning_rate": 0.00018726728251002364, "loss": 2.0212, "step": 138005 }, { "epoch": 0.32, "grad_norm": 2.0, "learning_rate": 0.000187266379964456, "loss": 2.0709, "step": 138010 }, { "epoch": 0.32, "grad_norm": 2.125, "learning_rate": 0.0001872654773890766, "loss": 2.0684, "step": 138015 }, { "epoch": 0.32, "grad_norm": 2.34375, "learning_rate": 0.00018726457478388574, "loss": 2.1522, "step": 138020 }, { "epoch": 0.32, "grad_norm": 1.8125, "learning_rate": 0.0001872636721488837, "loss": 1.9928, "step": 138025 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018726276948407083, "loss": 2.2084, "step": 138030 }, { "epoch": 0.32, "grad_norm": 2.234375, "learning_rate": 0.00018726186678944738, "loss": 2.2382, "step": 138035 }, { "epoch": 0.32, "grad_norm": 2.078125, "learning_rate": 0.00018726096406501374, "loss": 2.1662, "step": 138040 }, { "epoch": 0.32, "grad_norm": 2.09375, "learning_rate": 0.00018726006131077015, "loss": 2.2028, "step": 138045 }, { "epoch": 0.32, "grad_norm": 2.328125, "learning_rate": 0.00018725915852671693, "loss": 2.1642, "step": 138050 }, { "epoch": 0.32, "grad_norm": 2.171875, "learning_rate": 0.00018725825571285442, "loss": 2.0233, "step": 138055 }, { "epoch": 0.32, "grad_norm": 1.953125, "learning_rate": 0.0001872573528691829, "loss": 2.0336, "step": 138060 }, { "epoch": 0.32, "grad_norm": 1.7421875, "learning_rate": 0.00018725644999570266, "loss": 2.1304, "step": 138065 }, { "epoch": 0.32, "grad_norm": 2.1875, "learning_rate": 0.00018725554709241406, "loss": 2.2125, "step": 138070 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.0001872546441593174, "loss": 2.1552, "step": 138075 }, { "epoch": 0.32, "grad_norm": 1.9921875, "learning_rate": 0.00018725374119641294, "loss": 1.9979, "step": 138080 }, { "epoch": 0.32, "grad_norm": 2.578125, "learning_rate": 0.00018725283820370103, "loss": 2.11, "step": 138085 }, { "epoch": 0.32, "grad_norm": 2.203125, "learning_rate": 0.000187251935181182, "loss": 2.0176, "step": 138090 }, { "epoch": 0.32, "grad_norm": 2.359375, "learning_rate": 0.00018725103212885607, "loss": 2.2528, "step": 138095 }, { "epoch": 0.32, "grad_norm": 1.7734375, "learning_rate": 0.00018725012904672364, "loss": 2.2774, "step": 138100 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018724922593478497, "loss": 2.2381, "step": 138105 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018724832279304036, "loss": 2.2922, "step": 138110 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018724741962149018, "loss": 2.1421, "step": 138115 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001872465164201347, "loss": 2.1596, "step": 138120 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.0001872456131889742, "loss": 2.1136, "step": 138125 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018724470992800902, "loss": 2.2741, "step": 138130 }, { "epoch": 0.33, "grad_norm": 2.765625, "learning_rate": 0.00018724380663723945, "loss": 2.0998, "step": 138135 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018724290331666586, "loss": 2.1419, "step": 138140 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018724199996628848, "loss": 2.1103, "step": 138145 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018724109658610767, "loss": 2.1387, "step": 138150 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.0001872401931761237, "loss": 2.3949, "step": 138155 }, { "epoch": 0.33, "grad_norm": 1.9609375, "learning_rate": 0.0001872392897363369, "loss": 2.0811, "step": 138160 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.0001872383862667476, "loss": 2.2702, "step": 138165 }, { "epoch": 0.33, "grad_norm": 4.34375, "learning_rate": 0.00018723748276735608, "loss": 2.0918, "step": 138170 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018723657923816264, "loss": 1.9761, "step": 138175 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.0001872356756791676, "loss": 1.9881, "step": 138180 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018723477209037125, "loss": 2.1527, "step": 138185 }, { "epoch": 0.33, "grad_norm": 1.6328125, "learning_rate": 0.00018723386847177396, "loss": 2.0446, "step": 138190 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.000187232964823376, "loss": 2.0728, "step": 138195 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018723206114517767, "loss": 2.1873, "step": 138200 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018723115743717926, "loss": 2.1184, "step": 138205 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018723025369938112, "loss": 2.2075, "step": 138210 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018722934993178355, "loss": 2.0846, "step": 138215 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018722844613438684, "loss": 2.023, "step": 138220 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.0001872275423071913, "loss": 2.2754, "step": 138225 }, { "epoch": 0.33, "grad_norm": 1.9453125, "learning_rate": 0.00018722663845019728, "loss": 2.1159, "step": 138230 }, { "epoch": 0.33, "grad_norm": 2.671875, "learning_rate": 0.00018722573456340504, "loss": 2.2971, "step": 138235 }, { "epoch": 0.33, "grad_norm": 1.875, "learning_rate": 0.00018722483064681492, "loss": 2.0923, "step": 138240 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.0001872239267004272, "loss": 2.2401, "step": 138245 }, { "epoch": 0.33, "grad_norm": 2.71875, "learning_rate": 0.00018722302272424223, "loss": 2.2007, "step": 138250 }, { "epoch": 0.33, "grad_norm": 2.78125, "learning_rate": 0.00018722211871826026, "loss": 2.2851, "step": 138255 }, { "epoch": 0.33, "grad_norm": 2.5, "learning_rate": 0.00018722121468248167, "loss": 2.0389, "step": 138260 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018722031061690667, "loss": 2.0718, "step": 138265 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018721940652153572, "loss": 2.0349, "step": 138270 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018721850239636895, "loss": 2.1908, "step": 138275 }, { "epoch": 0.33, "grad_norm": 3.671875, "learning_rate": 0.00018721759824140682, "loss": 2.1284, "step": 138280 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018721669405664953, "loss": 2.1151, "step": 138285 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.00018721578984209748, "loss": 2.0365, "step": 138290 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018721488559775092, "loss": 2.1781, "step": 138295 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018721398132361014, "loss": 2.2089, "step": 138300 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.0001872130770196755, "loss": 2.1323, "step": 138305 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.0001872121726859473, "loss": 2.2872, "step": 138310 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018721126832242586, "loss": 2.1804, "step": 138315 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018721036392911145, "loss": 2.1756, "step": 138320 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.0001872094595060044, "loss": 2.2428, "step": 138325 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018720855505310503, "loss": 2.2144, "step": 138330 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018720765057041362, "loss": 2.2512, "step": 138335 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001872067460579305, "loss": 2.1878, "step": 138340 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018720584151565595, "loss": 2.3183, "step": 138345 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018720493694359035, "loss": 2.333, "step": 138350 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.0001872040323417339, "loss": 1.9972, "step": 138355 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018720312771008702, "loss": 2.2441, "step": 138360 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018720222304864997, "loss": 2.1026, "step": 138365 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018720131835742304, "loss": 2.1118, "step": 138370 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018720041363640654, "loss": 2.0331, "step": 138375 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.00018719950888560082, "loss": 2.1322, "step": 138380 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.0001871986041050062, "loss": 2.2662, "step": 138385 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.0001871976992946229, "loss": 2.0559, "step": 138390 }, { "epoch": 0.33, "grad_norm": 2.78125, "learning_rate": 0.0001871967944544513, "loss": 2.3836, "step": 138395 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.0001871958895844917, "loss": 2.0144, "step": 138400 }, { "epoch": 0.33, "grad_norm": 3.0, "learning_rate": 0.00018719498468474443, "loss": 2.2075, "step": 138405 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018719407975520972, "loss": 2.0269, "step": 138410 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.00018719317479588796, "loss": 2.3157, "step": 138415 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.0001871922698067794, "loss": 2.0051, "step": 138420 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018719136478788443, "loss": 2.0943, "step": 138425 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018719045973920328, "loss": 2.061, "step": 138430 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.0001871895546607363, "loss": 2.2456, "step": 138435 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018718864955248376, "loss": 2.2774, "step": 138440 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018718774441444603, "loss": 2.0544, "step": 138445 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.00018718683924662336, "loss": 2.0278, "step": 138450 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001871859340490161, "loss": 2.1623, "step": 138455 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018718502882162453, "loss": 2.1672, "step": 138460 }, { "epoch": 0.33, "grad_norm": 1.8046875, "learning_rate": 0.000187184123564449, "loss": 2.0122, "step": 138465 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018718321827748978, "loss": 2.1895, "step": 138470 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018718231296074715, "loss": 2.1395, "step": 138475 }, { "epoch": 0.33, "grad_norm": 1.5625, "learning_rate": 0.0001871814076142215, "loss": 2.1327, "step": 138480 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018718050223791313, "loss": 2.2626, "step": 138485 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018717959683182226, "loss": 2.0508, "step": 138490 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018717869139594928, "loss": 2.2108, "step": 138495 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.0001871777859302945, "loss": 2.1053, "step": 138500 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018717688043485818, "loss": 1.9882, "step": 138505 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018717597490964068, "loss": 2.1743, "step": 138510 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018717506935464226, "loss": 2.3648, "step": 138515 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018717416376986328, "loss": 2.2072, "step": 138520 }, { "epoch": 0.33, "grad_norm": 1.8515625, "learning_rate": 0.00018717325815530397, "loss": 2.2352, "step": 138525 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018717235251096477, "loss": 2.0506, "step": 138530 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018717144683684587, "loss": 2.0684, "step": 138535 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018717054113294763, "loss": 1.9746, "step": 138540 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018716963539927036, "loss": 2.0917, "step": 138545 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018716872963581437, "loss": 1.9913, "step": 138550 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018716782384257992, "loss": 2.0476, "step": 138555 }, { "epoch": 0.33, "grad_norm": 2.71875, "learning_rate": 0.00018716691801956738, "loss": 2.198, "step": 138560 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018716601216677706, "loss": 2.1559, "step": 138565 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018716510628420923, "loss": 2.0216, "step": 138570 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018716420037186423, "loss": 2.0998, "step": 138575 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018716329442974234, "loss": 2.1066, "step": 138580 }, { "epoch": 0.33, "grad_norm": 2.515625, "learning_rate": 0.0001871623884578439, "loss": 1.9707, "step": 138585 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018716148245616923, "loss": 1.9776, "step": 138590 }, { "epoch": 0.33, "grad_norm": 1.8828125, "learning_rate": 0.00018716057642471858, "loss": 2.1435, "step": 138595 }, { "epoch": 0.33, "grad_norm": 2.71875, "learning_rate": 0.0001871596703634923, "loss": 2.3318, "step": 138600 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001871587642724907, "loss": 1.9997, "step": 138605 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018715785815171413, "loss": 2.0761, "step": 138610 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001871569520011628, "loss": 2.1516, "step": 138615 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.0001871560458208371, "loss": 2.3055, "step": 138620 }, { "epoch": 0.33, "grad_norm": 1.75, "learning_rate": 0.0001871551396107373, "loss": 2.0019, "step": 138625 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018715423337086376, "loss": 2.1626, "step": 138630 }, { "epoch": 0.33, "grad_norm": 1.8046875, "learning_rate": 0.00018715332710121668, "loss": 1.9509, "step": 138635 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001871524208017965, "loss": 2.3311, "step": 138640 }, { "epoch": 0.33, "grad_norm": 2.671875, "learning_rate": 0.00018715151447260347, "loss": 2.1589, "step": 138645 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018715060811363786, "loss": 2.0291, "step": 138650 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018714970172490006, "loss": 2.11, "step": 138655 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018714879530639037, "loss": 2.0735, "step": 138660 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018714788885810903, "loss": 2.0037, "step": 138665 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018714698238005636, "loss": 1.9861, "step": 138670 }, { "epoch": 0.33, "grad_norm": 1.7578125, "learning_rate": 0.00018714607587223277, "loss": 2.0395, "step": 138675 }, { "epoch": 0.33, "grad_norm": 2.984375, "learning_rate": 0.00018714516933463843, "loss": 2.134, "step": 138680 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001871442627672738, "loss": 2.1851, "step": 138685 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.00018714335617013906, "loss": 2.273, "step": 138690 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018714244954323457, "loss": 2.3655, "step": 138695 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018714154288656062, "loss": 2.1719, "step": 138700 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018714063620011757, "loss": 2.2085, "step": 138705 }, { "epoch": 0.33, "grad_norm": 1.8125, "learning_rate": 0.00018713972948390567, "loss": 2.2241, "step": 138710 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018713882273792528, "loss": 2.141, "step": 138715 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018713791596217672, "loss": 2.2447, "step": 138720 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018713700915666022, "loss": 2.1809, "step": 138725 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018713610232137616, "loss": 2.3197, "step": 138730 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018713519545632482, "loss": 2.1735, "step": 138735 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.0001871342885615065, "loss": 1.9499, "step": 138740 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018713338163692153, "loss": 2.0794, "step": 138745 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018713247468257025, "loss": 2.2371, "step": 138750 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.0001871315676984529, "loss": 1.9981, "step": 138755 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018713066068456983, "loss": 2.1713, "step": 138760 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001871297536409214, "loss": 2.0847, "step": 138765 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.0001871288465675078, "loss": 2.1837, "step": 138770 }, { "epoch": 0.33, "grad_norm": 1.8984375, "learning_rate": 0.00018712793946432942, "loss": 1.9865, "step": 138775 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018712703233138655, "loss": 2.2467, "step": 138780 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018712612516867954, "loss": 2.2009, "step": 138785 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018712521797620862, "loss": 2.0404, "step": 138790 }, { "epoch": 0.33, "grad_norm": 1.921875, "learning_rate": 0.0001871243107539742, "loss": 1.9589, "step": 138795 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.0001871234035019765, "loss": 2.1187, "step": 138800 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018712249622021587, "loss": 2.0952, "step": 138805 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018712158890869262, "loss": 2.1673, "step": 138810 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018712068156740704, "loss": 2.0918, "step": 138815 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018711977419635947, "loss": 1.8494, "step": 138820 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018711886679555022, "loss": 2.0703, "step": 138825 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018711795936497957, "loss": 2.0734, "step": 138830 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018711705190464783, "loss": 2.1995, "step": 138835 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018711614441455535, "loss": 2.0934, "step": 138840 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.0001871152368947024, "loss": 2.1167, "step": 138845 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.0001871143293450893, "loss": 1.9517, "step": 138850 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018711342176571636, "loss": 2.1743, "step": 138855 }, { "epoch": 0.33, "grad_norm": 2.59375, "learning_rate": 0.00018711251415658392, "loss": 2.1332, "step": 138860 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018711160651769225, "loss": 2.1871, "step": 138865 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001871106988490417, "loss": 2.222, "step": 138870 }, { "epoch": 0.33, "grad_norm": 2.734375, "learning_rate": 0.00018710979115063253, "loss": 2.1696, "step": 138875 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018710888342246508, "loss": 2.2348, "step": 138880 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018710797566453966, "loss": 2.2382, "step": 138885 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018710706787685656, "loss": 1.9704, "step": 138890 }, { "epoch": 0.33, "grad_norm": 2.734375, "learning_rate": 0.00018710616005941611, "loss": 2.1935, "step": 138895 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018710525221221863, "loss": 2.1749, "step": 138900 }, { "epoch": 0.33, "grad_norm": 1.8046875, "learning_rate": 0.0001871043443352644, "loss": 2.1266, "step": 138905 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018710343642855377, "loss": 2.0923, "step": 138910 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.000187102528492087, "loss": 1.9595, "step": 138915 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018710162052586445, "loss": 2.115, "step": 138920 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018710071252988638, "loss": 2.2138, "step": 138925 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018709980450415316, "loss": 2.1, "step": 138930 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018709889644866502, "loss": 2.1021, "step": 138935 }, { "epoch": 0.33, "grad_norm": 1.9453125, "learning_rate": 0.00018709798836342236, "loss": 2.0745, "step": 138940 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018709708024842545, "loss": 1.9293, "step": 138945 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.00018709617210367459, "loss": 2.2177, "step": 138950 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018709526392917007, "loss": 2.0269, "step": 138955 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018709435572491226, "loss": 1.9589, "step": 138960 }, { "epoch": 0.33, "grad_norm": 1.6875, "learning_rate": 0.0001870934474909014, "loss": 2.2574, "step": 138965 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018709253922713788, "loss": 2.1549, "step": 138970 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018709163093362196, "loss": 2.0336, "step": 138975 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018709072261035393, "loss": 2.0533, "step": 138980 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018708981425733415, "loss": 2.3153, "step": 138985 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018708890587456294, "loss": 2.0102, "step": 138990 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018708799746204054, "loss": 2.1519, "step": 138995 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.0001870870890197673, "loss": 2.0049, "step": 139000 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018708618054774358, "loss": 2.2542, "step": 139005 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018708527204596963, "loss": 2.2147, "step": 139010 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018708436351444572, "loss": 2.3539, "step": 139015 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018708345495317222, "loss": 1.9996, "step": 139020 }, { "epoch": 0.33, "grad_norm": 1.9375, "learning_rate": 0.00018708254636214946, "loss": 2.2331, "step": 139025 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.0001870816377413777, "loss": 2.1256, "step": 139030 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018708072909085733, "loss": 1.9883, "step": 139035 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018707982041058855, "loss": 2.1705, "step": 139040 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018707891170057175, "loss": 2.0981, "step": 139045 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.0001870780029608072, "loss": 1.9518, "step": 139050 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001870770941912952, "loss": 2.2335, "step": 139055 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018707618539203614, "loss": 2.0865, "step": 139060 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018707527656303024, "loss": 1.9317, "step": 139065 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018707436770427785, "loss": 2.2445, "step": 139070 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.0001870734588157793, "loss": 2.1853, "step": 139075 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018707254989753486, "loss": 2.1667, "step": 139080 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018707164094954485, "loss": 2.4277, "step": 139085 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001870707319718096, "loss": 1.9333, "step": 139090 }, { "epoch": 0.33, "grad_norm": 1.90625, "learning_rate": 0.00018706982296432938, "loss": 2.2516, "step": 139095 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018706891392710455, "loss": 2.2009, "step": 139100 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018706800486013544, "loss": 2.1164, "step": 139105 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018706709576342227, "loss": 2.1378, "step": 139110 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.0001870661866369654, "loss": 2.142, "step": 139115 }, { "epoch": 0.33, "grad_norm": 2.609375, "learning_rate": 0.00018706527748076516, "loss": 2.2482, "step": 139120 }, { "epoch": 0.33, "grad_norm": 1.921875, "learning_rate": 0.00018706436829482183, "loss": 2.0991, "step": 139125 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018706345907913577, "loss": 2.2916, "step": 139130 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.0001870625498337072, "loss": 2.1927, "step": 139135 }, { "epoch": 0.33, "grad_norm": 1.984375, "learning_rate": 0.0001870616405585365, "loss": 1.912, "step": 139140 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018706073125362397, "loss": 1.8916, "step": 139145 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018705982191896992, "loss": 2.0482, "step": 139150 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018705891255457467, "loss": 2.2505, "step": 139155 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.0001870580031604385, "loss": 2.104, "step": 139160 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018705709373656175, "loss": 2.0794, "step": 139165 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018705618428294468, "loss": 2.1992, "step": 139170 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.0001870552747995877, "loss": 1.9037, "step": 139175 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018705436528649097, "loss": 2.1446, "step": 139180 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018705345574365496, "loss": 2.2208, "step": 139185 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.0001870525461710799, "loss": 2.1321, "step": 139190 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.0001870516365687661, "loss": 1.9442, "step": 139195 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.0001870507269367139, "loss": 2.1897, "step": 139200 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018704981727492358, "loss": 2.0616, "step": 139205 }, { "epoch": 0.33, "grad_norm": 1.8984375, "learning_rate": 0.00018704890758339547, "loss": 2.1466, "step": 139210 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018704799786212987, "loss": 1.9477, "step": 139215 }, { "epoch": 0.33, "grad_norm": 1.8359375, "learning_rate": 0.0001870470881111271, "loss": 2.1199, "step": 139220 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018704617833038744, "loss": 1.9658, "step": 139225 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.00018704526851991124, "loss": 2.0908, "step": 139230 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.00018704435867969882, "loss": 2.2915, "step": 139235 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.00018704344880975048, "loss": 2.2422, "step": 139240 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018704253891006648, "loss": 2.0916, "step": 139245 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018704162898064718, "loss": 2.2838, "step": 139250 }, { "epoch": 0.33, "grad_norm": 1.5859375, "learning_rate": 0.00018704071902149287, "loss": 1.7899, "step": 139255 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018703980903260389, "loss": 2.3235, "step": 139260 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018703889901398052, "loss": 2.1087, "step": 139265 }, { "epoch": 0.33, "grad_norm": 1.796875, "learning_rate": 0.0001870379889656231, "loss": 1.8821, "step": 139270 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018703707888753193, "loss": 2.1513, "step": 139275 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018703616877970727, "loss": 2.1513, "step": 139280 }, { "epoch": 0.33, "grad_norm": 1.8046875, "learning_rate": 0.00018703525864214953, "loss": 2.2032, "step": 139285 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018703434847485895, "loss": 2.1232, "step": 139290 }, { "epoch": 0.33, "grad_norm": 3.046875, "learning_rate": 0.00018703343827783584, "loss": 1.9119, "step": 139295 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018703252805108057, "loss": 2.1669, "step": 139300 }, { "epoch": 0.33, "grad_norm": 2.59375, "learning_rate": 0.00018703161779459336, "loss": 2.204, "step": 139305 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.0001870307075083746, "loss": 2.0161, "step": 139310 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018702979719242456, "loss": 2.0526, "step": 139315 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018702888684674356, "loss": 2.1321, "step": 139320 }, { "epoch": 0.33, "grad_norm": 2.671875, "learning_rate": 0.00018702797647133192, "loss": 2.1698, "step": 139325 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018702706606618994, "loss": 2.1835, "step": 139330 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018702615563131794, "loss": 2.0808, "step": 139335 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018702524516671623, "loss": 2.2061, "step": 139340 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018702433467238515, "loss": 1.9218, "step": 139345 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018702342414832492, "loss": 2.0941, "step": 139350 }, { "epoch": 0.33, "grad_norm": 2.5, "learning_rate": 0.00018702251359453595, "loss": 2.1759, "step": 139355 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018702160301101848, "loss": 2.1047, "step": 139360 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018702069239777286, "loss": 2.238, "step": 139365 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018701978175479942, "loss": 2.1734, "step": 139370 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.0001870188710820984, "loss": 2.0068, "step": 139375 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.0001870179603796702, "loss": 2.0523, "step": 139380 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018701704964751506, "loss": 1.938, "step": 139385 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018701613888563335, "loss": 2.1861, "step": 139390 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018701522809402531, "loss": 2.1353, "step": 139395 }, { "epoch": 0.33, "grad_norm": 1.8046875, "learning_rate": 0.00018701431727269132, "loss": 2.0392, "step": 139400 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018701340642163165, "loss": 2.042, "step": 139405 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.0001870124955408466, "loss": 2.005, "step": 139410 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.00018701158463033655, "loss": 2.3041, "step": 139415 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.0001870106736901017, "loss": 2.1214, "step": 139420 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.0001870097627201425, "loss": 2.0619, "step": 139425 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018700885172045912, "loss": 2.0972, "step": 139430 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018700794069105198, "loss": 2.0053, "step": 139435 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018700702963192134, "loss": 2.2653, "step": 139440 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.0001870061185430675, "loss": 2.2488, "step": 139445 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018700520742449085, "loss": 2.2206, "step": 139450 }, { "epoch": 0.33, "grad_norm": 2.53125, "learning_rate": 0.00018700429627619157, "loss": 2.2176, "step": 139455 }, { "epoch": 0.33, "grad_norm": 2.75, "learning_rate": 0.0001870033850981701, "loss": 2.1427, "step": 139460 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018700247389042668, "loss": 2.1959, "step": 139465 }, { "epoch": 0.33, "grad_norm": 1.8203125, "learning_rate": 0.00018700156265296164, "loss": 2.0967, "step": 139470 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018700065138577525, "loss": 2.2363, "step": 139475 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018699974008886788, "loss": 2.195, "step": 139480 }, { "epoch": 0.33, "grad_norm": 1.7734375, "learning_rate": 0.00018699882876223987, "loss": 2.1538, "step": 139485 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018699791740589143, "loss": 2.0642, "step": 139490 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018699700601982293, "loss": 2.0934, "step": 139495 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018699609460403469, "loss": 2.1536, "step": 139500 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.000186995183158527, "loss": 2.151, "step": 139505 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018699427168330016, "loss": 2.2036, "step": 139510 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018699336017835453, "loss": 2.2095, "step": 139515 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.00018699244864369037, "loss": 2.0751, "step": 139520 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018699153707930801, "loss": 2.112, "step": 139525 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.00018699062548520776, "loss": 2.0899, "step": 139530 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.00018698971386138996, "loss": 2.1814, "step": 139535 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018698880220785488, "loss": 2.1787, "step": 139540 }, { "epoch": 0.33, "grad_norm": 2.796875, "learning_rate": 0.00018698789052460285, "loss": 1.9893, "step": 139545 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018698697881163416, "loss": 1.8561, "step": 139550 }, { "epoch": 0.33, "grad_norm": 1.859375, "learning_rate": 0.00018698606706894918, "loss": 2.1376, "step": 139555 }, { "epoch": 0.33, "grad_norm": 1.7578125, "learning_rate": 0.00018698515529654816, "loss": 2.1653, "step": 139560 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018698424349443142, "loss": 2.1707, "step": 139565 }, { "epoch": 0.33, "grad_norm": 2.625, "learning_rate": 0.0001869833316625993, "loss": 2.2523, "step": 139570 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018698241980105208, "loss": 2.1434, "step": 139575 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018698150790979012, "loss": 1.9544, "step": 139580 }, { "epoch": 0.33, "grad_norm": 2.53125, "learning_rate": 0.00018698059598881367, "loss": 2.202, "step": 139585 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.0001869796840381231, "loss": 2.2562, "step": 139590 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018697877205771864, "loss": 2.2217, "step": 139595 }, { "epoch": 0.33, "grad_norm": 2.8125, "learning_rate": 0.0001869778600476007, "loss": 2.0601, "step": 139600 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018697694800776956, "loss": 2.1753, "step": 139605 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018697603593822548, "loss": 2.1402, "step": 139610 }, { "epoch": 0.33, "grad_norm": 2.859375, "learning_rate": 0.00018697512383896882, "loss": 2.2922, "step": 139615 }, { "epoch": 0.33, "grad_norm": 1.6640625, "learning_rate": 0.00018697421170999986, "loss": 2.0973, "step": 139620 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018697329955131897, "loss": 2.181, "step": 139625 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018697238736292641, "loss": 2.1573, "step": 139630 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018697147514482252, "loss": 2.215, "step": 139635 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018697056289700755, "loss": 2.1405, "step": 139640 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.0001869696506194819, "loss": 2.2687, "step": 139645 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018696873831224582, "loss": 2.2453, "step": 139650 }, { "epoch": 0.33, "grad_norm": 1.875, "learning_rate": 0.00018696782597529965, "loss": 2.1775, "step": 139655 }, { "epoch": 0.33, "grad_norm": 1.984375, "learning_rate": 0.00018696691360864368, "loss": 1.915, "step": 139660 }, { "epoch": 0.33, "grad_norm": 1.8671875, "learning_rate": 0.00018696600121227824, "loss": 2.0804, "step": 139665 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018696508878620366, "loss": 2.1598, "step": 139670 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018696417633042023, "loss": 2.1807, "step": 139675 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018696326384492822, "loss": 2.1442, "step": 139680 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018696235132972802, "loss": 2.091, "step": 139685 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.0001869614387848199, "loss": 2.2537, "step": 139690 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018696052621020414, "loss": 1.8517, "step": 139695 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001869596136058811, "loss": 2.1822, "step": 139700 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.0001869587009718511, "loss": 2.1298, "step": 139705 }, { "epoch": 0.33, "grad_norm": 2.796875, "learning_rate": 0.00018695778830811444, "loss": 1.9704, "step": 139710 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018695687561467138, "loss": 2.2097, "step": 139715 }, { "epoch": 0.33, "grad_norm": 2.59375, "learning_rate": 0.00018695596289152228, "loss": 2.197, "step": 139720 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018695505013866748, "loss": 2.0537, "step": 139725 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018695413735610723, "loss": 2.1155, "step": 139730 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.0001869532245438419, "loss": 2.2585, "step": 139735 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018695231170187175, "loss": 2.1804, "step": 139740 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.0001869513988301971, "loss": 1.9743, "step": 139745 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.0001869504859288183, "loss": 2.1582, "step": 139750 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001869495729977356, "loss": 2.0107, "step": 139755 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.0001869486600369494, "loss": 2.2684, "step": 139760 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018694774704645995, "loss": 2.2375, "step": 139765 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018694683402626755, "loss": 2.0842, "step": 139770 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018694592097637255, "loss": 2.1966, "step": 139775 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018694500789677524, "loss": 2.3115, "step": 139780 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018694409478747591, "loss": 2.0738, "step": 139785 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.00018694318164847495, "loss": 2.0461, "step": 139790 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018694226847977262, "loss": 2.3854, "step": 139795 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.0001869413552813692, "loss": 2.3244, "step": 139800 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018694044205326503, "loss": 2.057, "step": 139805 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018693952879546047, "loss": 1.8774, "step": 139810 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018693861550795577, "loss": 2.1071, "step": 139815 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018693770219075127, "loss": 2.0601, "step": 139820 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.00018693678884384727, "loss": 2.0754, "step": 139825 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018693587546724408, "loss": 2.1687, "step": 139830 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018693496206094202, "loss": 2.1642, "step": 139835 }, { "epoch": 0.33, "grad_norm": 1.8828125, "learning_rate": 0.0001869340486249414, "loss": 2.0744, "step": 139840 }, { "epoch": 0.33, "grad_norm": 1.9609375, "learning_rate": 0.00018693313515924254, "loss": 2.084, "step": 139845 }, { "epoch": 0.33, "grad_norm": 1.9375, "learning_rate": 0.00018693222166384573, "loss": 2.0485, "step": 139850 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018693130813875132, "loss": 2.2294, "step": 139855 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018693039458395958, "loss": 1.9293, "step": 139860 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018692948099947084, "loss": 2.129, "step": 139865 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018692856738528543, "loss": 2.2045, "step": 139870 }, { "epoch": 0.33, "grad_norm": 2.515625, "learning_rate": 0.00018692765374140363, "loss": 2.0445, "step": 139875 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018692674006782575, "loss": 2.0904, "step": 139880 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018692582636455215, "loss": 2.2828, "step": 139885 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.0001869249126315831, "loss": 1.9617, "step": 139890 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018692399886891892, "loss": 2.2377, "step": 139895 }, { "epoch": 0.33, "grad_norm": 1.8203125, "learning_rate": 0.00018692308507655995, "loss": 2.0656, "step": 139900 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.00018692217125450644, "loss": 2.2761, "step": 139905 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018692125740275874, "loss": 2.2193, "step": 139910 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.0001869203435213172, "loss": 2.0144, "step": 139915 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018691942961018208, "loss": 2.149, "step": 139920 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018691851566935368, "loss": 2.323, "step": 139925 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018691760169883237, "loss": 2.1287, "step": 139930 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.0001869166876986184, "loss": 2.0162, "step": 139935 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018691577366871217, "loss": 2.1817, "step": 139940 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.0001869148596091139, "loss": 2.0522, "step": 139945 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.0001869139455198239, "loss": 2.0988, "step": 139950 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018691303140084257, "loss": 2.0414, "step": 139955 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018691211725217015, "loss": 2.2192, "step": 139960 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018691120307380698, "loss": 2.1696, "step": 139965 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018691028886575338, "loss": 2.1327, "step": 139970 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018690937462800963, "loss": 2.1191, "step": 139975 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.00018690846036057606, "loss": 2.1774, "step": 139980 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.000186907546063453, "loss": 2.1079, "step": 139985 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018690663173664073, "loss": 2.1325, "step": 139990 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.0001869057173801396, "loss": 2.1465, "step": 139995 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.00018690480299394987, "loss": 2.3125, "step": 140000 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018690388857807192, "loss": 2.1092, "step": 140005 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.000186902974132506, "loss": 2.3873, "step": 140010 }, { "epoch": 0.33, "grad_norm": 2.640625, "learning_rate": 0.00018690205965725247, "loss": 2.223, "step": 140015 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001869011451523116, "loss": 2.0798, "step": 140020 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018690023061768374, "loss": 2.1465, "step": 140025 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018689931605336913, "loss": 2.1528, "step": 140030 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.0001868984014593682, "loss": 2.1483, "step": 140035 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018689748683568117, "loss": 2.1512, "step": 140040 }, { "epoch": 0.33, "grad_norm": 2.765625, "learning_rate": 0.0001868965721823084, "loss": 2.1065, "step": 140045 }, { "epoch": 0.33, "grad_norm": 2.75, "learning_rate": 0.0001868956574992502, "loss": 2.1902, "step": 140050 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.00018689474278650684, "loss": 2.3842, "step": 140055 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018689382804407866, "loss": 2.2732, "step": 140060 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.000186892913271966, "loss": 2.388, "step": 140065 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.0001868919984701691, "loss": 2.1944, "step": 140070 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018689108363868835, "loss": 2.13, "step": 140075 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018689016877752401, "loss": 2.2235, "step": 140080 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018688925388667645, "loss": 2.1093, "step": 140085 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.0001868883389661459, "loss": 1.9166, "step": 140090 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018688742401593275, "loss": 2.0565, "step": 140095 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018688650903603727, "loss": 2.0755, "step": 140100 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018688559402645975, "loss": 2.1764, "step": 140105 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018688467898720057, "loss": 2.1636, "step": 140110 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018688376391826, "loss": 2.1812, "step": 140115 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018688284881963836, "loss": 2.1442, "step": 140120 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.000186881933691336, "loss": 2.0328, "step": 140125 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.00018688101853335314, "loss": 2.0678, "step": 140130 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018688010334569016, "loss": 2.2916, "step": 140135 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018687918812834738, "loss": 2.1192, "step": 140140 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018687827288132506, "loss": 2.0167, "step": 140145 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.0001868773576046236, "loss": 2.2312, "step": 140150 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018687644229824321, "loss": 2.0618, "step": 140155 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018687552696218429, "loss": 2.1423, "step": 140160 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018687461159644708, "loss": 2.0345, "step": 140165 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018687369620103194, "loss": 2.0785, "step": 140170 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018687278077593916, "loss": 2.0686, "step": 140175 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.0001868718653211691, "loss": 2.2468, "step": 140180 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.000186870949836722, "loss": 2.2087, "step": 140185 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018687003432259824, "loss": 2.1641, "step": 140190 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018686911877879807, "loss": 2.108, "step": 140195 }, { "epoch": 0.33, "grad_norm": 1.984375, "learning_rate": 0.00018686820320532185, "loss": 2.0474, "step": 140200 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018686728760216985, "loss": 2.1138, "step": 140205 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.00018686637196934245, "loss": 2.202, "step": 140210 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.0001868654563068399, "loss": 2.1217, "step": 140215 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.0001868645406146625, "loss": 2.101, "step": 140220 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018686362489281065, "loss": 2.1455, "step": 140225 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001868627091412846, "loss": 2.0286, "step": 140230 }, { "epoch": 0.33, "grad_norm": 2.5, "learning_rate": 0.00018686179336008466, "loss": 2.095, "step": 140235 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018686087754921118, "loss": 2.0586, "step": 140240 }, { "epoch": 0.33, "grad_norm": 2.640625, "learning_rate": 0.0001868599617086644, "loss": 2.2626, "step": 140245 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018685904583844474, "loss": 2.2673, "step": 140250 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018685812993855244, "loss": 2.1939, "step": 140255 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.0001868572140089878, "loss": 2.1447, "step": 140260 }, { "epoch": 0.33, "grad_norm": 1.8359375, "learning_rate": 0.00018685629804975117, "loss": 2.1649, "step": 140265 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018685538206084288, "loss": 2.0637, "step": 140270 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018685446604226318, "loss": 2.0122, "step": 140275 }, { "epoch": 0.33, "grad_norm": 2.5, "learning_rate": 0.00018685354999401245, "loss": 2.104, "step": 140280 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018685263391609094, "loss": 2.2409, "step": 140285 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.000186851717808499, "loss": 1.9826, "step": 140290 }, { "epoch": 0.33, "grad_norm": 2.671875, "learning_rate": 0.00018685080167123695, "loss": 2.0468, "step": 140295 }, { "epoch": 0.33, "grad_norm": 2.515625, "learning_rate": 0.0001868498855043051, "loss": 2.1215, "step": 140300 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018684896930770375, "loss": 2.2001, "step": 140305 }, { "epoch": 0.33, "grad_norm": 2.515625, "learning_rate": 0.0001868480530814332, "loss": 2.1721, "step": 140310 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018684713682549383, "loss": 2.1146, "step": 140315 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018684622053988584, "loss": 2.097, "step": 140320 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018684530422460963, "loss": 2.2106, "step": 140325 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018684438787966548, "loss": 2.2046, "step": 140330 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.00018684347150505374, "loss": 2.1144, "step": 140335 }, { "epoch": 0.33, "grad_norm": 1.8515625, "learning_rate": 0.0001868425551007747, "loss": 2.0221, "step": 140340 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018684163866682863, "loss": 2.1321, "step": 140345 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.0001868407222032159, "loss": 2.3262, "step": 140350 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001868398057099368, "loss": 2.0895, "step": 140355 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018683888918699163, "loss": 2.2033, "step": 140360 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018683797263438075, "loss": 2.1996, "step": 140365 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.00018683705605210445, "loss": 2.1532, "step": 140370 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.000186836139440163, "loss": 2.2868, "step": 140375 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018683522279855675, "loss": 2.0499, "step": 140380 }, { "epoch": 0.33, "grad_norm": 2.625, "learning_rate": 0.00018683430612728604, "loss": 2.1853, "step": 140385 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018683338942635116, "loss": 2.204, "step": 140390 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.00018683247269575242, "loss": 2.019, "step": 140395 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018683155593549012, "loss": 2.293, "step": 140400 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018683063914556457, "loss": 2.2533, "step": 140405 }, { "epoch": 0.33, "grad_norm": 2.53125, "learning_rate": 0.00018682972232597614, "loss": 1.9808, "step": 140410 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.00018682880547672505, "loss": 2.1157, "step": 140415 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.0001868278885978117, "loss": 2.2136, "step": 140420 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001868269716892364, "loss": 2.0654, "step": 140425 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018682605475099938, "loss": 2.1634, "step": 140430 }, { "epoch": 0.33, "grad_norm": 1.90625, "learning_rate": 0.000186825137783101, "loss": 2.1054, "step": 140435 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.0001868242207855416, "loss": 1.9464, "step": 140440 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018682330375832147, "loss": 2.0843, "step": 140445 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018682238670144092, "loss": 2.1563, "step": 140450 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018682146961490026, "loss": 2.0174, "step": 140455 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018682055249869984, "loss": 2.0856, "step": 140460 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018681963535283993, "loss": 2.1097, "step": 140465 }, { "epoch": 0.33, "grad_norm": 1.5703125, "learning_rate": 0.00018681871817732084, "loss": 1.9094, "step": 140470 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018681780097214292, "loss": 2.2752, "step": 140475 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018681688373730645, "loss": 2.272, "step": 140480 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018681596647281178, "loss": 2.0639, "step": 140485 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.0001868150491786592, "loss": 2.0312, "step": 140490 }, { "epoch": 0.33, "grad_norm": 1.9375, "learning_rate": 0.000186814131854849, "loss": 2.1591, "step": 140495 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018681321450138153, "loss": 2.1415, "step": 140500 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.0001868122971182571, "loss": 2.0732, "step": 140505 }, { "epoch": 0.33, "grad_norm": 1.5859375, "learning_rate": 0.00018681137970547602, "loss": 2.0274, "step": 140510 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.00018681046226303857, "loss": 2.2746, "step": 140515 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018680954479094512, "loss": 2.1579, "step": 140520 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018680862728919596, "loss": 2.1121, "step": 140525 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018680770975779137, "loss": 2.0295, "step": 140530 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018680679219673172, "loss": 2.0656, "step": 140535 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.0001868058746060173, "loss": 2.1736, "step": 140540 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018680495698564838, "loss": 2.1596, "step": 140545 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.0001868040393356253, "loss": 2.2019, "step": 140550 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018680312165594845, "loss": 2.267, "step": 140555 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018680220394661805, "loss": 2.0617, "step": 140560 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018680128620763444, "loss": 2.2851, "step": 140565 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018680036843899794, "loss": 1.9647, "step": 140570 }, { "epoch": 0.33, "grad_norm": 2.703125, "learning_rate": 0.00018679945064070883, "loss": 2.1964, "step": 140575 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.0001867985328127675, "loss": 2.2285, "step": 140580 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018679761495517418, "loss": 2.0363, "step": 140585 }, { "epoch": 0.33, "grad_norm": 2.53125, "learning_rate": 0.00018679669706792924, "loss": 2.1216, "step": 140590 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.000186795779151033, "loss": 2.1621, "step": 140595 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.00018679486120448568, "loss": 2.2693, "step": 140600 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018679394322828772, "loss": 2.0599, "step": 140605 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018679302522243936, "loss": 2.2416, "step": 140610 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001867921071869409, "loss": 2.0317, "step": 140615 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018679118912179272, "loss": 2.0911, "step": 140620 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.00018679027102699508, "loss": 2.0819, "step": 140625 }, { "epoch": 0.33, "grad_norm": 2.609375, "learning_rate": 0.0001867893529025483, "loss": 2.1073, "step": 140630 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018678843474845275, "loss": 2.096, "step": 140635 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018678751656470865, "loss": 2.0131, "step": 140640 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018678659835131637, "loss": 2.1319, "step": 140645 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018678568010827622, "loss": 2.0446, "step": 140650 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018678476183558852, "loss": 2.2458, "step": 140655 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018678384353325355, "loss": 2.312, "step": 140660 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018678292520127164, "loss": 2.2604, "step": 140665 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.0001867820068396431, "loss": 2.2788, "step": 140670 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018678108844836827, "loss": 1.982, "step": 140675 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018678017002744744, "loss": 2.1356, "step": 140680 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018677925157688097, "loss": 2.1387, "step": 140685 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018677833309666908, "loss": 2.3353, "step": 140690 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018677741458681218, "loss": 2.1355, "step": 140695 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.0001867764960473105, "loss": 2.1543, "step": 140700 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.0001867755774781644, "loss": 2.1539, "step": 140705 }, { "epoch": 0.33, "grad_norm": 2.96875, "learning_rate": 0.00018677465887937422, "loss": 2.3056, "step": 140710 }, { "epoch": 0.33, "grad_norm": 1.8671875, "learning_rate": 0.00018677374025094024, "loss": 2.0011, "step": 140715 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018677282159286276, "loss": 2.1384, "step": 140720 }, { "epoch": 0.33, "grad_norm": 2.75, "learning_rate": 0.0001867719029051421, "loss": 2.0449, "step": 140725 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018677098418777864, "loss": 2.1317, "step": 140730 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018677006544077257, "loss": 2.1324, "step": 140735 }, { "epoch": 0.33, "grad_norm": 1.8984375, "learning_rate": 0.0001867691466641243, "loss": 2.0269, "step": 140740 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018676822785783413, "loss": 2.0847, "step": 140745 }, { "epoch": 0.33, "grad_norm": 1.90625, "learning_rate": 0.00018676730902190234, "loss": 2.1887, "step": 140750 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018676639015632928, "loss": 2.1828, "step": 140755 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018676547126111523, "loss": 2.1886, "step": 140760 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.0001867645523362605, "loss": 2.3692, "step": 140765 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018676363338176547, "loss": 2.1501, "step": 140770 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001867627143976304, "loss": 2.1462, "step": 140775 }, { "epoch": 0.33, "grad_norm": 2.515625, "learning_rate": 0.0001867617953838556, "loss": 2.1629, "step": 140780 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018676087634044138, "loss": 2.1071, "step": 140785 }, { "epoch": 0.33, "grad_norm": 2.765625, "learning_rate": 0.0001867599572673881, "loss": 2.229, "step": 140790 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018675903816469603, "loss": 2.2347, "step": 140795 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018675811903236554, "loss": 2.0035, "step": 140800 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018675719987039686, "loss": 2.2441, "step": 140805 }, { "epoch": 0.33, "grad_norm": 2.59375, "learning_rate": 0.00018675628067879038, "loss": 2.2442, "step": 140810 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018675536145754633, "loss": 2.162, "step": 140815 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.0001867544422066651, "loss": 2.1438, "step": 140820 }, { "epoch": 0.33, "grad_norm": 1.828125, "learning_rate": 0.000186753522926147, "loss": 2.1172, "step": 140825 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001867526036159923, "loss": 2.1351, "step": 140830 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018675168427620132, "loss": 2.1948, "step": 140835 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018675076490677443, "loss": 2.169, "step": 140840 }, { "epoch": 0.33, "grad_norm": 2.609375, "learning_rate": 0.0001867498455077119, "loss": 2.1595, "step": 140845 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018674892607901405, "loss": 2.1622, "step": 140850 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.00018674800662068117, "loss": 2.1017, "step": 140855 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018674708713271363, "loss": 2.1548, "step": 140860 }, { "epoch": 0.33, "grad_norm": 1.9609375, "learning_rate": 0.00018674616761511168, "loss": 1.9756, "step": 140865 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.0001867452480678757, "loss": 2.148, "step": 140870 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018674432849100593, "loss": 2.2349, "step": 140875 }, { "epoch": 0.33, "grad_norm": 1.890625, "learning_rate": 0.00018674340888450274, "loss": 2.265, "step": 140880 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018674248924836642, "loss": 2.1415, "step": 140885 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018674156958259732, "loss": 2.0169, "step": 140890 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.0001867406498871957, "loss": 2.0362, "step": 140895 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018673973016216194, "loss": 2.1095, "step": 140900 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018673881040749625, "loss": 2.1214, "step": 140905 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018673789062319908, "loss": 2.1767, "step": 140910 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018673697080927063, "loss": 2.1935, "step": 140915 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.00018673605096571126, "loss": 2.027, "step": 140920 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.0001867351310925213, "loss": 2.221, "step": 140925 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.000186734211189701, "loss": 2.1805, "step": 140930 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018673329125725076, "loss": 2.1201, "step": 140935 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018673237129517088, "loss": 1.9672, "step": 140940 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.0001867314513034616, "loss": 2.0926, "step": 140945 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.0001867305312821233, "loss": 2.0602, "step": 140950 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001867296112311563, "loss": 2.281, "step": 140955 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018672869115056087, "loss": 2.1303, "step": 140960 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018672777104033735, "loss": 2.1436, "step": 140965 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018672685090048602, "loss": 2.2972, "step": 140970 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001867259307310073, "loss": 2.2755, "step": 140975 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018672501053190136, "loss": 2.1948, "step": 140980 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018672409030316863, "loss": 2.2049, "step": 140985 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018672317004480934, "loss": 2.2077, "step": 140990 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018672224975682384, "loss": 2.1118, "step": 140995 }, { "epoch": 0.33, "grad_norm": 1.9921875, "learning_rate": 0.00018672132943921244, "loss": 2.2585, "step": 141000 }, { "epoch": 0.33, "grad_norm": 1.796875, "learning_rate": 0.0001867204090919755, "loss": 2.1292, "step": 141005 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018671948871511329, "loss": 2.1425, "step": 141010 }, { "epoch": 0.33, "grad_norm": 1.9609375, "learning_rate": 0.00018671856830862612, "loss": 1.9873, "step": 141015 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.00018671764787251433, "loss": 2.2462, "step": 141020 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.0001867167274067782, "loss": 2.1823, "step": 141025 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018671580691141806, "loss": 2.2383, "step": 141030 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.0001867148863864342, "loss": 2.2368, "step": 141035 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018671396583182703, "loss": 2.1768, "step": 141040 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018671304524759677, "loss": 1.9097, "step": 141045 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018671212463374374, "loss": 2.0793, "step": 141050 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.0001867112039902683, "loss": 2.3388, "step": 141055 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018671028331717073, "loss": 2.0871, "step": 141060 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018670936261445134, "loss": 2.1913, "step": 141065 }, { "epoch": 0.33, "grad_norm": 3.390625, "learning_rate": 0.00018670844188211047, "loss": 2.0141, "step": 141070 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.00018670752112014843, "loss": 2.3337, "step": 141075 }, { "epoch": 0.33, "grad_norm": 1.703125, "learning_rate": 0.00018670660032856553, "loss": 2.2281, "step": 141080 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001867056795073621, "loss": 2.2309, "step": 141085 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018670475865653838, "loss": 2.2605, "step": 141090 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.00018670383777609479, "loss": 2.0927, "step": 141095 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018670291686603157, "loss": 2.1359, "step": 141100 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.0001867019959263491, "loss": 2.206, "step": 141105 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018670107495704762, "loss": 2.1136, "step": 141110 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001867001539581275, "loss": 2.0835, "step": 141115 }, { "epoch": 0.33, "grad_norm": 1.703125, "learning_rate": 0.00018669923292958905, "loss": 2.1386, "step": 141120 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018669831187143252, "loss": 2.1619, "step": 141125 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.0001866973907836583, "loss": 2.1379, "step": 141130 }, { "epoch": 0.33, "grad_norm": 2.359375, "learning_rate": 0.00018669646966626668, "loss": 2.175, "step": 141135 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.000186695548519258, "loss": 2.1209, "step": 141140 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.0001866946273426325, "loss": 2.083, "step": 141145 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018669370613639055, "loss": 2.1767, "step": 141150 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018669278490053248, "loss": 2.2286, "step": 141155 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018669186363505857, "loss": 2.0449, "step": 141160 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018669094233996915, "loss": 2.2288, "step": 141165 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018669002101526455, "loss": 2.0804, "step": 141170 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018668909966094502, "loss": 2.1613, "step": 141175 }, { "epoch": 0.33, "grad_norm": 2.78125, "learning_rate": 0.00018668817827701098, "loss": 2.1194, "step": 141180 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018668725686346265, "loss": 2.0946, "step": 141185 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018668633542030038, "loss": 2.1221, "step": 141190 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.0001866854139475245, "loss": 2.0991, "step": 141195 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018668449244513532, "loss": 2.0861, "step": 141200 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.0001866835709131331, "loss": 2.1614, "step": 141205 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.00018668264935151825, "loss": 2.1083, "step": 141210 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018668172776029103, "loss": 2.1927, "step": 141215 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018668080613945174, "loss": 1.8664, "step": 141220 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.0001866798844890007, "loss": 2.0378, "step": 141225 }, { "epoch": 0.33, "grad_norm": 1.9453125, "learning_rate": 0.00018667896280893825, "loss": 2.1273, "step": 141230 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018667804109926472, "loss": 2.1171, "step": 141235 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018667711935998037, "loss": 2.0931, "step": 141240 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018667619759108557, "loss": 2.1531, "step": 141245 }, { "epoch": 0.33, "grad_norm": 2.671875, "learning_rate": 0.00018667527579258058, "loss": 2.0111, "step": 141250 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018667435396446577, "loss": 2.1846, "step": 141255 }, { "epoch": 0.33, "grad_norm": 1.8125, "learning_rate": 0.0001866734321067414, "loss": 2.0095, "step": 141260 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018667251021940785, "loss": 2.1796, "step": 141265 }, { "epoch": 0.33, "grad_norm": 2.90625, "learning_rate": 0.00018667158830246538, "loss": 2.2295, "step": 141270 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018667066635591432, "loss": 2.2025, "step": 141275 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.000186669744379755, "loss": 2.0554, "step": 141280 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018666882237398772, "loss": 2.3541, "step": 141285 }, { "epoch": 0.33, "grad_norm": 2.0, "learning_rate": 0.00018666790033861278, "loss": 2.1942, "step": 141290 }, { "epoch": 0.33, "grad_norm": 1.921875, "learning_rate": 0.00018666697827363052, "loss": 2.2722, "step": 141295 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018666605617904128, "loss": 2.2335, "step": 141300 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018666513405484532, "loss": 2.1236, "step": 141305 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018666421190104298, "loss": 2.1117, "step": 141310 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018666328971763457, "loss": 2.0482, "step": 141315 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.00018666236750462041, "loss": 2.2325, "step": 141320 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.0001866614452620008, "loss": 2.188, "step": 141325 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018666052298977608, "loss": 1.8736, "step": 141330 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018665960068794656, "loss": 2.1406, "step": 141335 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018665867835651256, "loss": 2.0344, "step": 141340 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018665775599547435, "loss": 2.0854, "step": 141345 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.0001866568336048323, "loss": 2.2487, "step": 141350 }, { "epoch": 0.33, "grad_norm": 2.59375, "learning_rate": 0.0001866559111845867, "loss": 1.9302, "step": 141355 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018665498873473787, "loss": 2.0956, "step": 141360 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.0001866540662552861, "loss": 2.068, "step": 141365 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018665314374623177, "loss": 2.07, "step": 141370 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018665222120757514, "loss": 2.1793, "step": 141375 }, { "epoch": 0.33, "grad_norm": 3.125, "learning_rate": 0.00018665129863931656, "loss": 2.0457, "step": 141380 }, { "epoch": 0.33, "grad_norm": 1.984375, "learning_rate": 0.0001866503760414563, "loss": 2.1897, "step": 141385 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018664945341399469, "loss": 1.9349, "step": 141390 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018664853075693206, "loss": 2.1529, "step": 141395 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018664760807026874, "loss": 2.0772, "step": 141400 }, { "epoch": 0.33, "grad_norm": 1.8671875, "learning_rate": 0.00018664668535400502, "loss": 1.9778, "step": 141405 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.00018664576260814123, "loss": 2.1258, "step": 141410 }, { "epoch": 0.33, "grad_norm": 2.03125, "learning_rate": 0.00018664483983267766, "loss": 2.1172, "step": 141415 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.00018664391702761463, "loss": 2.1089, "step": 141420 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018664299419295248, "loss": 2.1549, "step": 141425 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.0001866420713286915, "loss": 2.2788, "step": 141430 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018664114843483206, "loss": 2.1432, "step": 141435 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.0001866402255113744, "loss": 2.1669, "step": 141440 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.00018663930255831884, "loss": 2.2433, "step": 141445 }, { "epoch": 0.33, "grad_norm": 2.625, "learning_rate": 0.00018663837957566575, "loss": 2.0181, "step": 141450 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018663745656341543, "loss": 2.1612, "step": 141455 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018663653352156816, "loss": 2.0663, "step": 141460 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.0001866356104501243, "loss": 2.1572, "step": 141465 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018663468734908415, "loss": 2.0749, "step": 141470 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.000186633764218448, "loss": 2.221, "step": 141475 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018663284105821618, "loss": 2.1334, "step": 141480 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018663191786838902, "loss": 2.1621, "step": 141485 }, { "epoch": 0.33, "grad_norm": 1.875, "learning_rate": 0.00018663099464896683, "loss": 2.1783, "step": 141490 }, { "epoch": 0.33, "grad_norm": 1.8984375, "learning_rate": 0.0001866300713999499, "loss": 2.1371, "step": 141495 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.0001866291481213386, "loss": 2.2492, "step": 141500 }, { "epoch": 0.33, "grad_norm": 2.53125, "learning_rate": 0.00018662822481313321, "loss": 2.1533, "step": 141505 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018662730147533404, "loss": 2.2181, "step": 141510 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018662637810794143, "loss": 2.2534, "step": 141515 }, { "epoch": 0.33, "grad_norm": 2.0, "learning_rate": 0.00018662545471095563, "loss": 1.9915, "step": 141520 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.00018662453128437701, "loss": 1.9676, "step": 141525 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.0001866236078282059, "loss": 2.0775, "step": 141530 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001866226843424426, "loss": 1.8959, "step": 141535 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.0001866217608270874, "loss": 2.107, "step": 141540 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018662083728214067, "loss": 2.1789, "step": 141545 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018661991370760264, "loss": 2.2753, "step": 141550 }, { "epoch": 0.33, "grad_norm": 2.53125, "learning_rate": 0.00018661899010347373, "loss": 2.2679, "step": 141555 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018661806646975417, "loss": 2.3058, "step": 141560 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.0001866171428064443, "loss": 2.1776, "step": 141565 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018661621911354446, "loss": 2.1269, "step": 141570 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.000186615295391055, "loss": 2.1522, "step": 141575 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018661437163897612, "loss": 2.1433, "step": 141580 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018661344785730817, "loss": 2.0543, "step": 141585 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018661252404605154, "loss": 1.8297, "step": 141590 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018661160020520652, "loss": 2.1683, "step": 141595 }, { "epoch": 0.33, "grad_norm": 2.828125, "learning_rate": 0.00018661067633477337, "loss": 2.1307, "step": 141600 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018660975243475246, "loss": 1.9635, "step": 141605 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018660882850514407, "loss": 2.1892, "step": 141610 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018660790454594857, "loss": 2.2006, "step": 141615 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.0001866069805571662, "loss": 2.0406, "step": 141620 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018660605653879735, "loss": 1.9275, "step": 141625 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018660513249084225, "loss": 2.0834, "step": 141630 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001866042084133013, "loss": 2.0902, "step": 141635 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.0001866032843061748, "loss": 2.1891, "step": 141640 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.000186602360169463, "loss": 2.0568, "step": 141645 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001866014360031663, "loss": 1.938, "step": 141650 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018660051180728498, "loss": 2.0482, "step": 141655 }, { "epoch": 0.33, "grad_norm": 2.5625, "learning_rate": 0.00018659958758181935, "loss": 2.3354, "step": 141660 }, { "epoch": 0.33, "grad_norm": 2.609375, "learning_rate": 0.0001865986633267697, "loss": 2.3427, "step": 141665 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018659773904213642, "loss": 2.0357, "step": 141670 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.00018659681472791973, "loss": 1.9623, "step": 141675 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018659589038412007, "loss": 2.1055, "step": 141680 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018659496601073763, "loss": 2.0835, "step": 141685 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018659404160777278, "loss": 2.1463, "step": 141690 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018659311717522586, "loss": 2.0844, "step": 141695 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018659219271309713, "loss": 2.1414, "step": 141700 }, { "epoch": 0.33, "grad_norm": 2.59375, "learning_rate": 0.00018659126822138697, "loss": 1.9975, "step": 141705 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018659034370009566, "loss": 2.104, "step": 141710 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.0001865894191492235, "loss": 2.2269, "step": 141715 }, { "epoch": 0.33, "grad_norm": 2.515625, "learning_rate": 0.00018658849456877082, "loss": 2.3424, "step": 141720 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.00018658756995873794, "loss": 2.2319, "step": 141725 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018658664531912518, "loss": 2.1593, "step": 141730 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.00018658572064993284, "loss": 2.1754, "step": 141735 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.0001865847959511613, "loss": 2.0338, "step": 141740 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018658387122281079, "loss": 2.1675, "step": 141745 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018658294646488167, "loss": 2.1836, "step": 141750 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.0001865820216773742, "loss": 2.3512, "step": 141755 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.0001865810968602888, "loss": 2.1674, "step": 141760 }, { "epoch": 0.33, "grad_norm": 1.9296875, "learning_rate": 0.00018658017201362567, "loss": 2.209, "step": 141765 }, { "epoch": 0.33, "grad_norm": 2.296875, "learning_rate": 0.00018657924713738519, "loss": 1.9819, "step": 141770 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.0001865783222315677, "loss": 2.2685, "step": 141775 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018657739729617347, "loss": 2.0342, "step": 141780 }, { "epoch": 0.33, "grad_norm": 1.8828125, "learning_rate": 0.00018657647233120286, "loss": 2.1162, "step": 141785 }, { "epoch": 0.33, "grad_norm": 2.25, "learning_rate": 0.0001865755473366561, "loss": 2.2106, "step": 141790 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018657462231253362, "loss": 1.9494, "step": 141795 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018657369725883567, "loss": 2.128, "step": 141800 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018657277217556256, "loss": 2.178, "step": 141805 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.0001865718470627146, "loss": 2.1972, "step": 141810 }, { "epoch": 0.33, "grad_norm": 1.9609375, "learning_rate": 0.00018657092192029214, "loss": 1.9597, "step": 141815 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001865699967482955, "loss": 2.3496, "step": 141820 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.000186569071546725, "loss": 2.0633, "step": 141825 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018656814631558087, "loss": 2.3077, "step": 141830 }, { "epoch": 0.33, "grad_norm": 1.8515625, "learning_rate": 0.00018656722105486355, "loss": 1.9761, "step": 141835 }, { "epoch": 0.33, "grad_norm": 2.328125, "learning_rate": 0.00018656629576457328, "loss": 2.2315, "step": 141840 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001865653704447104, "loss": 2.2604, "step": 141845 }, { "epoch": 0.33, "grad_norm": 2.65625, "learning_rate": 0.00018656444509527518, "loss": 2.3204, "step": 141850 }, { "epoch": 0.33, "grad_norm": 1.9609375, "learning_rate": 0.000186563519716268, "loss": 2.0796, "step": 141855 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018656259430768917, "loss": 2.1343, "step": 141860 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018656166886953898, "loss": 2.0852, "step": 141865 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.00018656074340181773, "loss": 2.1158, "step": 141870 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018655981790452578, "loss": 2.1274, "step": 141875 }, { "epoch": 0.33, "grad_norm": 2.625, "learning_rate": 0.0001865588923776634, "loss": 2.0758, "step": 141880 }, { "epoch": 0.33, "grad_norm": 2.578125, "learning_rate": 0.000186557966821231, "loss": 2.1786, "step": 141885 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018655704123522876, "loss": 2.1791, "step": 141890 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.0001865561156196571, "loss": 2.3244, "step": 141895 }, { "epoch": 0.33, "grad_norm": 1.953125, "learning_rate": 0.0001865551899745163, "loss": 2.238, "step": 141900 }, { "epoch": 0.33, "grad_norm": 2.546875, "learning_rate": 0.00018655426429980664, "loss": 1.9887, "step": 141905 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018655333859552851, "loss": 1.9888, "step": 141910 }, { "epoch": 0.33, "grad_norm": 1.84375, "learning_rate": 0.0001865524128616822, "loss": 2.1785, "step": 141915 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.000186551487098268, "loss": 2.2369, "step": 141920 }, { "epoch": 0.33, "grad_norm": 2.8125, "learning_rate": 0.00018655056130528622, "loss": 2.14, "step": 141925 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018654963548273723, "loss": 2.1374, "step": 141930 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001865487096306213, "loss": 2.235, "step": 141935 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018654778374893876, "loss": 2.2998, "step": 141940 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018654685783768993, "loss": 2.2315, "step": 141945 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018654593189687513, "loss": 2.2297, "step": 141950 }, { "epoch": 0.33, "grad_norm": 1.875, "learning_rate": 0.00018654500592649466, "loss": 2.2015, "step": 141955 }, { "epoch": 0.33, "grad_norm": 2.0, "learning_rate": 0.00018654407992654885, "loss": 2.2261, "step": 141960 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.000186543153897038, "loss": 2.0868, "step": 141965 }, { "epoch": 0.33, "grad_norm": 2.84375, "learning_rate": 0.00018654222783796246, "loss": 2.2741, "step": 141970 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018654130174932255, "loss": 2.1431, "step": 141975 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.0001865403756311185, "loss": 2.1593, "step": 141980 }, { "epoch": 0.33, "grad_norm": 2.6875, "learning_rate": 0.00018653944948335074, "loss": 2.0408, "step": 141985 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.0001865385233060195, "loss": 2.0578, "step": 141990 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018653759709912516, "loss": 2.1601, "step": 141995 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.000186536670862668, "loss": 1.9909, "step": 142000 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018653574459664832, "loss": 2.1703, "step": 142005 }, { "epoch": 0.33, "grad_norm": 2.796875, "learning_rate": 0.00018653481830106647, "loss": 2.1076, "step": 142010 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018653389197592277, "loss": 2.2704, "step": 142015 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.0001865329656212175, "loss": 2.1563, "step": 142020 }, { "epoch": 0.33, "grad_norm": 2.21875, "learning_rate": 0.00018653203923695103, "loss": 2.2025, "step": 142025 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018653111282312363, "loss": 2.076, "step": 142030 }, { "epoch": 0.33, "grad_norm": 2.1875, "learning_rate": 0.00018653018637973564, "loss": 2.1263, "step": 142035 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018652925990678737, "loss": 2.1972, "step": 142040 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.0001865283334042791, "loss": 2.1669, "step": 142045 }, { "epoch": 0.33, "grad_norm": 2.46875, "learning_rate": 0.00018652740687221121, "loss": 1.9907, "step": 142050 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018652648031058399, "loss": 2.1664, "step": 142055 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.00018652555371939777, "loss": 2.0563, "step": 142060 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018652462709865285, "loss": 1.9878, "step": 142065 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.0001865237004483495, "loss": 2.1775, "step": 142070 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.0001865227737684881, "loss": 2.284, "step": 142075 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.000186521847059069, "loss": 1.9932, "step": 142080 }, { "epoch": 0.33, "grad_norm": 2.625, "learning_rate": 0.00018652092032009246, "loss": 2.1159, "step": 142085 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.00018651999355155877, "loss": 2.1162, "step": 142090 }, { "epoch": 0.33, "grad_norm": 2.0, "learning_rate": 0.0001865190667534683, "loss": 2.0663, "step": 142095 }, { "epoch": 0.33, "grad_norm": 2.09375, "learning_rate": 0.00018651813992582136, "loss": 2.3828, "step": 142100 }, { "epoch": 0.33, "grad_norm": 1.9140625, "learning_rate": 0.00018651721306861823, "loss": 2.1657, "step": 142105 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018651628618185923, "loss": 2.165, "step": 142110 }, { "epoch": 0.33, "grad_norm": 2.40625, "learning_rate": 0.00018651535926554472, "loss": 2.2245, "step": 142115 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.000186514432319675, "loss": 2.0271, "step": 142120 }, { "epoch": 0.33, "grad_norm": 2.109375, "learning_rate": 0.00018651350534425038, "loss": 2.0281, "step": 142125 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018651257833927117, "loss": 2.0884, "step": 142130 }, { "epoch": 0.33, "grad_norm": 1.703125, "learning_rate": 0.0001865116513047377, "loss": 2.1072, "step": 142135 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001865107242406503, "loss": 2.1404, "step": 142140 }, { "epoch": 0.33, "grad_norm": 1.9765625, "learning_rate": 0.00018650979714700925, "loss": 2.3104, "step": 142145 }, { "epoch": 0.33, "grad_norm": 2.3125, "learning_rate": 0.0001865088700238149, "loss": 2.1401, "step": 142150 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.00018650794287106753, "loss": 2.1296, "step": 142155 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.00018650701568876746, "loss": 2.0654, "step": 142160 }, { "epoch": 0.33, "grad_norm": 1.578125, "learning_rate": 0.00018650608847691508, "loss": 2.1078, "step": 142165 }, { "epoch": 0.33, "grad_norm": 2.734375, "learning_rate": 0.0001865051612355106, "loss": 2.1792, "step": 142170 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.0001865042339645544, "loss": 2.0724, "step": 142175 }, { "epoch": 0.33, "grad_norm": 2.734375, "learning_rate": 0.00018650330666404678, "loss": 2.3046, "step": 142180 }, { "epoch": 0.33, "grad_norm": 2.4375, "learning_rate": 0.0001865023793339881, "loss": 1.9278, "step": 142185 }, { "epoch": 0.33, "grad_norm": 1.875, "learning_rate": 0.00018650145197437857, "loss": 2.1567, "step": 142190 }, { "epoch": 0.33, "grad_norm": 1.921875, "learning_rate": 0.00018650052458521864, "loss": 2.1962, "step": 142195 }, { "epoch": 0.33, "grad_norm": 2.0625, "learning_rate": 0.00018649959716650853, "loss": 2.3001, "step": 142200 }, { "epoch": 0.33, "grad_norm": 2.453125, "learning_rate": 0.0001864986697182486, "loss": 2.2069, "step": 142205 }, { "epoch": 0.33, "grad_norm": 1.78125, "learning_rate": 0.00018649774224043916, "loss": 2.1653, "step": 142210 }, { "epoch": 0.33, "grad_norm": 2.28125, "learning_rate": 0.0001864968147330805, "loss": 2.21, "step": 142215 }, { "epoch": 0.33, "grad_norm": 4.875, "learning_rate": 0.000186495887196173, "loss": 2.203, "step": 142220 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.0001864949596297169, "loss": 1.9325, "step": 142225 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018649403203371256, "loss": 2.2561, "step": 142230 }, { "epoch": 0.33, "grad_norm": 2.890625, "learning_rate": 0.0001864931044081603, "loss": 1.9228, "step": 142235 }, { "epoch": 0.33, "grad_norm": 2.375, "learning_rate": 0.00018649217675306042, "loss": 2.1992, "step": 142240 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.00018649124906841325, "loss": 2.0717, "step": 142245 }, { "epoch": 0.33, "grad_norm": 2.484375, "learning_rate": 0.0001864903213542191, "loss": 2.1348, "step": 142250 }, { "epoch": 0.33, "grad_norm": 2.203125, "learning_rate": 0.00018648939361047827, "loss": 1.9534, "step": 142255 }, { "epoch": 0.33, "grad_norm": 2.046875, "learning_rate": 0.00018648846583719112, "loss": 2.1601, "step": 142260 }, { "epoch": 0.33, "grad_norm": 2.171875, "learning_rate": 0.00018648753803435794, "loss": 2.1762, "step": 142265 }, { "epoch": 0.33, "grad_norm": 2.234375, "learning_rate": 0.000186486610201979, "loss": 2.0916, "step": 142270 }, { "epoch": 0.33, "grad_norm": 2.015625, "learning_rate": 0.00018648568234005475, "loss": 2.0416, "step": 142275 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018648475444858536, "loss": 1.8881, "step": 142280 }, { "epoch": 0.33, "grad_norm": 1.921875, "learning_rate": 0.00018648382652757125, "loss": 2.1606, "step": 142285 }, { "epoch": 0.33, "grad_norm": 2.15625, "learning_rate": 0.00018648289857701265, "loss": 2.278, "step": 142290 }, { "epoch": 0.33, "grad_norm": 1.890625, "learning_rate": 0.00018648197059690995, "loss": 2.2874, "step": 142295 }, { "epoch": 0.33, "grad_norm": 2.34375, "learning_rate": 0.00018648104258726345, "loss": 2.1028, "step": 142300 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.0001864801145480734, "loss": 2.1967, "step": 142305 }, { "epoch": 0.33, "grad_norm": 2.625, "learning_rate": 0.00018647918647934024, "loss": 2.0269, "step": 142310 }, { "epoch": 0.33, "grad_norm": 2.125, "learning_rate": 0.0001864782583810642, "loss": 2.2878, "step": 142315 }, { "epoch": 0.33, "grad_norm": 2.265625, "learning_rate": 0.00018647733025324562, "loss": 2.2565, "step": 142320 }, { "epoch": 0.33, "grad_norm": 1.96875, "learning_rate": 0.0001864764020958848, "loss": 1.912, "step": 142325 }, { "epoch": 0.33, "grad_norm": 2.390625, "learning_rate": 0.0001864754739089821, "loss": 2.0934, "step": 142330 }, { "epoch": 0.33, "grad_norm": 2.421875, "learning_rate": 0.0001864745456925378, "loss": 2.0517, "step": 142335 }, { "epoch": 0.33, "grad_norm": 2.5, "learning_rate": 0.00018647361744655224, "loss": 2.1326, "step": 142340 }, { "epoch": 0.33, "grad_norm": 2.140625, "learning_rate": 0.0001864726891710257, "loss": 2.0513, "step": 142345 }, { "epoch": 0.33, "grad_norm": 2.078125, "learning_rate": 0.00018647176086595852, "loss": 2.1274, "step": 142350 }, { "epoch": 0.34, "grad_norm": 2.484375, "learning_rate": 0.00018647083253135104, "loss": 2.031, "step": 142355 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018646990416720352, "loss": 2.1355, "step": 142360 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018646897577351634, "loss": 2.2078, "step": 142365 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018646804735028978, "loss": 1.9359, "step": 142370 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018646711889752418, "loss": 2.3061, "step": 142375 }, { "epoch": 0.34, "grad_norm": 1.9609375, "learning_rate": 0.00018646619041521982, "loss": 2.0127, "step": 142380 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018646526190337705, "loss": 2.1945, "step": 142385 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.0001864643333619962, "loss": 2.0438, "step": 142390 }, { "epoch": 0.34, "grad_norm": 1.9609375, "learning_rate": 0.00018646340479107755, "loss": 1.9246, "step": 142395 }, { "epoch": 0.34, "grad_norm": 2.640625, "learning_rate": 0.00018646247619062142, "loss": 2.1344, "step": 142400 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018646154756062818, "loss": 1.9233, "step": 142405 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.00018646061890109804, "loss": 1.8472, "step": 142410 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018645969021203145, "loss": 2.1156, "step": 142415 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018645876149342864, "loss": 2.0701, "step": 142420 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018645783274528994, "loss": 2.2481, "step": 142425 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018645690396761565, "loss": 2.2659, "step": 142430 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018645597516040615, "loss": 2.2586, "step": 142435 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.00018645504632366173, "loss": 2.1203, "step": 142440 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.0001864541174573827, "loss": 2.1046, "step": 142445 }, { "epoch": 0.34, "grad_norm": 1.9140625, "learning_rate": 0.00018645318856156935, "loss": 2.0765, "step": 142450 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018645225963622204, "loss": 2.059, "step": 142455 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018645133068134105, "loss": 2.0266, "step": 142460 }, { "epoch": 0.34, "grad_norm": 2.796875, "learning_rate": 0.00018645040169692673, "loss": 2.1976, "step": 142465 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.0001864494726829794, "loss": 2.1449, "step": 142470 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018644854363949933, "loss": 2.1497, "step": 142475 }, { "epoch": 0.34, "grad_norm": 2.5625, "learning_rate": 0.0001864476145664869, "loss": 2.1286, "step": 142480 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018644668546394239, "loss": 2.2019, "step": 142485 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018644575633186612, "loss": 2.2033, "step": 142490 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.0001864448271702584, "loss": 2.1522, "step": 142495 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018644389797911956, "loss": 2.3124, "step": 142500 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018644296875844997, "loss": 2.0783, "step": 142505 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018644203950824982, "loss": 2.1694, "step": 142510 }, { "epoch": 0.34, "grad_norm": 1.8515625, "learning_rate": 0.00018644111022851954, "loss": 1.9079, "step": 142515 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018644018091925938, "loss": 1.9834, "step": 142520 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018643925158046972, "loss": 2.0634, "step": 142525 }, { "epoch": 0.34, "grad_norm": 1.8984375, "learning_rate": 0.00018643832221215082, "loss": 2.1504, "step": 142530 }, { "epoch": 0.34, "grad_norm": 1.8828125, "learning_rate": 0.00018643739281430305, "loss": 2.0982, "step": 142535 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018643646338692668, "loss": 2.1221, "step": 142540 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018643553393002206, "loss": 2.2596, "step": 142545 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.0001864346044435895, "loss": 2.0226, "step": 142550 }, { "epoch": 0.34, "grad_norm": 1.859375, "learning_rate": 0.00018643367492762928, "loss": 2.1957, "step": 142555 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018643274538214177, "loss": 2.3694, "step": 142560 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018643181580712726, "loss": 2.3116, "step": 142565 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018643088620258608, "loss": 2.252, "step": 142570 }, { "epoch": 0.34, "grad_norm": 1.8359375, "learning_rate": 0.00018642995656851851, "loss": 1.9083, "step": 142575 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018642902690492493, "loss": 2.2753, "step": 142580 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018642809721180562, "loss": 1.9877, "step": 142585 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.0001864271674891609, "loss": 2.0508, "step": 142590 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.0001864262377369911, "loss": 2.2675, "step": 142595 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018642530795529654, "loss": 2.1318, "step": 142600 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.00018642437814407752, "loss": 2.2138, "step": 142605 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.00018642344830333436, "loss": 2.1691, "step": 142610 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018642251843306736, "loss": 2.3258, "step": 142615 }, { "epoch": 0.34, "grad_norm": 1.890625, "learning_rate": 0.00018642158853327692, "loss": 2.1527, "step": 142620 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018642065860396324, "loss": 2.08, "step": 142625 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018641972864512674, "loss": 1.9598, "step": 142630 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018641879865676766, "loss": 1.9492, "step": 142635 }, { "epoch": 0.34, "grad_norm": 3.1875, "learning_rate": 0.00018641786863888635, "loss": 2.1312, "step": 142640 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018641693859148317, "loss": 1.9089, "step": 142645 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018641600851455835, "loss": 2.0984, "step": 142650 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018641507840811227, "loss": 2.1233, "step": 142655 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018641414827214522, "loss": 2.1532, "step": 142660 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.00018641321810665756, "loss": 2.0668, "step": 142665 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.00018641228791164958, "loss": 1.851, "step": 142670 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018641135768712158, "loss": 2.3103, "step": 142675 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.00018641042743307388, "loss": 2.0557, "step": 142680 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018640949714950682, "loss": 2.0742, "step": 142685 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.0001864085668364207, "loss": 2.2294, "step": 142690 }, { "epoch": 0.34, "grad_norm": 1.921875, "learning_rate": 0.00018640763649381588, "loss": 2.1937, "step": 142695 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.0001864067061216926, "loss": 2.1875, "step": 142700 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018640577572005124, "loss": 2.1951, "step": 142705 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018640484528889212, "loss": 2.2722, "step": 142710 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.0001864039148282155, "loss": 2.0158, "step": 142715 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.00018640298433802177, "loss": 2.2793, "step": 142720 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001864020538183112, "loss": 2.1555, "step": 142725 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018640112326908411, "loss": 2.2483, "step": 142730 }, { "epoch": 0.34, "grad_norm": 3.453125, "learning_rate": 0.00018640019269034085, "loss": 2.0812, "step": 142735 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018639926208208172, "loss": 2.0919, "step": 142740 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.000186398331444307, "loss": 2.0848, "step": 142745 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018639740077701705, "loss": 2.2349, "step": 142750 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018639647008021222, "loss": 1.9922, "step": 142755 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018639553935389275, "loss": 2.1343, "step": 142760 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.000186394608598059, "loss": 2.0608, "step": 142765 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018639367781271134, "loss": 2.158, "step": 142770 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018639274699784996, "loss": 2.1897, "step": 142775 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018639181615347529, "loss": 2.0869, "step": 142780 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.0001863908852795876, "loss": 2.3366, "step": 142785 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.0001863899543761872, "loss": 2.2668, "step": 142790 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018638902344327444, "loss": 2.1355, "step": 142795 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018638809248084964, "loss": 2.0593, "step": 142800 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018638716148891308, "loss": 2.2428, "step": 142805 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001863862304674651, "loss": 1.9861, "step": 142810 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018638529941650603, "loss": 2.3067, "step": 142815 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018638436833603617, "loss": 2.3128, "step": 142820 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018638343722605582, "loss": 2.0183, "step": 142825 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018638250608656536, "loss": 1.9528, "step": 142830 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018638157491756506, "loss": 2.1487, "step": 142835 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.0001863806437190552, "loss": 2.1543, "step": 142840 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.0001863797124910362, "loss": 2.1788, "step": 142845 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018637878123350831, "loss": 2.1741, "step": 142850 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018637784994647184, "loss": 2.2345, "step": 142855 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018637691862992718, "loss": 2.2001, "step": 142860 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018637598728387454, "loss": 2.1874, "step": 142865 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018637505590831435, "loss": 2.072, "step": 142870 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018637412450324683, "loss": 2.1153, "step": 142875 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018637319306867236, "loss": 2.0885, "step": 142880 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.00018637226160459126, "loss": 2.0949, "step": 142885 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001863713301110038, "loss": 2.0752, "step": 142890 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018637039858791034, "loss": 2.2314, "step": 142895 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018636946703531117, "loss": 2.1768, "step": 142900 }, { "epoch": 0.34, "grad_norm": 1.9921875, "learning_rate": 0.00018636853545320664, "loss": 2.1625, "step": 142905 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018636760384159703, "loss": 2.2525, "step": 142910 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.0001863666722004827, "loss": 2.1453, "step": 142915 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018636574052986396, "loss": 2.1086, "step": 142920 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001863648088297411, "loss": 2.1513, "step": 142925 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018636387710011448, "loss": 2.1017, "step": 142930 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018636294534098434, "loss": 2.0123, "step": 142935 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018636201355235108, "loss": 2.3062, "step": 142940 }, { "epoch": 0.34, "grad_norm": 1.8828125, "learning_rate": 0.000186361081734215, "loss": 2.0602, "step": 142945 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.0001863601498865764, "loss": 2.3198, "step": 142950 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001863592180094356, "loss": 2.0146, "step": 142955 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018635828610279293, "loss": 2.1095, "step": 142960 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001863573541666487, "loss": 2.0089, "step": 142965 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018635642220100323, "loss": 2.1782, "step": 142970 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018635549020585686, "loss": 2.0003, "step": 142975 }, { "epoch": 0.34, "grad_norm": 2.703125, "learning_rate": 0.00018635455818120987, "loss": 1.9978, "step": 142980 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018635362612706263, "loss": 2.1151, "step": 142985 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001863526940434154, "loss": 2.0629, "step": 142990 }, { "epoch": 0.34, "grad_norm": 1.84375, "learning_rate": 0.00018635176193026852, "loss": 2.2555, "step": 142995 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018635082978762227, "loss": 2.0449, "step": 143000 }, { "epoch": 0.34, "grad_norm": 1.9296875, "learning_rate": 0.00018634989761547708, "loss": 2.1086, "step": 143005 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018634896541383318, "loss": 2.3188, "step": 143010 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.0001863480331826909, "loss": 2.1363, "step": 143015 }, { "epoch": 0.34, "grad_norm": 1.9765625, "learning_rate": 0.00018634710092205054, "loss": 1.9557, "step": 143020 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018634616863191246, "loss": 2.199, "step": 143025 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.000186345236312277, "loss": 2.0272, "step": 143030 }, { "epoch": 0.34, "grad_norm": 1.75, "learning_rate": 0.0001863443039631444, "loss": 2.1746, "step": 143035 }, { "epoch": 0.34, "grad_norm": 2.5625, "learning_rate": 0.00018634337158451506, "loss": 2.1189, "step": 143040 }, { "epoch": 0.34, "grad_norm": 3.8125, "learning_rate": 0.00018634243917638922, "loss": 1.9527, "step": 143045 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018634150673876726, "loss": 2.2574, "step": 143050 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018634057427164945, "loss": 2.2516, "step": 143055 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.00018633964177503616, "loss": 2.2747, "step": 143060 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018633870924892766, "loss": 2.1759, "step": 143065 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001863377766933243, "loss": 2.2598, "step": 143070 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001863368441082264, "loss": 2.168, "step": 143075 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018633591149363427, "loss": 2.084, "step": 143080 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001863349788495482, "loss": 2.1432, "step": 143085 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018633404617596856, "loss": 2.2688, "step": 143090 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018633311347289564, "loss": 2.387, "step": 143095 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018633218074032976, "loss": 2.29, "step": 143100 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018633124797827125, "loss": 2.1937, "step": 143105 }, { "epoch": 0.34, "grad_norm": 1.4609375, "learning_rate": 0.0001863303151867204, "loss": 1.9261, "step": 143110 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.00018632938236567753, "loss": 2.088, "step": 143115 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.00018632844951514303, "loss": 2.0966, "step": 143120 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018632751663511714, "loss": 2.1271, "step": 143125 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018632658372560022, "loss": 2.0493, "step": 143130 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018632565078659257, "loss": 2.1546, "step": 143135 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.0001863247178180945, "loss": 2.1292, "step": 143140 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018632378482010632, "loss": 2.1384, "step": 143145 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.0001863228517926284, "loss": 2.1374, "step": 143150 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018632191873566101, "loss": 2.2179, "step": 143155 }, { "epoch": 0.34, "grad_norm": 2.734375, "learning_rate": 0.0001863209856492045, "loss": 2.011, "step": 143160 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018632005253325917, "loss": 2.1152, "step": 143165 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018631911938782537, "loss": 2.2062, "step": 143170 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018631818621290335, "loss": 2.1696, "step": 143175 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018631725300849352, "loss": 2.1169, "step": 143180 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.0001863163197745961, "loss": 2.1886, "step": 143185 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018631538651121149, "loss": 2.3474, "step": 143190 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018631445321833998, "loss": 2.0191, "step": 143195 }, { "epoch": 0.34, "grad_norm": 2.75, "learning_rate": 0.00018631351989598186, "loss": 2.084, "step": 143200 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018631258654413753, "loss": 2.1775, "step": 143205 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018631165316280722, "loss": 2.2917, "step": 143210 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.0001863107197519913, "loss": 2.006, "step": 143215 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018630978631169006, "loss": 2.0975, "step": 143220 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.0001863088528419038, "loss": 2.0688, "step": 143225 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018630791934263292, "loss": 2.2446, "step": 143230 }, { "epoch": 0.34, "grad_norm": 2.90625, "learning_rate": 0.00018630698581387765, "loss": 2.2398, "step": 143235 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018630605225563836, "loss": 2.1562, "step": 143240 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.0001863051186679154, "loss": 2.1783, "step": 143245 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.000186304185050709, "loss": 2.1248, "step": 143250 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018630325140401953, "loss": 2.2237, "step": 143255 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018630231772784732, "loss": 2.0395, "step": 143260 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018630138402219267, "loss": 2.0275, "step": 143265 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018630045028705588, "loss": 2.0776, "step": 143270 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.0001862995165224373, "loss": 2.1573, "step": 143275 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018629858272833726, "loss": 2.1766, "step": 143280 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018629764890475606, "loss": 2.1073, "step": 143285 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018629671505169398, "loss": 2.0269, "step": 143290 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018629578116915142, "loss": 2.0818, "step": 143295 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018629484725712865, "loss": 2.1239, "step": 143300 }, { "epoch": 0.34, "grad_norm": 2.640625, "learning_rate": 0.00018629391331562598, "loss": 2.0397, "step": 143305 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018629297934464375, "loss": 2.3325, "step": 143310 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018629204534418225, "loss": 1.9693, "step": 143315 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018629111131424186, "loss": 2.0737, "step": 143320 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018629017725482283, "loss": 2.1441, "step": 143325 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018628924316592554, "loss": 2.3478, "step": 143330 }, { "epoch": 0.34, "grad_norm": 1.90625, "learning_rate": 0.00018628830904755025, "loss": 2.1915, "step": 143335 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018628737489969735, "loss": 2.0337, "step": 143340 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018628644072236707, "loss": 2.1357, "step": 143345 }, { "epoch": 0.34, "grad_norm": 1.828125, "learning_rate": 0.0001862855065155598, "loss": 2.2667, "step": 143350 }, { "epoch": 0.34, "grad_norm": 2.5625, "learning_rate": 0.00018628457227927583, "loss": 2.2042, "step": 143355 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.0001862836380135155, "loss": 2.0774, "step": 143360 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001862827037182791, "loss": 2.0547, "step": 143365 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018628176939356695, "loss": 2.1462, "step": 143370 }, { "epoch": 0.34, "grad_norm": 1.9765625, "learning_rate": 0.0001862808350393794, "loss": 2.1756, "step": 143375 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018627990065571678, "loss": 2.1414, "step": 143380 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018627896624257934, "loss": 1.9708, "step": 143385 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018627803179996745, "loss": 2.0373, "step": 143390 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018627709732788142, "loss": 2.2335, "step": 143395 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018627616282632158, "loss": 2.1721, "step": 143400 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.0001862752282952882, "loss": 2.2227, "step": 143405 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001862742937347817, "loss": 2.1993, "step": 143410 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018627335914480231, "loss": 2.09, "step": 143415 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018627242452535033, "loss": 1.8984, "step": 143420 }, { "epoch": 0.34, "grad_norm": 1.9765625, "learning_rate": 0.0001862714898764262, "loss": 2.2413, "step": 143425 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018627055519803012, "loss": 2.2124, "step": 143430 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018626962049016248, "loss": 2.1769, "step": 143435 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018626868575282352, "loss": 2.183, "step": 143440 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018626775098601366, "loss": 2.0513, "step": 143445 }, { "epoch": 0.34, "grad_norm": 1.7890625, "learning_rate": 0.00018626681618973319, "loss": 2.0624, "step": 143450 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018626588136398239, "loss": 2.1167, "step": 143455 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018626494650876156, "loss": 2.2447, "step": 143460 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.0001862640116240711, "loss": 2.1012, "step": 143465 }, { "epoch": 0.34, "grad_norm": 2.5625, "learning_rate": 0.00018626307670991128, "loss": 2.1314, "step": 143470 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018626214176628242, "loss": 2.1876, "step": 143475 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018626120679318485, "loss": 2.1413, "step": 143480 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018626027179061892, "loss": 2.0608, "step": 143485 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.0001862593367585849, "loss": 2.253, "step": 143490 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001862584016970831, "loss": 2.1326, "step": 143495 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018625746660611388, "loss": 2.1329, "step": 143500 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018625653148567753, "loss": 2.252, "step": 143505 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018625559633577443, "loss": 2.2724, "step": 143510 }, { "epoch": 0.34, "grad_norm": 1.9921875, "learning_rate": 0.0001862546611564048, "loss": 2.0837, "step": 143515 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018625372594756906, "loss": 2.154, "step": 143520 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018625279070926744, "loss": 2.0874, "step": 143525 }, { "epoch": 0.34, "grad_norm": 2.484375, "learning_rate": 0.00018625185544150034, "loss": 2.1326, "step": 143530 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018625092014426802, "loss": 2.0457, "step": 143535 }, { "epoch": 0.34, "grad_norm": 2.671875, "learning_rate": 0.00018624998481757085, "loss": 2.0828, "step": 143540 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018624904946140907, "loss": 2.0324, "step": 143545 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.0001862481140757831, "loss": 1.9668, "step": 143550 }, { "epoch": 0.34, "grad_norm": 1.8125, "learning_rate": 0.00018624717866069318, "loss": 1.9803, "step": 143555 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018624624321613968, "loss": 1.9953, "step": 143560 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001862453077421229, "loss": 2.1288, "step": 143565 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.00018624437223864316, "loss": 2.2244, "step": 143570 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018624343670570073, "loss": 2.1953, "step": 143575 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018624250114329603, "loss": 2.2405, "step": 143580 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.0001862415655514293, "loss": 2.2619, "step": 143585 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.0001862406299301009, "loss": 2.1353, "step": 143590 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018623969427931114, "loss": 2.2194, "step": 143595 }, { "epoch": 0.34, "grad_norm": 1.8046875, "learning_rate": 0.00018623875859906033, "loss": 2.0308, "step": 143600 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.0001862378228893488, "loss": 2.0069, "step": 143605 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.00018623688715017686, "loss": 2.1871, "step": 143610 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018623595138154487, "loss": 2.0957, "step": 143615 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018623501558345305, "loss": 2.1558, "step": 143620 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018623407975590183, "loss": 2.1273, "step": 143625 }, { "epoch": 0.34, "grad_norm": 2.578125, "learning_rate": 0.00018623314389889148, "loss": 2.0432, "step": 143630 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.00018623220801242233, "loss": 2.2886, "step": 143635 }, { "epoch": 0.34, "grad_norm": 2.484375, "learning_rate": 0.00018623127209649467, "loss": 2.0224, "step": 143640 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018623033615110886, "loss": 2.0974, "step": 143645 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018622940017626517, "loss": 2.0776, "step": 143650 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.000186228464171964, "loss": 2.1903, "step": 143655 }, { "epoch": 0.34, "grad_norm": 2.578125, "learning_rate": 0.00018622752813820558, "loss": 2.126, "step": 143660 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001862265920749903, "loss": 2.1526, "step": 143665 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018622565598231847, "loss": 2.3061, "step": 143670 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018622471986019036, "loss": 2.3037, "step": 143675 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018622378370860635, "loss": 1.94, "step": 143680 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001862228475275667, "loss": 2.155, "step": 143685 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018622191131707178, "loss": 2.0792, "step": 143690 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018622097507712187, "loss": 2.1587, "step": 143695 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018622003880771734, "loss": 2.2407, "step": 143700 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018621910250885847, "loss": 2.0466, "step": 143705 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.0001862181661805456, "loss": 2.2873, "step": 143710 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018621722982277902, "loss": 2.2031, "step": 143715 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001862162934355591, "loss": 2.1007, "step": 143720 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.0001862153570188861, "loss": 2.1503, "step": 143725 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018621442057276038, "loss": 2.1265, "step": 143730 }, { "epoch": 0.34, "grad_norm": 1.8671875, "learning_rate": 0.00018621348409718228, "loss": 2.1216, "step": 143735 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018621254759215204, "loss": 1.9343, "step": 143740 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018621161105767008, "loss": 2.2502, "step": 143745 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018621067449373663, "loss": 2.1433, "step": 143750 }, { "epoch": 0.34, "grad_norm": 1.8359375, "learning_rate": 0.00018620973790035205, "loss": 2.0173, "step": 143755 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018620880127751669, "loss": 2.1796, "step": 143760 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018620786462523082, "loss": 1.9183, "step": 143765 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018620692794349476, "loss": 2.1307, "step": 143770 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018620599123230888, "loss": 2.1957, "step": 143775 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018620505449167346, "loss": 2.3041, "step": 143780 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018620411772158883, "loss": 2.1703, "step": 143785 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018620318092205532, "loss": 2.4049, "step": 143790 }, { "epoch": 0.34, "grad_norm": 2.578125, "learning_rate": 0.00018620224409307326, "loss": 2.0244, "step": 143795 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001862013072346429, "loss": 2.1233, "step": 143800 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018620037034676465, "loss": 2.0381, "step": 143805 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018619943342943875, "loss": 2.1028, "step": 143810 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.0001861984964826656, "loss": 2.2403, "step": 143815 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.00018619755950644547, "loss": 2.1479, "step": 143820 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001861966225007787, "loss": 2.28, "step": 143825 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018619568546566554, "loss": 2.1056, "step": 143830 }, { "epoch": 0.34, "grad_norm": 2.78125, "learning_rate": 0.00018619474840110645, "loss": 1.9793, "step": 143835 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018619381130710163, "loss": 1.9421, "step": 143840 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018619287418365142, "loss": 2.1971, "step": 143845 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.0001861919370307562, "loss": 2.158, "step": 143850 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018619099984841625, "loss": 2.1926, "step": 143855 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.0001861900626366319, "loss": 2.2678, "step": 143860 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018618912539540344, "loss": 2.1683, "step": 143865 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.0001861881881247312, "loss": 2.267, "step": 143870 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018618725082461554, "loss": 2.1792, "step": 143875 }, { "epoch": 0.34, "grad_norm": 1.796875, "learning_rate": 0.00018618631349505673, "loss": 2.1725, "step": 143880 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018618537613605512, "loss": 1.9295, "step": 143885 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018618443874761101, "loss": 2.2068, "step": 143890 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018618350132972475, "loss": 2.0253, "step": 143895 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018618256388239663, "loss": 2.2282, "step": 143900 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.000186181626405627, "loss": 2.0635, "step": 143905 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018618068889941615, "loss": 2.169, "step": 143910 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.0001861797513637644, "loss": 2.332, "step": 143915 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018617881379867215, "loss": 2.1022, "step": 143920 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.0001861778762041396, "loss": 2.1075, "step": 143925 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018617693858016715, "loss": 2.2488, "step": 143930 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018617600092675507, "loss": 2.3681, "step": 143935 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001861750632439037, "loss": 2.0446, "step": 143940 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018617412553161337, "loss": 1.9592, "step": 143945 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018617318778988443, "loss": 2.1238, "step": 143950 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018617225001871712, "loss": 2.1393, "step": 143955 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018617131221811183, "loss": 2.1339, "step": 143960 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018617037438806885, "loss": 2.0646, "step": 143965 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001861694365285885, "loss": 2.0183, "step": 143970 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018616849863967115, "loss": 2.2579, "step": 143975 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.000186167560721317, "loss": 2.1161, "step": 143980 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.0001861666227735265, "loss": 2.2223, "step": 143985 }, { "epoch": 0.34, "grad_norm": 2.75, "learning_rate": 0.00018616568479629992, "loss": 1.9807, "step": 143990 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001861647467896376, "loss": 2.0882, "step": 143995 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001861638087535398, "loss": 2.1477, "step": 144000 }, { "epoch": 0.34, "grad_norm": 1.7578125, "learning_rate": 0.00018616287068800688, "loss": 2.0482, "step": 144005 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018616193259303918, "loss": 2.3447, "step": 144010 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.000186160994468637, "loss": 2.142, "step": 144015 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018616005631480063, "loss": 2.2109, "step": 144020 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018615911813153047, "loss": 2.3071, "step": 144025 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.0001861581799188268, "loss": 2.2965, "step": 144030 }, { "epoch": 0.34, "grad_norm": 1.8515625, "learning_rate": 0.00018615724167668986, "loss": 2.1582, "step": 144035 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.0001861563034051201, "loss": 2.2529, "step": 144040 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018615536510411778, "loss": 2.0093, "step": 144045 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001861544267736832, "loss": 2.2353, "step": 144050 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018615348841381673, "loss": 2.093, "step": 144055 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018615255002451866, "loss": 2.0797, "step": 144060 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001861516116057893, "loss": 2.1566, "step": 144065 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018615067315762903, "loss": 2.0833, "step": 144070 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.0001861497346800381, "loss": 2.1747, "step": 144075 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018614879617301683, "loss": 2.1261, "step": 144080 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.0001861478576365656, "loss": 2.0341, "step": 144085 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001861469190706847, "loss": 2.0838, "step": 144090 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018614598047537445, "loss": 2.2052, "step": 144095 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018614504185063518, "loss": 2.097, "step": 144100 }, { "epoch": 0.34, "grad_norm": 2.703125, "learning_rate": 0.00018614410319646717, "loss": 2.0369, "step": 144105 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001861431645128708, "loss": 2.1294, "step": 144110 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018614222579984636, "loss": 2.0629, "step": 144115 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018614128705739415, "loss": 2.1149, "step": 144120 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018614034828551455, "loss": 2.2435, "step": 144125 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018613940948420781, "loss": 2.219, "step": 144130 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.0001861384706534743, "loss": 2.0515, "step": 144135 }, { "epoch": 0.34, "grad_norm": 1.9453125, "learning_rate": 0.0001861375317933143, "loss": 1.8655, "step": 144140 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018613659290372822, "loss": 2.0298, "step": 144145 }, { "epoch": 0.34, "grad_norm": 1.8359375, "learning_rate": 0.0001861356539847163, "loss": 2.1894, "step": 144150 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018613471503627885, "loss": 1.9806, "step": 144155 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.0001861337760584162, "loss": 2.1677, "step": 144160 }, { "epoch": 0.34, "grad_norm": 1.9296875, "learning_rate": 0.00018613283705112873, "loss": 2.0319, "step": 144165 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001861318980144167, "loss": 1.959, "step": 144170 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018613095894828046, "loss": 1.9605, "step": 144175 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018613001985272036, "loss": 2.2016, "step": 144180 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018612908072773664, "loss": 2.2171, "step": 144185 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018612814157332967, "loss": 2.0736, "step": 144190 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018612720238949976, "loss": 2.0589, "step": 144195 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018612626317624723, "loss": 2.1496, "step": 144200 }, { "epoch": 0.34, "grad_norm": 1.9296875, "learning_rate": 0.00018612532393357241, "loss": 2.178, "step": 144205 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018612438466147565, "loss": 2.0869, "step": 144210 }, { "epoch": 0.34, "grad_norm": 1.9453125, "learning_rate": 0.00018612344535995723, "loss": 2.1893, "step": 144215 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018612250602901745, "loss": 2.2162, "step": 144220 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018612156666865666, "loss": 2.1833, "step": 144225 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001861206272788752, "loss": 2.0128, "step": 144230 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018611968785967338, "loss": 2.1165, "step": 144235 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.0001861187484110515, "loss": 2.2228, "step": 144240 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018611780893300987, "loss": 1.9784, "step": 144245 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018611686942554887, "loss": 2.2781, "step": 144250 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018611592988866877, "loss": 2.225, "step": 144255 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018611499032236993, "loss": 2.032, "step": 144260 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001861140507266526, "loss": 2.2862, "step": 144265 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018611311110151716, "loss": 2.2287, "step": 144270 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018611217144696394, "loss": 2.0716, "step": 144275 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018611123176299324, "loss": 2.0072, "step": 144280 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018611029204960538, "loss": 2.1527, "step": 144285 }, { "epoch": 0.34, "grad_norm": 2.65625, "learning_rate": 0.00018610935230680068, "loss": 2.0005, "step": 144290 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018610841253457946, "loss": 2.2174, "step": 144295 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018610747273294204, "loss": 2.1674, "step": 144300 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018610653290188878, "loss": 2.095, "step": 144305 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018610559304141992, "loss": 2.1411, "step": 144310 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018610465315153585, "loss": 2.0974, "step": 144315 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018610371323223688, "loss": 2.1174, "step": 144320 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.0001861027732835233, "loss": 2.1797, "step": 144325 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018610183330539546, "loss": 2.1161, "step": 144330 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018610089329785368, "loss": 2.1985, "step": 144335 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018609995326089823, "loss": 2.1141, "step": 144340 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018609901319452954, "loss": 2.0931, "step": 144345 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018609807309874783, "loss": 2.2651, "step": 144350 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018609713297355345, "loss": 2.1812, "step": 144355 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018609619281894672, "loss": 2.1833, "step": 144360 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.000186095252634928, "loss": 2.131, "step": 144365 }, { "epoch": 0.34, "grad_norm": 2.734375, "learning_rate": 0.00018609431242149756, "loss": 2.0935, "step": 144370 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018609337217865576, "loss": 2.1806, "step": 144375 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.00018609243190640287, "loss": 2.1756, "step": 144380 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018609149160473928, "loss": 2.2453, "step": 144385 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018609055127366526, "loss": 2.2129, "step": 144390 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018608961091318117, "loss": 1.9976, "step": 144395 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018608867052328726, "loss": 2.0795, "step": 144400 }, { "epoch": 0.34, "grad_norm": 1.9765625, "learning_rate": 0.00018608773010398393, "loss": 2.0222, "step": 144405 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018608678965527148, "loss": 2.069, "step": 144410 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018608584917715018, "loss": 2.1619, "step": 144415 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001860849086696204, "loss": 2.1595, "step": 144420 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001860839681326825, "loss": 2.2859, "step": 144425 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018608302756633672, "loss": 1.9958, "step": 144430 }, { "epoch": 0.34, "grad_norm": 1.71875, "learning_rate": 0.00018608208697058342, "loss": 2.0016, "step": 144435 }, { "epoch": 0.34, "grad_norm": 1.9140625, "learning_rate": 0.00018608114634542292, "loss": 1.9906, "step": 144440 }, { "epoch": 0.34, "grad_norm": 2.765625, "learning_rate": 0.00018608020569085552, "loss": 2.23, "step": 144445 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001860792650068816, "loss": 2.1935, "step": 144450 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018607832429350143, "loss": 2.0911, "step": 144455 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018607738355071535, "loss": 2.0375, "step": 144460 }, { "epoch": 0.34, "grad_norm": 3.34375, "learning_rate": 0.00018607644277852364, "loss": 2.1042, "step": 144465 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.0001860755019769267, "loss": 2.1811, "step": 144470 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018607456114592475, "loss": 2.0323, "step": 144475 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001860736202855182, "loss": 2.1035, "step": 144480 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018607267939570735, "loss": 2.231, "step": 144485 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001860717384764925, "loss": 2.2701, "step": 144490 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018607079752787402, "loss": 2.0396, "step": 144495 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018606985654985217, "loss": 2.4084, "step": 144500 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018606891554242728, "loss": 2.103, "step": 144505 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018606797450559966, "loss": 2.0032, "step": 144510 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.0001860670334393697, "loss": 2.2507, "step": 144515 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001860660923437377, "loss": 2.1343, "step": 144520 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018606515121870394, "loss": 2.0861, "step": 144525 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018606421006426874, "loss": 2.2907, "step": 144530 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001860632688804325, "loss": 2.0039, "step": 144535 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018606232766719543, "loss": 2.1416, "step": 144540 }, { "epoch": 0.34, "grad_norm": 2.828125, "learning_rate": 0.00018606138642455794, "loss": 1.9879, "step": 144545 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.0001860604451525203, "loss": 2.3819, "step": 144550 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018605950385108287, "loss": 2.1165, "step": 144555 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018605856252024595, "loss": 2.0663, "step": 144560 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018605762116000985, "loss": 2.2336, "step": 144565 }, { "epoch": 0.34, "grad_norm": 1.8515625, "learning_rate": 0.0001860566797703749, "loss": 2.0046, "step": 144570 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018605573835134146, "loss": 2.2232, "step": 144575 }, { "epoch": 0.34, "grad_norm": 1.9140625, "learning_rate": 0.0001860547969029098, "loss": 2.0827, "step": 144580 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018605385542508026, "loss": 2.2131, "step": 144585 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018605291391785315, "loss": 2.1012, "step": 144590 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018605197238122882, "loss": 2.2186, "step": 144595 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018605103081520757, "loss": 1.9111, "step": 144600 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018605008921978976, "loss": 2.1842, "step": 144605 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018604914759497564, "loss": 2.2149, "step": 144610 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.00018604820594076557, "loss": 2.1347, "step": 144615 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018604726425715989, "loss": 2.238, "step": 144620 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018604632254415888, "loss": 2.1103, "step": 144625 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001860453808017629, "loss": 2.164, "step": 144630 }, { "epoch": 0.34, "grad_norm": 1.8125, "learning_rate": 0.00018604443902997227, "loss": 2.0077, "step": 144635 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.0001860434972287873, "loss": 2.2668, "step": 144640 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.0001860425553982083, "loss": 2.0663, "step": 144645 }, { "epoch": 0.34, "grad_norm": 2.828125, "learning_rate": 0.0001860416135382356, "loss": 2.1259, "step": 144650 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018604067164886952, "loss": 2.1263, "step": 144655 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.0001860397297301104, "loss": 2.1911, "step": 144660 }, { "epoch": 0.34, "grad_norm": 2.828125, "learning_rate": 0.00018603878778195853, "loss": 2.0027, "step": 144665 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.00018603784580441425, "loss": 2.1964, "step": 144670 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001860369037974779, "loss": 2.2977, "step": 144675 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018603596176114978, "loss": 2.0648, "step": 144680 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.0001860350196954302, "loss": 2.3798, "step": 144685 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018603407760031952, "loss": 2.1193, "step": 144690 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.000186033135475818, "loss": 2.1612, "step": 144695 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018603219332192605, "loss": 2.1853, "step": 144700 }, { "epoch": 0.34, "grad_norm": 2.765625, "learning_rate": 0.00018603125113864392, "loss": 2.2641, "step": 144705 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018603030892597197, "loss": 2.0375, "step": 144710 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.0001860293666839105, "loss": 2.1693, "step": 144715 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.0001860284244124598, "loss": 2.1105, "step": 144720 }, { "epoch": 0.34, "grad_norm": 2.640625, "learning_rate": 0.00018602748211162028, "loss": 2.1785, "step": 144725 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018602653978139216, "loss": 2.1145, "step": 144730 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018602559742177585, "loss": 2.0755, "step": 144735 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018602465503277164, "loss": 2.1923, "step": 144740 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018602371261437984, "loss": 1.9943, "step": 144745 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018602277016660076, "loss": 1.937, "step": 144750 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018602182768943476, "loss": 1.9924, "step": 144755 }, { "epoch": 0.34, "grad_norm": 2.796875, "learning_rate": 0.00018602088518288214, "loss": 2.036, "step": 144760 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018601994264694325, "loss": 2.2684, "step": 144765 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018601900008161836, "loss": 2.0736, "step": 144770 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018601805748690782, "loss": 2.1507, "step": 144775 }, { "epoch": 0.34, "grad_norm": 1.9140625, "learning_rate": 0.00018601711486281193, "loss": 2.1845, "step": 144780 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.00018601617220933108, "loss": 1.933, "step": 144785 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018601522952646553, "loss": 2.1737, "step": 144790 }, { "epoch": 0.34, "grad_norm": 2.6875, "learning_rate": 0.00018601428681421563, "loss": 2.0326, "step": 144795 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018601334407258166, "loss": 2.2154, "step": 144800 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.000186012401301564, "loss": 2.1941, "step": 144805 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001860114585011629, "loss": 2.1259, "step": 144810 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018601051567137877, "loss": 1.942, "step": 144815 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018600957281221186, "loss": 2.1039, "step": 144820 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018600862992366253, "loss": 2.1518, "step": 144825 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001860076870057311, "loss": 2.0193, "step": 144830 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.0001860067440584179, "loss": 2.1653, "step": 144835 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018600580108172322, "loss": 2.0382, "step": 144840 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018600485807564735, "loss": 2.1144, "step": 144845 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018600391504019071, "loss": 2.3478, "step": 144850 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018600297197535358, "loss": 2.2026, "step": 144855 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018600202888113625, "loss": 2.0382, "step": 144860 }, { "epoch": 0.34, "grad_norm": 1.9921875, "learning_rate": 0.00018600108575753907, "loss": 2.1667, "step": 144865 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018600014260456238, "loss": 2.115, "step": 144870 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018599919942220648, "loss": 2.0668, "step": 144875 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018599825621047167, "loss": 2.012, "step": 144880 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018599731296935833, "loss": 2.3315, "step": 144885 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018599636969886672, "loss": 2.2071, "step": 144890 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.0001859954263989972, "loss": 2.0388, "step": 144895 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018599448306975006, "loss": 2.167, "step": 144900 }, { "epoch": 0.34, "grad_norm": 2.5625, "learning_rate": 0.0001859935397111257, "loss": 2.0328, "step": 144905 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018599259632312435, "loss": 2.2982, "step": 144910 }, { "epoch": 0.34, "grad_norm": 1.9765625, "learning_rate": 0.00018599165290574636, "loss": 1.9589, "step": 144915 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018599070945899207, "loss": 2.2478, "step": 144920 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.0001859897659828618, "loss": 2.1833, "step": 144925 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018598882247735588, "loss": 2.2218, "step": 144930 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018598787894247458, "loss": 2.0838, "step": 144935 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018598693537821829, "loss": 2.133, "step": 144940 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018598599178458732, "loss": 1.9024, "step": 144945 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018598504816158196, "loss": 2.3759, "step": 144950 }, { "epoch": 0.34, "grad_norm": 1.7734375, "learning_rate": 0.00018598410450920252, "loss": 2.228, "step": 144955 }, { "epoch": 0.34, "grad_norm": 1.8515625, "learning_rate": 0.00018598316082744936, "loss": 2.1401, "step": 144960 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018598221711632284, "loss": 2.0616, "step": 144965 }, { "epoch": 0.34, "grad_norm": 1.9609375, "learning_rate": 0.0001859812733758232, "loss": 2.2609, "step": 144970 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.0001859803296059508, "loss": 1.9592, "step": 144975 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.00018597938580670596, "loss": 2.1721, "step": 144980 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.000185978441978089, "loss": 1.9689, "step": 144985 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018597749812010025, "loss": 2.1637, "step": 144990 }, { "epoch": 0.34, "grad_norm": 2.828125, "learning_rate": 0.00018597655423274002, "loss": 2.0217, "step": 144995 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018597561031600862, "loss": 2.2189, "step": 145000 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001859746663699064, "loss": 1.9435, "step": 145005 }, { "epoch": 0.34, "grad_norm": 1.9453125, "learning_rate": 0.00018597372239443372, "loss": 2.0283, "step": 145010 }, { "epoch": 0.34, "grad_norm": 1.9609375, "learning_rate": 0.0001859727783895908, "loss": 2.0939, "step": 145015 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018597183435537808, "loss": 1.9658, "step": 145020 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018597089029179578, "loss": 2.2183, "step": 145025 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018596994619884426, "loss": 2.047, "step": 145030 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018596900207652387, "loss": 2.2154, "step": 145035 }, { "epoch": 0.34, "grad_norm": 1.8515625, "learning_rate": 0.0001859680579248349, "loss": 2.2279, "step": 145040 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018596711374377768, "loss": 2.1315, "step": 145045 }, { "epoch": 0.34, "grad_norm": 2.84375, "learning_rate": 0.00018596616953335255, "loss": 2.2461, "step": 145050 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.0001859652252935598, "loss": 2.0914, "step": 145055 }, { "epoch": 0.34, "grad_norm": 1.90625, "learning_rate": 0.00018596428102439977, "loss": 2.0623, "step": 145060 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001859633367258728, "loss": 2.1396, "step": 145065 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.0001859623923979792, "loss": 2.0575, "step": 145070 }, { "epoch": 0.34, "grad_norm": 2.65625, "learning_rate": 0.00018596144804071925, "loss": 2.2139, "step": 145075 }, { "epoch": 0.34, "grad_norm": 1.8828125, "learning_rate": 0.00018596050365409335, "loss": 2.1989, "step": 145080 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018595955923810176, "loss": 2.1376, "step": 145085 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018595861479274483, "loss": 2.1815, "step": 145090 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018595767031802287, "loss": 2.2992, "step": 145095 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018595672581393622, "loss": 2.0045, "step": 145100 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.0001859557812804852, "loss": 2.1746, "step": 145105 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018595483671767014, "loss": 2.2531, "step": 145110 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001859538921254913, "loss": 1.9469, "step": 145115 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.0001859529475039491, "loss": 2.138, "step": 145120 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018595200285304378, "loss": 2.0596, "step": 145125 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018595105817277571, "loss": 2.021, "step": 145130 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.0001859501134631452, "loss": 2.2865, "step": 145135 }, { "epoch": 0.34, "grad_norm": 2.71875, "learning_rate": 0.00018594916872415257, "loss": 2.0862, "step": 145140 }, { "epoch": 0.34, "grad_norm": 2.484375, "learning_rate": 0.00018594822395579815, "loss": 2.1423, "step": 145145 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018594727915808226, "loss": 2.2049, "step": 145150 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018594633433100522, "loss": 2.0772, "step": 145155 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018594538947456734, "loss": 2.1695, "step": 145160 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.000185944444588769, "loss": 1.9733, "step": 145165 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018594349967361042, "loss": 2.1339, "step": 145170 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.000185942554729092, "loss": 2.2492, "step": 145175 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018594160975521405, "loss": 2.0813, "step": 145180 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018594066475197688, "loss": 2.2879, "step": 145185 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018593971971938084, "loss": 1.9669, "step": 145190 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018593877465742625, "loss": 2.1223, "step": 145195 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.0001859378295661134, "loss": 2.1251, "step": 145200 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.0001859368844454426, "loss": 2.0896, "step": 145205 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.0001859359392954142, "loss": 2.1662, "step": 145210 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018593499411602858, "loss": 2.3139, "step": 145215 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018593404890728598, "loss": 2.0789, "step": 145220 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.0001859331036691867, "loss": 2.1513, "step": 145225 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018593215840173117, "loss": 2.0811, "step": 145230 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018593121310491964, "loss": 2.2459, "step": 145235 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018593026777875244, "loss": 2.0099, "step": 145240 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018592932242322992, "loss": 2.0995, "step": 145245 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018592837703835236, "loss": 2.157, "step": 145250 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018592743162412014, "loss": 2.1909, "step": 145255 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018592648618053354, "loss": 2.0744, "step": 145260 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018592554070759288, "loss": 2.2426, "step": 145265 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018592459520529853, "loss": 2.1101, "step": 145270 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018592364967365076, "loss": 2.0917, "step": 145275 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.0001859227041126499, "loss": 2.4734, "step": 145280 }, { "epoch": 0.34, "grad_norm": 1.9765625, "learning_rate": 0.0001859217585222963, "loss": 2.2052, "step": 145285 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018592081290259028, "loss": 2.2299, "step": 145290 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001859198672535321, "loss": 2.2878, "step": 145295 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.0001859189215751222, "loss": 2.2881, "step": 145300 }, { "epoch": 0.34, "grad_norm": 1.703125, "learning_rate": 0.0001859179758673608, "loss": 1.9291, "step": 145305 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018591703013024827, "loss": 2.0137, "step": 145310 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.00018591608436378495, "loss": 2.1948, "step": 145315 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001859151385679711, "loss": 1.9853, "step": 145320 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.00018591419274280712, "loss": 2.3597, "step": 145325 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018591324688829324, "loss": 1.9785, "step": 145330 }, { "epoch": 0.34, "grad_norm": 1.8671875, "learning_rate": 0.00018591230100442987, "loss": 2.0748, "step": 145335 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.0001859113550912173, "loss": 2.1598, "step": 145340 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018591040914865583, "loss": 2.0682, "step": 145345 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018590946317674582, "loss": 2.1569, "step": 145350 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.0001859085171754876, "loss": 1.9139, "step": 145355 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018590757114488144, "loss": 2.1931, "step": 145360 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018590662508492773, "loss": 2.176, "step": 145365 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018590567899562675, "loss": 2.1551, "step": 145370 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018590473287697883, "loss": 1.9248, "step": 145375 }, { "epoch": 0.34, "grad_norm": 1.8671875, "learning_rate": 0.00018590378672898427, "loss": 2.0337, "step": 145380 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.00018590284055164345, "loss": 2.0907, "step": 145385 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018590189434495663, "loss": 2.0295, "step": 145390 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018590094810892418, "loss": 2.2006, "step": 145395 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018590000184354644, "loss": 2.0398, "step": 145400 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.00018589905554882366, "loss": 2.0901, "step": 145405 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.0001858981092247562, "loss": 2.0828, "step": 145410 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018589716287134442, "loss": 2.1215, "step": 145415 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001858962164885886, "loss": 2.0973, "step": 145420 }, { "epoch": 0.34, "grad_norm": 2.875, "learning_rate": 0.00018589527007648905, "loss": 2.1611, "step": 145425 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018589432363504615, "loss": 2.2973, "step": 145430 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018589337716426018, "loss": 1.9843, "step": 145435 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018589243066413147, "loss": 2.0703, "step": 145440 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018589148413466035, "loss": 2.1707, "step": 145445 }, { "epoch": 0.34, "grad_norm": 2.84375, "learning_rate": 0.00018589053757584717, "loss": 2.0226, "step": 145450 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001858895909876922, "loss": 2.1259, "step": 145455 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018588864437019578, "loss": 2.1337, "step": 145460 }, { "epoch": 0.34, "grad_norm": 2.625, "learning_rate": 0.00018588769772335826, "loss": 2.0654, "step": 145465 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018588675104717992, "loss": 2.2572, "step": 145470 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018588580434166115, "loss": 1.9971, "step": 145475 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018588485760680218, "loss": 2.1707, "step": 145480 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.0001858839108426034, "loss": 2.1597, "step": 145485 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018588296404906514, "loss": 1.9409, "step": 145490 }, { "epoch": 0.34, "grad_norm": 1.921875, "learning_rate": 0.0001858820172261877, "loss": 2.1734, "step": 145495 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018588107037397137, "loss": 2.255, "step": 145500 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018588012349241652, "loss": 2.1465, "step": 145505 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.0001858791765815235, "loss": 2.0537, "step": 145510 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018587822964129255, "loss": 2.2415, "step": 145515 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018587728267172406, "loss": 2.2571, "step": 145520 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018587633567281834, "loss": 2.1176, "step": 145525 }, { "epoch": 0.34, "grad_norm": 1.8046875, "learning_rate": 0.0001858753886445757, "loss": 2.2229, "step": 145530 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.00018587444158699648, "loss": 2.2017, "step": 145535 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.000185873494500081, "loss": 1.9355, "step": 145540 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018587254738382952, "loss": 2.0831, "step": 145545 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.0001858716002382425, "loss": 2.2535, "step": 145550 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.00018587065306332012, "loss": 2.0926, "step": 145555 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.0001858697058590628, "loss": 2.3828, "step": 145560 }, { "epoch": 0.34, "grad_norm": 1.9140625, "learning_rate": 0.00018586875862547082, "loss": 2.2358, "step": 145565 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.0001858678113625445, "loss": 2.146, "step": 145570 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001858668640702842, "loss": 2.0108, "step": 145575 }, { "epoch": 0.34, "grad_norm": 1.9921875, "learning_rate": 0.00018586591674869024, "loss": 2.176, "step": 145580 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.0001858649693977629, "loss": 2.0438, "step": 145585 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018586402201750253, "loss": 2.1199, "step": 145590 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.00018586307460790943, "loss": 2.1843, "step": 145595 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.000185862127168984, "loss": 2.0913, "step": 145600 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018586117970072647, "loss": 2.0595, "step": 145605 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.0001858602322031372, "loss": 2.0305, "step": 145610 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.00018585928467621657, "loss": 2.1099, "step": 145615 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.0001858583371199648, "loss": 2.0121, "step": 145620 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018585738953438228, "loss": 2.1345, "step": 145625 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.0001858564419194693, "loss": 1.9563, "step": 145630 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018585549427522623, "loss": 2.3965, "step": 145635 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018585454660165334, "loss": 1.9917, "step": 145640 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018585359889875102, "loss": 2.3129, "step": 145645 }, { "epoch": 0.34, "grad_norm": 1.84375, "learning_rate": 0.00018585265116651952, "loss": 2.0324, "step": 145650 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.0001858517034049592, "loss": 2.2192, "step": 145655 }, { "epoch": 0.34, "grad_norm": 1.8984375, "learning_rate": 0.0001858507556140704, "loss": 2.0223, "step": 145660 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001858498077938534, "loss": 2.1773, "step": 145665 }, { "epoch": 0.34, "grad_norm": 2.578125, "learning_rate": 0.0001858488599443086, "loss": 2.1141, "step": 145670 }, { "epoch": 0.34, "grad_norm": 1.9375, "learning_rate": 0.00018584791206543622, "loss": 2.1312, "step": 145675 }, { "epoch": 0.34, "grad_norm": 2.765625, "learning_rate": 0.00018584696415723664, "loss": 2.0119, "step": 145680 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.0001858460162197102, "loss": 2.1822, "step": 145685 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001858450682528572, "loss": 2.2092, "step": 145690 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018584412025667794, "loss": 2.1313, "step": 145695 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.00018584317223117283, "loss": 2.1864, "step": 145700 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018584222417634207, "loss": 2.1653, "step": 145705 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001858412760921861, "loss": 2.0794, "step": 145710 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018584032797870517, "loss": 2.2181, "step": 145715 }, { "epoch": 0.34, "grad_norm": 2.484375, "learning_rate": 0.00018583937983589964, "loss": 2.312, "step": 145720 }, { "epoch": 0.34, "grad_norm": 1.96875, "learning_rate": 0.0001858384316637698, "loss": 2.1242, "step": 145725 }, { "epoch": 0.34, "grad_norm": 2.015625, "learning_rate": 0.000185837483462316, "loss": 2.2499, "step": 145730 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018583653523153855, "loss": 2.1104, "step": 145735 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001858355869714378, "loss": 2.0273, "step": 145740 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.00018583463868201405, "loss": 2.2356, "step": 145745 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018583369036326763, "loss": 2.0591, "step": 145750 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018583274201519888, "loss": 2.3633, "step": 145755 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001858317936378081, "loss": 2.2813, "step": 145760 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018583084523109565, "loss": 2.1228, "step": 145765 }, { "epoch": 0.34, "grad_norm": 1.9609375, "learning_rate": 0.00018582989679506178, "loss": 1.969, "step": 145770 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.0001858289483297069, "loss": 1.9202, "step": 145775 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018582799983503126, "loss": 2.1446, "step": 145780 }, { "epoch": 0.34, "grad_norm": 2.75, "learning_rate": 0.0001858270513110352, "loss": 2.1178, "step": 145785 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.0001858261027577191, "loss": 2.0494, "step": 145790 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018582515417508324, "loss": 2.1862, "step": 145795 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018582420556312795, "loss": 2.2494, "step": 145800 }, { "epoch": 0.34, "grad_norm": 2.421875, "learning_rate": 0.00018582325692185356, "loss": 2.242, "step": 145805 }, { "epoch": 0.34, "grad_norm": 1.9609375, "learning_rate": 0.0001858223082512604, "loss": 2.1249, "step": 145810 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018582135955134876, "loss": 2.1291, "step": 145815 }, { "epoch": 0.34, "grad_norm": 1.7421875, "learning_rate": 0.000185820410822119, "loss": 1.9083, "step": 145820 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018581946206357142, "loss": 2.1924, "step": 145825 }, { "epoch": 0.34, "grad_norm": 3.28125, "learning_rate": 0.0001858185132757064, "loss": 2.056, "step": 145830 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.00018581756445852415, "loss": 2.1558, "step": 145835 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018581661561202509, "loss": 2.333, "step": 145840 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018581566673620952, "loss": 2.0598, "step": 145845 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018581471783107778, "loss": 2.0427, "step": 145850 }, { "epoch": 0.34, "grad_norm": 2.921875, "learning_rate": 0.00018581376889663015, "loss": 2.2013, "step": 145855 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018581281993286698, "loss": 1.9599, "step": 145860 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018581187093978863, "loss": 2.1782, "step": 145865 }, { "epoch": 0.34, "grad_norm": 1.8671875, "learning_rate": 0.00018581092191739535, "loss": 2.0798, "step": 145870 }, { "epoch": 0.34, "grad_norm": 2.671875, "learning_rate": 0.00018580997286568753, "loss": 2.2165, "step": 145875 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018580902378466545, "loss": 2.1165, "step": 145880 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018580807467432944, "loss": 2.0062, "step": 145885 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018580712553467986, "loss": 2.1357, "step": 145890 }, { "epoch": 0.34, "grad_norm": 2.828125, "learning_rate": 0.000185806176365717, "loss": 1.98, "step": 145895 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.0001858052271674412, "loss": 2.1628, "step": 145900 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018580427793985277, "loss": 2.2569, "step": 145905 }, { "epoch": 0.34, "grad_norm": 1.8828125, "learning_rate": 0.00018580332868295204, "loss": 2.1761, "step": 145910 }, { "epoch": 0.34, "grad_norm": 2.625, "learning_rate": 0.00018580237939673933, "loss": 2.1642, "step": 145915 }, { "epoch": 0.34, "grad_norm": 1.71875, "learning_rate": 0.000185801430081215, "loss": 2.208, "step": 145920 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018580048073637935, "loss": 1.9311, "step": 145925 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018579953136223267, "loss": 2.0574, "step": 145930 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018579858195877532, "loss": 2.1771, "step": 145935 }, { "epoch": 0.34, "grad_norm": 1.8515625, "learning_rate": 0.00018579763252600761, "loss": 1.8967, "step": 145940 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.0001857966830639299, "loss": 2.1816, "step": 145945 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018579573357254247, "loss": 2.1885, "step": 145950 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018579478405184564, "loss": 2.0166, "step": 145955 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018579383450183977, "loss": 2.2992, "step": 145960 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.0001857928849225252, "loss": 2.224, "step": 145965 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.0001857919353139022, "loss": 2.2653, "step": 145970 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001857909856759711, "loss": 1.918, "step": 145975 }, { "epoch": 0.34, "grad_norm": 1.3828125, "learning_rate": 0.0001857900360087323, "loss": 2.1425, "step": 145980 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.000185789086312186, "loss": 2.1079, "step": 145985 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018578813658633262, "loss": 2.1557, "step": 145990 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.0001857871868311725, "loss": 2.253, "step": 145995 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018578623704670587, "loss": 2.1972, "step": 146000 }, { "epoch": 0.34, "grad_norm": 2.53125, "learning_rate": 0.00018578528723293313, "loss": 2.1918, "step": 146005 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018578433738985453, "loss": 2.0077, "step": 146010 }, { "epoch": 0.34, "grad_norm": 2.5, "learning_rate": 0.0001857833875174705, "loss": 2.3983, "step": 146015 }, { "epoch": 0.34, "grad_norm": 2.625, "learning_rate": 0.0001857824376157813, "loss": 2.077, "step": 146020 }, { "epoch": 0.34, "grad_norm": 1.8359375, "learning_rate": 0.00018578148768478724, "loss": 2.1621, "step": 146025 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.0001857805377244887, "loss": 2.0474, "step": 146030 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018577958773488597, "loss": 2.1965, "step": 146035 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018577863771597936, "loss": 2.1224, "step": 146040 }, { "epoch": 0.34, "grad_norm": 2.5625, "learning_rate": 0.00018577768766776922, "loss": 2.1439, "step": 146045 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018577673759025585, "loss": 2.1978, "step": 146050 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.0001857757874834396, "loss": 2.3856, "step": 146055 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.0001857748373473208, "loss": 2.1196, "step": 146060 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018577388718189975, "loss": 2.174, "step": 146065 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018577293698717677, "loss": 2.093, "step": 146070 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018577198676315222, "loss": 2.2087, "step": 146075 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018577103650982638, "loss": 2.1446, "step": 146080 }, { "epoch": 0.34, "grad_norm": 1.859375, "learning_rate": 0.00018577008622719962, "loss": 1.9636, "step": 146085 }, { "epoch": 0.34, "grad_norm": 1.90625, "learning_rate": 0.00018576913591527222, "loss": 1.999, "step": 146090 }, { "epoch": 0.34, "grad_norm": 2.125, "learning_rate": 0.00018576818557404455, "loss": 2.2198, "step": 146095 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.0001857672352035169, "loss": 1.8248, "step": 146100 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018576628480368963, "loss": 2.1367, "step": 146105 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018576533437456304, "loss": 2.1074, "step": 146110 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018576438391613743, "loss": 2.29, "step": 146115 }, { "epoch": 0.34, "grad_norm": 3.78125, "learning_rate": 0.00018576343342841315, "loss": 2.1798, "step": 146120 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018576248291139055, "loss": 2.1047, "step": 146125 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018576153236506991, "loss": 2.1755, "step": 146130 }, { "epoch": 0.34, "grad_norm": 2.625, "learning_rate": 0.00018576058178945159, "loss": 2.2052, "step": 146135 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001857596311845359, "loss": 2.1072, "step": 146140 }, { "epoch": 0.34, "grad_norm": 2.09375, "learning_rate": 0.00018575868055032316, "loss": 2.1742, "step": 146145 }, { "epoch": 0.34, "grad_norm": 2.65625, "learning_rate": 0.0001857577298868137, "loss": 2.0458, "step": 146150 }, { "epoch": 0.34, "grad_norm": 2.65625, "learning_rate": 0.00018575677919400785, "loss": 2.2461, "step": 146155 }, { "epoch": 0.34, "grad_norm": 1.921875, "learning_rate": 0.00018575582847190592, "loss": 2.0875, "step": 146160 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018575487772050822, "loss": 1.9966, "step": 146165 }, { "epoch": 0.34, "grad_norm": 1.90625, "learning_rate": 0.00018575392693981512, "loss": 2.1463, "step": 146170 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018575297612982694, "loss": 2.1847, "step": 146175 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018575202529054398, "loss": 2.0528, "step": 146180 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.00018575107442196656, "loss": 2.1728, "step": 146185 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018575012352409502, "loss": 2.1658, "step": 146190 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018574917259692967, "loss": 2.205, "step": 146195 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001857482216404709, "loss": 2.3037, "step": 146200 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018574727065471893, "loss": 2.2, "step": 146205 }, { "epoch": 0.34, "grad_norm": 1.9296875, "learning_rate": 0.00018574631963967416, "loss": 2.2076, "step": 146210 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001857453685953369, "loss": 2.0142, "step": 146215 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018574441752170745, "loss": 1.9775, "step": 146220 }, { "epoch": 0.34, "grad_norm": 2.234375, "learning_rate": 0.00018574346641878615, "loss": 2.1309, "step": 146225 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001857425152865733, "loss": 2.1786, "step": 146230 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001857415641250693, "loss": 2.1354, "step": 146235 }, { "epoch": 0.34, "grad_norm": 2.40625, "learning_rate": 0.00018574061293427443, "loss": 2.0498, "step": 146240 }, { "epoch": 0.34, "grad_norm": 2.734375, "learning_rate": 0.00018573966171418898, "loss": 2.1582, "step": 146245 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.00018573871046481335, "loss": 1.9477, "step": 146250 }, { "epoch": 0.34, "grad_norm": 2.046875, "learning_rate": 0.00018573775918614779, "loss": 2.0754, "step": 146255 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018573680787819266, "loss": 2.0571, "step": 146260 }, { "epoch": 0.34, "grad_norm": 1.890625, "learning_rate": 0.00018573585654094825, "loss": 2.1414, "step": 146265 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.00018573490517441498, "loss": 2.2072, "step": 146270 }, { "epoch": 0.34, "grad_norm": 2.078125, "learning_rate": 0.00018573395377859307, "loss": 2.0615, "step": 146275 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018573300235348288, "loss": 2.159, "step": 146280 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018573205089908475, "loss": 2.3944, "step": 146285 }, { "epoch": 0.34, "grad_norm": 2.359375, "learning_rate": 0.000185731099415399, "loss": 2.0863, "step": 146290 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018573014790242596, "loss": 2.2252, "step": 146295 }, { "epoch": 0.34, "grad_norm": 2.625, "learning_rate": 0.00018572919636016596, "loss": 2.2384, "step": 146300 }, { "epoch": 0.34, "grad_norm": 2.15625, "learning_rate": 0.00018572824478861927, "loss": 1.9554, "step": 146305 }, { "epoch": 0.34, "grad_norm": 2.984375, "learning_rate": 0.00018572729318778628, "loss": 2.2573, "step": 146310 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018572634155766728, "loss": 2.065, "step": 146315 }, { "epoch": 0.34, "grad_norm": 2.484375, "learning_rate": 0.00018572538989826263, "loss": 2.1855, "step": 146320 }, { "epoch": 0.34, "grad_norm": 2.390625, "learning_rate": 0.00018572443820957262, "loss": 2.1703, "step": 146325 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001857234864915976, "loss": 2.1361, "step": 146330 }, { "epoch": 0.34, "grad_norm": 1.953125, "learning_rate": 0.00018572253474433785, "loss": 2.0352, "step": 146335 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018572158296779377, "loss": 2.1393, "step": 146340 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001857206311619656, "loss": 2.1482, "step": 146345 }, { "epoch": 0.34, "grad_norm": 2.3125, "learning_rate": 0.00018571967932685374, "loss": 2.0234, "step": 146350 }, { "epoch": 0.34, "grad_norm": 2.75, "learning_rate": 0.00018571872746245846, "loss": 1.9694, "step": 146355 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.0001857177755687801, "loss": 2.2007, "step": 146360 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018571682364581902, "loss": 2.2561, "step": 146365 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001857158716935755, "loss": 2.1944, "step": 146370 }, { "epoch": 0.34, "grad_norm": 2.25, "learning_rate": 0.00018571491971204992, "loss": 2.2095, "step": 146375 }, { "epoch": 0.34, "grad_norm": 2.84375, "learning_rate": 0.00018571396770124255, "loss": 2.2838, "step": 146380 }, { "epoch": 0.34, "grad_norm": 2.328125, "learning_rate": 0.00018571301566115372, "loss": 1.9872, "step": 146385 }, { "epoch": 0.34, "grad_norm": 2.640625, "learning_rate": 0.00018571206359178376, "loss": 2.117, "step": 146390 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018571111149313303, "loss": 2.1918, "step": 146395 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018571015936520182, "loss": 2.0351, "step": 146400 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.00018570920720799045, "loss": 2.0222, "step": 146405 }, { "epoch": 0.34, "grad_norm": 2.0, "learning_rate": 0.00018570825502149928, "loss": 2.1492, "step": 146410 }, { "epoch": 0.34, "grad_norm": 2.46875, "learning_rate": 0.00018570730280572861, "loss": 2.2078, "step": 146415 }, { "epoch": 0.34, "grad_norm": 1.9921875, "learning_rate": 0.00018570635056067877, "loss": 2.0159, "step": 146420 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.0001857053982863501, "loss": 2.1627, "step": 146425 }, { "epoch": 0.34, "grad_norm": 1.890625, "learning_rate": 0.0001857044459827429, "loss": 2.1441, "step": 146430 }, { "epoch": 0.34, "grad_norm": 2.0625, "learning_rate": 0.00018570349364985752, "loss": 2.0256, "step": 146435 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.00018570254128769426, "loss": 2.1375, "step": 146440 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018570158889625343, "loss": 2.107, "step": 146445 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.0001857006364755354, "loss": 2.3047, "step": 146450 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001856996840255405, "loss": 2.0849, "step": 146455 }, { "epoch": 0.34, "grad_norm": 2.515625, "learning_rate": 0.00018569873154626905, "loss": 2.2096, "step": 146460 }, { "epoch": 0.34, "grad_norm": 1.921875, "learning_rate": 0.00018569777903772132, "loss": 2.2865, "step": 146465 }, { "epoch": 0.34, "grad_norm": 2.4375, "learning_rate": 0.0001856968264998977, "loss": 2.1526, "step": 146470 }, { "epoch": 0.34, "grad_norm": 2.34375, "learning_rate": 0.00018569587393279846, "loss": 2.3046, "step": 146475 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.00018569492133642396, "loss": 2.1639, "step": 146480 }, { "epoch": 0.34, "grad_norm": 2.109375, "learning_rate": 0.00018569396871077456, "loss": 1.9875, "step": 146485 }, { "epoch": 0.34, "grad_norm": 2.171875, "learning_rate": 0.0001856930160558505, "loss": 2.0518, "step": 146490 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.00018569206337165216, "loss": 2.2262, "step": 146495 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018569111065817987, "loss": 2.1511, "step": 146500 }, { "epoch": 0.34, "grad_norm": 2.609375, "learning_rate": 0.00018569015791543395, "loss": 2.1759, "step": 146505 }, { "epoch": 0.34, "grad_norm": 2.203125, "learning_rate": 0.0001856892051434147, "loss": 2.2225, "step": 146510 }, { "epoch": 0.34, "grad_norm": 2.375, "learning_rate": 0.00018568825234212247, "loss": 2.1537, "step": 146515 }, { "epoch": 0.34, "grad_norm": 2.1875, "learning_rate": 0.0001856872995115576, "loss": 2.1763, "step": 146520 }, { "epoch": 0.34, "grad_norm": 1.984375, "learning_rate": 0.00018568634665172036, "loss": 2.0513, "step": 146525 }, { "epoch": 0.34, "grad_norm": 2.546875, "learning_rate": 0.0001856853937626111, "loss": 2.1999, "step": 146530 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.0001856844408442302, "loss": 2.3305, "step": 146535 }, { "epoch": 0.34, "grad_norm": 2.59375, "learning_rate": 0.00018568348789657792, "loss": 2.3083, "step": 146540 }, { "epoch": 0.34, "grad_norm": 2.296875, "learning_rate": 0.0001856825349196546, "loss": 2.0054, "step": 146545 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.0001856815819134606, "loss": 2.3154, "step": 146550 }, { "epoch": 0.34, "grad_norm": 2.578125, "learning_rate": 0.0001856806288779962, "loss": 1.9745, "step": 146555 }, { "epoch": 0.34, "grad_norm": 2.265625, "learning_rate": 0.00018567967581326173, "loss": 2.1479, "step": 146560 }, { "epoch": 0.34, "grad_norm": 2.03125, "learning_rate": 0.00018567872271925753, "loss": 2.2856, "step": 146565 }, { "epoch": 0.34, "grad_norm": 2.21875, "learning_rate": 0.00018567776959598392, "loss": 2.1944, "step": 146570 }, { "epoch": 0.34, "grad_norm": 2.9375, "learning_rate": 0.00018567681644344127, "loss": 2.1827, "step": 146575 }, { "epoch": 0.34, "grad_norm": 2.625, "learning_rate": 0.00018567586326162982, "loss": 2.1664, "step": 146580 }, { "epoch": 0.34, "grad_norm": 2.640625, "learning_rate": 0.00018567491005054999, "loss": 2.0904, "step": 146585 }, { "epoch": 0.34, "grad_norm": 2.28125, "learning_rate": 0.000185673956810202, "loss": 2.0787, "step": 146590 }, { "epoch": 0.34, "grad_norm": 2.453125, "learning_rate": 0.00018567300354058625, "loss": 2.1113, "step": 146595 }, { "epoch": 0.34, "grad_norm": 2.140625, "learning_rate": 0.00018567205024170308, "loss": 2.0443, "step": 146600 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018567109691355275, "loss": 2.157, "step": 146605 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018567014355613565, "loss": 2.2527, "step": 146610 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018566919016945206, "loss": 2.1111, "step": 146615 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001856682367535023, "loss": 2.2081, "step": 146620 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018566728330828673, "loss": 2.1009, "step": 146625 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018566632983380566, "loss": 2.186, "step": 146630 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018566537633005946, "loss": 2.12, "step": 146635 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018566442279704836, "loss": 2.274, "step": 146640 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018566346923477276, "loss": 2.0688, "step": 146645 }, { "epoch": 0.35, "grad_norm": 1.671875, "learning_rate": 0.00018566251564323296, "loss": 2.2424, "step": 146650 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.0001856615620224293, "loss": 2.0971, "step": 146655 }, { "epoch": 0.35, "grad_norm": 1.9375, "learning_rate": 0.00018566060837236207, "loss": 2.2615, "step": 146660 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018565965469303166, "loss": 2.1317, "step": 146665 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018565870098443833, "loss": 2.0467, "step": 146670 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018565774724658244, "loss": 2.1747, "step": 146675 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018565679347946434, "loss": 2.0578, "step": 146680 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.0001856558396830843, "loss": 1.9213, "step": 146685 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018565488585744268, "loss": 2.0062, "step": 146690 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.0001856539320025398, "loss": 2.1306, "step": 146695 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018565297811837597, "loss": 2.1163, "step": 146700 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018565202420495154, "loss": 2.0968, "step": 146705 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001856510702622668, "loss": 2.146, "step": 146710 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018565011629032214, "loss": 2.1878, "step": 146715 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018564916228911782, "loss": 2.2033, "step": 146720 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.0001856482082586542, "loss": 2.0997, "step": 146725 }, { "epoch": 0.35, "grad_norm": 1.9609375, "learning_rate": 0.0001856472541989316, "loss": 2.1695, "step": 146730 }, { "epoch": 0.35, "grad_norm": 2.734375, "learning_rate": 0.00018564630010995035, "loss": 2.2894, "step": 146735 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018564534599171075, "loss": 2.154, "step": 146740 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.0001856443918442132, "loss": 2.0504, "step": 146745 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.0001856434376674579, "loss": 2.2994, "step": 146750 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018564248346144527, "loss": 2.0744, "step": 146755 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018564152922617564, "loss": 2.187, "step": 146760 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.0001856405749616493, "loss": 2.0556, "step": 146765 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018563962066786658, "loss": 2.1225, "step": 146770 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018563866634482783, "loss": 2.2451, "step": 146775 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018563771199253332, "loss": 2.0742, "step": 146780 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.00018563675761098345, "loss": 2.1519, "step": 146785 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.0001856358032001785, "loss": 1.976, "step": 146790 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.0001856348487601188, "loss": 2.0811, "step": 146795 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.0001856338942908047, "loss": 2.0908, "step": 146800 }, { "epoch": 0.35, "grad_norm": 2.671875, "learning_rate": 0.0001856329397922365, "loss": 2.2395, "step": 146805 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018563198526441453, "loss": 2.1221, "step": 146810 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.0001856310307073391, "loss": 2.0488, "step": 146815 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.0001856300761210106, "loss": 2.0619, "step": 146820 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001856291215054293, "loss": 2.1363, "step": 146825 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001856281668605955, "loss": 2.0272, "step": 146830 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.0001856272121865096, "loss": 2.0914, "step": 146835 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.0001856262574831719, "loss": 2.2568, "step": 146840 }, { "epoch": 0.35, "grad_norm": 1.9140625, "learning_rate": 0.0001856253027505827, "loss": 1.956, "step": 146845 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018562434798874238, "loss": 2.2542, "step": 146850 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018562339319765116, "loss": 2.1448, "step": 146855 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018562243837730948, "loss": 2.0926, "step": 146860 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018562148352771764, "loss": 2.1548, "step": 146865 }, { "epoch": 0.35, "grad_norm": 2.734375, "learning_rate": 0.00018562052864887592, "loss": 2.0982, "step": 146870 }, { "epoch": 0.35, "grad_norm": 1.8828125, "learning_rate": 0.00018561957374078469, "loss": 2.0457, "step": 146875 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018561861880344425, "loss": 2.1572, "step": 146880 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018561766383685493, "loss": 2.0759, "step": 146885 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018561670884101707, "loss": 2.1985, "step": 146890 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018561575381593099, "loss": 2.2413, "step": 146895 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018561479876159703, "loss": 2.0278, "step": 146900 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.0001856138436780155, "loss": 2.1191, "step": 146905 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.0001856128885651867, "loss": 2.218, "step": 146910 }, { "epoch": 0.35, "grad_norm": 1.875, "learning_rate": 0.00018561193342311102, "loss": 2.0744, "step": 146915 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001856109782517887, "loss": 2.0809, "step": 146920 }, { "epoch": 0.35, "grad_norm": 1.984375, "learning_rate": 0.00018561002305122015, "loss": 2.2169, "step": 146925 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018560906782140565, "loss": 2.3051, "step": 146930 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018560811256234557, "loss": 2.2325, "step": 146935 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018560715727404019, "loss": 1.9035, "step": 146940 }, { "epoch": 0.35, "grad_norm": 1.9453125, "learning_rate": 0.00018560620195648986, "loss": 1.9662, "step": 146945 }, { "epoch": 0.35, "grad_norm": 2.703125, "learning_rate": 0.0001856052466096949, "loss": 2.2862, "step": 146950 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018560429123365562, "loss": 1.8832, "step": 146955 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018560333582837236, "loss": 2.1366, "step": 146960 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018560238039384545, "loss": 2.2404, "step": 146965 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018560142493007523, "loss": 2.0949, "step": 146970 }, { "epoch": 0.35, "grad_norm": 1.9453125, "learning_rate": 0.00018560046943706202, "loss": 2.1692, "step": 146975 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.0001855995139148061, "loss": 2.0428, "step": 146980 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018559855836330783, "loss": 2.0765, "step": 146985 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018559760278256756, "loss": 2.1351, "step": 146990 }, { "epoch": 0.35, "grad_norm": 1.921875, "learning_rate": 0.00018559664717258563, "loss": 2.2297, "step": 146995 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.0001855956915333623, "loss": 2.1239, "step": 147000 }, { "epoch": 0.35, "grad_norm": 1.9296875, "learning_rate": 0.0001855947358648979, "loss": 2.1238, "step": 147005 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018559378016719285, "loss": 2.1005, "step": 147010 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018559282444024738, "loss": 2.3453, "step": 147015 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001855918686840618, "loss": 2.2793, "step": 147020 }, { "epoch": 0.35, "grad_norm": 2.5625, "learning_rate": 0.00018559091289863656, "loss": 2.2358, "step": 147025 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.0001855899570839719, "loss": 2.289, "step": 147030 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018558900124006816, "loss": 2.0653, "step": 147035 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018558804536692564, "loss": 2.1177, "step": 147040 }, { "epoch": 0.35, "grad_norm": 3.5625, "learning_rate": 0.00018558708946454468, "loss": 2.0293, "step": 147045 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018558613353292564, "loss": 2.2194, "step": 147050 }, { "epoch": 0.35, "grad_norm": 2.859375, "learning_rate": 0.0001855851775720688, "loss": 2.2433, "step": 147055 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018558422158197456, "loss": 2.0036, "step": 147060 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018558326556264318, "loss": 2.003, "step": 147065 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018558230951407501, "loss": 2.094, "step": 147070 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018558135343627033, "loss": 1.9128, "step": 147075 }, { "epoch": 0.35, "grad_norm": 2.5625, "learning_rate": 0.00018558039732922954, "loss": 2.1663, "step": 147080 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018557944119295292, "loss": 2.0569, "step": 147085 }, { "epoch": 0.35, "grad_norm": 1.8828125, "learning_rate": 0.0001855784850274408, "loss": 2.1693, "step": 147090 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018557752883269357, "loss": 2.0412, "step": 147095 }, { "epoch": 0.35, "grad_norm": 2.5625, "learning_rate": 0.00018557657260871146, "loss": 2.2658, "step": 147100 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018557561635549488, "loss": 2.203, "step": 147105 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018557466007304408, "loss": 2.1865, "step": 147110 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.0001855737037613594, "loss": 2.0382, "step": 147115 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018557274742044124, "loss": 2.2509, "step": 147120 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018557179105028985, "loss": 2.1721, "step": 147125 }, { "epoch": 0.35, "grad_norm": 2.859375, "learning_rate": 0.0001855708346509056, "loss": 2.2043, "step": 147130 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.00018556987822228882, "loss": 2.1919, "step": 147135 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018556892176443978, "loss": 2.195, "step": 147140 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018556796527735884, "loss": 2.3315, "step": 147145 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018556700876104637, "loss": 2.0675, "step": 147150 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018556605221550264, "loss": 2.1569, "step": 147155 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.000185565095640728, "loss": 2.0761, "step": 147160 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018556413903672274, "loss": 2.0446, "step": 147165 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018556318240348728, "loss": 2.0865, "step": 147170 }, { "epoch": 0.35, "grad_norm": 2.5625, "learning_rate": 0.00018556222574102183, "loss": 2.1089, "step": 147175 }, { "epoch": 0.35, "grad_norm": 3.8125, "learning_rate": 0.00018556126904932678, "loss": 2.0197, "step": 147180 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018556031232840249, "loss": 2.1142, "step": 147185 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001855593555782492, "loss": 1.996, "step": 147190 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.0001855583987988673, "loss": 2.0428, "step": 147195 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.0001855574419902571, "loss": 2.2392, "step": 147200 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.00018555648515241891, "loss": 2.0865, "step": 147205 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018555552828535308, "loss": 2.004, "step": 147210 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018555457138905995, "loss": 2.196, "step": 147215 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.0001855536144635398, "loss": 2.1313, "step": 147220 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.000185552657508793, "loss": 1.9796, "step": 147225 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018555170052481985, "loss": 2.0511, "step": 147230 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001855507435116207, "loss": 2.1156, "step": 147235 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018554978646919583, "loss": 2.1082, "step": 147240 }, { "epoch": 0.35, "grad_norm": 1.9453125, "learning_rate": 0.00018554882939754563, "loss": 2.2557, "step": 147245 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.0001855478722966704, "loss": 2.1802, "step": 147250 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018554691516657048, "loss": 2.2554, "step": 147255 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018554595800724614, "loss": 2.102, "step": 147260 }, { "epoch": 0.35, "grad_norm": 2.0, "learning_rate": 0.00018554500081869782, "loss": 2.2077, "step": 147265 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.0001855440436009257, "loss": 1.984, "step": 147270 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018554308635393023, "loss": 2.1146, "step": 147275 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018554212907771167, "loss": 2.0711, "step": 147280 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018554117177227038, "loss": 2.2314, "step": 147285 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018554021443760666, "loss": 2.0696, "step": 147290 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018553925707372084, "loss": 2.1508, "step": 147295 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.0001855382996806133, "loss": 2.4178, "step": 147300 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.0001855373422582843, "loss": 2.1908, "step": 147305 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001855363848067342, "loss": 2.1328, "step": 147310 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.0001855354273259633, "loss": 2.0962, "step": 147315 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018553446981597196, "loss": 2.2824, "step": 147320 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018553351227676047, "loss": 2.2766, "step": 147325 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.0001855325547083292, "loss": 1.9866, "step": 147330 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.00018553159711067848, "loss": 2.1334, "step": 147335 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.0001855306394838086, "loss": 2.1271, "step": 147340 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.0001855296818277199, "loss": 2.2927, "step": 147345 }, { "epoch": 0.35, "grad_norm": 2.0, "learning_rate": 0.00018552872414241272, "loss": 2.1142, "step": 147350 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018552776642788738, "loss": 2.3729, "step": 147355 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018552680868414418, "loss": 2.1345, "step": 147360 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.0001855258509111835, "loss": 2.2579, "step": 147365 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.0001855248931090056, "loss": 2.1514, "step": 147370 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018552393527761088, "loss": 2.0112, "step": 147375 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.0001855229774169996, "loss": 2.0405, "step": 147380 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018552201952717217, "loss": 2.0631, "step": 147385 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018552106160812882, "loss": 2.0882, "step": 147390 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018552010365986994, "loss": 2.167, "step": 147395 }, { "epoch": 0.35, "grad_norm": 1.90625, "learning_rate": 0.00018551914568239584, "loss": 2.1024, "step": 147400 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018551818767570684, "loss": 2.0341, "step": 147405 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.0001855172296398033, "loss": 2.0806, "step": 147410 }, { "epoch": 0.35, "grad_norm": 1.859375, "learning_rate": 0.00018551627157468553, "loss": 2.2054, "step": 147415 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001855153134803538, "loss": 2.1071, "step": 147420 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018551435535680853, "loss": 2.0783, "step": 147425 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018551339720404998, "loss": 1.9524, "step": 147430 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018551243902207852, "loss": 2.0852, "step": 147435 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018551148081089446, "loss": 2.0441, "step": 147440 }, { "epoch": 0.35, "grad_norm": 1.953125, "learning_rate": 0.00018551052257049814, "loss": 2.0549, "step": 147445 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018550956430088983, "loss": 2.1194, "step": 147450 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018550860600206995, "loss": 2.0533, "step": 147455 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018550764767403876, "loss": 2.3361, "step": 147460 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.0001855066893167966, "loss": 2.1628, "step": 147465 }, { "epoch": 0.35, "grad_norm": 1.8984375, "learning_rate": 0.0001855057309303438, "loss": 1.9103, "step": 147470 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018550477251468072, "loss": 2.0892, "step": 147475 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018550381406980763, "loss": 2.1914, "step": 147480 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001855028555957249, "loss": 2.1513, "step": 147485 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018550189709243283, "loss": 2.0435, "step": 147490 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018550093855993177, "loss": 2.2106, "step": 147495 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018549997999822205, "loss": 2.1753, "step": 147500 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018549902140730398, "loss": 2.1719, "step": 147505 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001854980627871779, "loss": 2.1198, "step": 147510 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018549710413784412, "loss": 2.1501, "step": 147515 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.000185496145459303, "loss": 2.1537, "step": 147520 }, { "epoch": 0.35, "grad_norm": 1.8125, "learning_rate": 0.00018549518675155482, "loss": 1.9666, "step": 147525 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018549422801459993, "loss": 2.1794, "step": 147530 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018549326924843866, "loss": 2.0821, "step": 147535 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.0001854923104530714, "loss": 2.1342, "step": 147540 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018549135162849832, "loss": 2.1469, "step": 147545 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001854903927747199, "loss": 2.2363, "step": 147550 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.0001854894338917364, "loss": 2.2088, "step": 147555 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001854884749795482, "loss": 2.0627, "step": 147560 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018548751603815553, "loss": 2.1996, "step": 147565 }, { "epoch": 0.35, "grad_norm": 2.5625, "learning_rate": 0.0001854865570675588, "loss": 1.9999, "step": 147570 }, { "epoch": 0.35, "grad_norm": 1.9140625, "learning_rate": 0.00018548559806775828, "loss": 2.1215, "step": 147575 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018548463903875434, "loss": 2.3912, "step": 147580 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018548367998054733, "loss": 2.0713, "step": 147585 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.00018548272089313753, "loss": 2.2595, "step": 147590 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018548176177652528, "loss": 2.1299, "step": 147595 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.0001854808026307109, "loss": 2.0825, "step": 147600 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018547984345569475, "loss": 2.2121, "step": 147605 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.0001854788842514771, "loss": 2.2597, "step": 147610 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018547792501805835, "loss": 2.1413, "step": 147615 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018547696575543877, "loss": 2.3296, "step": 147620 }, { "epoch": 0.35, "grad_norm": 1.9765625, "learning_rate": 0.0001854760064636187, "loss": 2.0976, "step": 147625 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.0001854750471425985, "loss": 2.0788, "step": 147630 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018547408779237844, "loss": 2.141, "step": 147635 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018547312841295891, "loss": 2.1473, "step": 147640 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.0001854721690043402, "loss": 2.2433, "step": 147645 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018547120956652265, "loss": 2.0964, "step": 147650 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018547025009950661, "loss": 2.0944, "step": 147655 }, { "epoch": 0.35, "grad_norm": 1.9296875, "learning_rate": 0.00018546929060329233, "loss": 2.0882, "step": 147660 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018546833107788022, "loss": 2.1785, "step": 147665 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.0001854673715232706, "loss": 2.3198, "step": 147670 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018546641193946373, "loss": 2.0761, "step": 147675 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018546545232646, "loss": 2.1841, "step": 147680 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018546449268425975, "loss": 2.1977, "step": 147685 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018546353301286323, "loss": 2.064, "step": 147690 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018546257331227085, "loss": 2.0815, "step": 147695 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018546161358248288, "loss": 2.0714, "step": 147700 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.0001854606538234997, "loss": 2.1057, "step": 147705 }, { "epoch": 0.35, "grad_norm": 2.71875, "learning_rate": 0.0001854596940353216, "loss": 2.2798, "step": 147710 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018545873421794892, "loss": 2.2725, "step": 147715 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.000185457774371382, "loss": 2.1998, "step": 147720 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018545681449562114, "loss": 2.052, "step": 147725 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018545585459066666, "loss": 2.1366, "step": 147730 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018545489465651894, "loss": 2.2376, "step": 147735 }, { "epoch": 0.35, "grad_norm": 1.9609375, "learning_rate": 0.0001854539346931783, "loss": 2.288, "step": 147740 }, { "epoch": 0.35, "grad_norm": 1.9609375, "learning_rate": 0.000185452974700645, "loss": 2.1594, "step": 147745 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018545201467891942, "loss": 2.1659, "step": 147750 }, { "epoch": 0.35, "grad_norm": 2.0, "learning_rate": 0.0001854510546280019, "loss": 2.2417, "step": 147755 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018545009454789274, "loss": 2.0351, "step": 147760 }, { "epoch": 0.35, "grad_norm": 1.9453125, "learning_rate": 0.00018544913443859228, "loss": 2.1401, "step": 147765 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018544817430010083, "loss": 2.1269, "step": 147770 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.00018544721413241876, "loss": 2.1468, "step": 147775 }, { "epoch": 0.35, "grad_norm": 1.78125, "learning_rate": 0.00018544625393554636, "loss": 1.9699, "step": 147780 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.000185445293709484, "loss": 2.1792, "step": 147785 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018544433345423194, "loss": 2.2004, "step": 147790 }, { "epoch": 0.35, "grad_norm": 2.71875, "learning_rate": 0.00018544337316979054, "loss": 1.955, "step": 147795 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.00018544241285616017, "loss": 1.9962, "step": 147800 }, { "epoch": 0.35, "grad_norm": 1.7265625, "learning_rate": 0.0001854414525133411, "loss": 2.2455, "step": 147805 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018544049214133367, "loss": 1.8894, "step": 147810 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018543953174013825, "loss": 2.0213, "step": 147815 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.0001854385713097551, "loss": 2.1887, "step": 147820 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.0001854376108501846, "loss": 2.0556, "step": 147825 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018543665036142707, "loss": 2.3408, "step": 147830 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018543568984348286, "loss": 2.1605, "step": 147835 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.0001854347292963522, "loss": 2.2326, "step": 147840 }, { "epoch": 0.35, "grad_norm": 3.453125, "learning_rate": 0.00018543376872003555, "loss": 1.9815, "step": 147845 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018543280811453313, "loss": 2.2378, "step": 147850 }, { "epoch": 0.35, "grad_norm": 1.8984375, "learning_rate": 0.00018543184747984531, "loss": 2.1828, "step": 147855 }, { "epoch": 0.35, "grad_norm": 1.953125, "learning_rate": 0.00018543088681597248, "loss": 2.1633, "step": 147860 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018542992612291485, "loss": 2.1606, "step": 147865 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018542896540067286, "loss": 2.1597, "step": 147870 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018542800464924674, "loss": 2.1323, "step": 147875 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018542704386863688, "loss": 2.1076, "step": 147880 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018542608305884359, "loss": 2.0998, "step": 147885 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001854251222198672, "loss": 2.062, "step": 147890 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018542416135170804, "loss": 2.0465, "step": 147895 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018542320045436645, "loss": 2.118, "step": 147900 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.0001854222395278427, "loss": 2.1367, "step": 147905 }, { "epoch": 0.35, "grad_norm": 1.9140625, "learning_rate": 0.0001854212785721372, "loss": 2.1268, "step": 147910 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018542031758725028, "loss": 2.11, "step": 147915 }, { "epoch": 0.35, "grad_norm": 1.9140625, "learning_rate": 0.0001854193565731822, "loss": 2.3069, "step": 147920 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001854183955299333, "loss": 2.2499, "step": 147925 }, { "epoch": 0.35, "grad_norm": 1.890625, "learning_rate": 0.0001854174344575039, "loss": 2.0632, "step": 147930 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018541647335589443, "loss": 2.2341, "step": 147935 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.0001854155122251051, "loss": 2.1372, "step": 147940 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.0001854145510651363, "loss": 2.1875, "step": 147945 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.0001854135898759883, "loss": 2.0747, "step": 147950 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018541262865766152, "loss": 2.1806, "step": 147955 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.0001854116674101562, "loss": 2.1305, "step": 147960 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018541070613347271, "loss": 1.9778, "step": 147965 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018540974482761138, "loss": 2.265, "step": 147970 }, { "epoch": 0.35, "grad_norm": 1.875, "learning_rate": 0.00018540878349257255, "loss": 2.0783, "step": 147975 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018540782212835653, "loss": 2.0553, "step": 147980 }, { "epoch": 0.35, "grad_norm": 1.734375, "learning_rate": 0.00018540686073496363, "loss": 2.2219, "step": 147985 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.0001854058993123942, "loss": 2.1369, "step": 147990 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018540493786064856, "loss": 2.1203, "step": 147995 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018540397637972707, "loss": 2.0977, "step": 148000 }, { "epoch": 0.35, "grad_norm": 1.828125, "learning_rate": 0.00018540301486963002, "loss": 2.209, "step": 148005 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018540205333035772, "loss": 2.0467, "step": 148010 }, { "epoch": 0.35, "grad_norm": 1.953125, "learning_rate": 0.00018540109176191057, "loss": 2.0951, "step": 148015 }, { "epoch": 0.35, "grad_norm": 2.671875, "learning_rate": 0.00018540013016428883, "loss": 2.3534, "step": 148020 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018539916853749287, "loss": 2.3322, "step": 148025 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018539820688152304, "loss": 2.1963, "step": 148030 }, { "epoch": 0.35, "grad_norm": 1.84375, "learning_rate": 0.00018539724519637957, "loss": 1.9229, "step": 148035 }, { "epoch": 0.35, "grad_norm": 1.90625, "learning_rate": 0.00018539628348206288, "loss": 2.0043, "step": 148040 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.0001853953217385733, "loss": 2.1154, "step": 148045 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018539435996591108, "loss": 2.1128, "step": 148050 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.0001853933981640766, "loss": 2.1085, "step": 148055 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.00018539243633307022, "loss": 2.2172, "step": 148060 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.0001853914744728922, "loss": 2.0773, "step": 148065 }, { "epoch": 0.35, "grad_norm": 1.9765625, "learning_rate": 0.00018539051258354295, "loss": 2.1162, "step": 148070 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018538955066502272, "loss": 2.1395, "step": 148075 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.0001853885887173319, "loss": 2.0891, "step": 148080 }, { "epoch": 0.35, "grad_norm": 2.796875, "learning_rate": 0.00018538762674047077, "loss": 2.2827, "step": 148085 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018538666473443967, "loss": 2.1665, "step": 148090 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018538570269923893, "loss": 2.0634, "step": 148095 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.0001853847406348689, "loss": 1.9708, "step": 148100 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018538377854132988, "loss": 2.1643, "step": 148105 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018538281641862226, "loss": 2.1103, "step": 148110 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018538185426674627, "loss": 2.152, "step": 148115 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001853808920857023, "loss": 2.0334, "step": 148120 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018537992987549068, "loss": 2.1206, "step": 148125 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018537896763611174, "loss": 1.9668, "step": 148130 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018537800536756578, "loss": 2.2229, "step": 148135 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018537704306985316, "loss": 2.113, "step": 148140 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018537608074297414, "loss": 2.1545, "step": 148145 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018537511838692916, "loss": 2.0381, "step": 148150 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018537415600171846, "loss": 1.9613, "step": 148155 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.0001853731935873424, "loss": 2.1508, "step": 148160 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018537223114380134, "loss": 2.3875, "step": 148165 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018537126867109556, "loss": 1.9966, "step": 148170 }, { "epoch": 0.35, "grad_norm": 1.9765625, "learning_rate": 0.00018537030616922538, "loss": 2.1382, "step": 148175 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.00018536934363819118, "loss": 2.183, "step": 148180 }, { "epoch": 0.35, "grad_norm": 1.90625, "learning_rate": 0.0001853683810779933, "loss": 1.8363, "step": 148185 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018536741848863196, "loss": 2.3022, "step": 148190 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.0001853664558701076, "loss": 2.1947, "step": 148195 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.0001853654932224205, "loss": 2.2091, "step": 148200 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018536453054557103, "loss": 2.1421, "step": 148205 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018536356783955947, "loss": 2.2346, "step": 148210 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.00018536260510438614, "loss": 2.1084, "step": 148215 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018536164234005145, "loss": 2.0172, "step": 148220 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018536067954655564, "loss": 2.2019, "step": 148225 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018535971672389907, "loss": 2.2526, "step": 148230 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018535875387208208, "loss": 2.1399, "step": 148235 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.000185357790991105, "loss": 2.1071, "step": 148240 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018535682808096815, "loss": 2.1129, "step": 148245 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018535586514167185, "loss": 2.0158, "step": 148250 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018535490217321644, "loss": 2.0091, "step": 148255 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018535393917560225, "loss": 2.0286, "step": 148260 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018535297614882958, "loss": 2.1095, "step": 148265 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018535201309289882, "loss": 2.1655, "step": 148270 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018535105000781025, "loss": 2.3186, "step": 148275 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.0001853500868935642, "loss": 2.1463, "step": 148280 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018534912375016104, "loss": 2.1441, "step": 148285 }, { "epoch": 0.35, "grad_norm": 1.7734375, "learning_rate": 0.00018534816057760108, "loss": 2.0217, "step": 148290 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018534719737588458, "loss": 2.1137, "step": 148295 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.000185346234145012, "loss": 2.0466, "step": 148300 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018534527088498356, "loss": 2.0617, "step": 148305 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018534430759579963, "loss": 2.1046, "step": 148310 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018534334427746058, "loss": 2.1167, "step": 148315 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018534238092996662, "loss": 2.0998, "step": 148320 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018534141755331823, "loss": 2.1065, "step": 148325 }, { "epoch": 0.35, "grad_norm": 1.921875, "learning_rate": 0.0001853404541475156, "loss": 2.2696, "step": 148330 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018533949071255918, "loss": 2.133, "step": 148335 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001853385272484492, "loss": 2.2385, "step": 148340 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018533756375518605, "loss": 2.2319, "step": 148345 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018533660023277, "loss": 2.1299, "step": 148350 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018533563668120148, "loss": 2.1055, "step": 148355 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018533467310048073, "loss": 2.1873, "step": 148360 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018533370949060812, "loss": 2.035, "step": 148365 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018533274585158397, "loss": 2.1479, "step": 148370 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001853317821834086, "loss": 2.2307, "step": 148375 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018533081848608234, "loss": 2.1217, "step": 148380 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018532985475960552, "loss": 2.1427, "step": 148385 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001853288910039785, "loss": 2.3836, "step": 148390 }, { "epoch": 0.35, "grad_norm": 1.96875, "learning_rate": 0.00018532792721920153, "loss": 2.154, "step": 148395 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018532696340527505, "loss": 2.1543, "step": 148400 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.0001853259995621993, "loss": 2.1505, "step": 148405 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018532503568997465, "loss": 2.2613, "step": 148410 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018532407178860143, "loss": 2.0366, "step": 148415 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018532310785807993, "loss": 2.2341, "step": 148420 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018532214389841055, "loss": 2.1231, "step": 148425 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018532117990959355, "loss": 2.2433, "step": 148430 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.0001853202158916293, "loss": 2.1178, "step": 148435 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001853192518445181, "loss": 2.1621, "step": 148440 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001853182877682603, "loss": 2.1475, "step": 148445 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018531732366285626, "loss": 2.2403, "step": 148450 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018531635952830623, "loss": 2.2097, "step": 148455 }, { "epoch": 0.35, "grad_norm": 1.953125, "learning_rate": 0.0001853153953646106, "loss": 1.9261, "step": 148460 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.0001853144311717697, "loss": 2.0848, "step": 148465 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018531346694978382, "loss": 2.1315, "step": 148470 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018531250269865332, "loss": 2.1843, "step": 148475 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018531153841837853, "loss": 2.1411, "step": 148480 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.00018531057410895975, "loss": 2.1126, "step": 148485 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018530960977039735, "loss": 2.1311, "step": 148490 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018530864540269163, "loss": 2.0619, "step": 148495 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018530768100584295, "loss": 1.8887, "step": 148500 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018530671657985163, "loss": 2.2121, "step": 148505 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018530575212471792, "loss": 2.2464, "step": 148510 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018530478764044227, "loss": 2.0757, "step": 148515 }, { "epoch": 0.35, "grad_norm": 1.828125, "learning_rate": 0.00018530382312702495, "loss": 2.1654, "step": 148520 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001853028585844663, "loss": 2.077, "step": 148525 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.00018530189401276664, "loss": 2.1123, "step": 148530 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.0001853009294119263, "loss": 2.1171, "step": 148535 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018529996478194562, "loss": 2.018, "step": 148540 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018529900012282492, "loss": 2.1762, "step": 148545 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018529803543456454, "loss": 2.1993, "step": 148550 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018529707071716483, "loss": 2.1415, "step": 148555 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018529610597062604, "loss": 2.1188, "step": 148560 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018529514119494862, "loss": 2.1882, "step": 148565 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.0001852941763901328, "loss": 2.1602, "step": 148570 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.0001852932115561789, "loss": 2.1051, "step": 148575 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018529224669308736, "loss": 1.9441, "step": 148580 }, { "epoch": 0.35, "grad_norm": 1.78125, "learning_rate": 0.0001852912818008584, "loss": 2.1531, "step": 148585 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.0001852903168794924, "loss": 2.2336, "step": 148590 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001852893519289897, "loss": 2.2731, "step": 148595 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.0001852883869493506, "loss": 2.1615, "step": 148600 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018528742194057543, "loss": 2.1801, "step": 148605 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018528645690266452, "loss": 2.1753, "step": 148610 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018528549183561825, "loss": 2.1201, "step": 148615 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018528452673943687, "loss": 2.0638, "step": 148620 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018528356161412078, "loss": 2.213, "step": 148625 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018528259645967022, "loss": 1.8749, "step": 148630 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018528163127608564, "loss": 2.1356, "step": 148635 }, { "epoch": 0.35, "grad_norm": 2.78125, "learning_rate": 0.0001852806660633673, "loss": 2.1728, "step": 148640 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.0001852797008215155, "loss": 2.138, "step": 148645 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018527873555053065, "loss": 2.1394, "step": 148650 }, { "epoch": 0.35, "grad_norm": 1.921875, "learning_rate": 0.000185277770250413, "loss": 2.2139, "step": 148655 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018527680492116296, "loss": 2.0483, "step": 148660 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.0001852758395627808, "loss": 1.9243, "step": 148665 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018527487417526684, "loss": 2.1819, "step": 148670 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.0001852739087586215, "loss": 2.2056, "step": 148675 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018527294331284496, "loss": 2.0618, "step": 148680 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.0001852719778379377, "loss": 2.2736, "step": 148685 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018527101233389997, "loss": 2.1005, "step": 148690 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001852700468007321, "loss": 2.3386, "step": 148695 }, { "epoch": 0.35, "grad_norm": 1.734375, "learning_rate": 0.00018526908123843447, "loss": 2.0089, "step": 148700 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018526811564700734, "loss": 1.841, "step": 148705 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.0001852671500264511, "loss": 2.184, "step": 148710 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018526618437676604, "loss": 2.2816, "step": 148715 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.0001852652186979525, "loss": 2.1293, "step": 148720 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.0001852642529900108, "loss": 2.1615, "step": 148725 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018526328725294134, "loss": 2.2903, "step": 148730 }, { "epoch": 0.35, "grad_norm": 4.09375, "learning_rate": 0.00018526232148674437, "loss": 2.0365, "step": 148735 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018526135569142023, "loss": 2.1414, "step": 148740 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018526038986696927, "loss": 2.1275, "step": 148745 }, { "epoch": 0.35, "grad_norm": 1.8515625, "learning_rate": 0.0001852594240133918, "loss": 2.1877, "step": 148750 }, { "epoch": 0.35, "grad_norm": 1.9296875, "learning_rate": 0.00018525845813068822, "loss": 2.0095, "step": 148755 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018525749221885875, "loss": 2.1891, "step": 148760 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.0001852565262779038, "loss": 2.2082, "step": 148765 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018525556030782366, "loss": 2.0503, "step": 148770 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018525459430861866, "loss": 1.8774, "step": 148775 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018525362828028917, "loss": 2.1624, "step": 148780 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001852526622228355, "loss": 2.0965, "step": 148785 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.00018525169613625795, "loss": 2.09, "step": 148790 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001852507300205569, "loss": 2.1567, "step": 148795 }, { "epoch": 0.35, "grad_norm": 2.890625, "learning_rate": 0.00018524976387573262, "loss": 2.2194, "step": 148800 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.0001852487977017855, "loss": 1.8916, "step": 148805 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018524783149871584, "loss": 2.0245, "step": 148810 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018524686526652398, "loss": 2.222, "step": 148815 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018524589900521026, "loss": 2.1808, "step": 148820 }, { "epoch": 0.35, "grad_norm": 1.8203125, "learning_rate": 0.00018524493271477495, "loss": 2.0603, "step": 148825 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018524396639521846, "loss": 2.0756, "step": 148830 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018524300004654106, "loss": 2.0515, "step": 148835 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018524203366874313, "loss": 2.0731, "step": 148840 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018524106726182496, "loss": 1.9861, "step": 148845 }, { "epoch": 0.35, "grad_norm": 1.90625, "learning_rate": 0.0001852401008257869, "loss": 2.0987, "step": 148850 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018523913436062928, "loss": 2.0879, "step": 148855 }, { "epoch": 0.35, "grad_norm": 1.6015625, "learning_rate": 0.00018523816786635244, "loss": 2.2646, "step": 148860 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018523720134295667, "loss": 2.0208, "step": 148865 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018523623479044235, "loss": 2.0937, "step": 148870 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018523526820880975, "loss": 2.1004, "step": 148875 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018523430159805927, "loss": 1.9987, "step": 148880 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018523333495819118, "loss": 2.2231, "step": 148885 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018523236828920587, "loss": 2.1415, "step": 148890 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018523140159110358, "loss": 2.2274, "step": 148895 }, { "epoch": 0.35, "grad_norm": 1.84375, "learning_rate": 0.00018523043486388477, "loss": 2.3113, "step": 148900 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018522946810754966, "loss": 2.1488, "step": 148905 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018522850132209862, "loss": 2.3127, "step": 148910 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018522753450753195, "loss": 2.191, "step": 148915 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018522656766385004, "loss": 2.1633, "step": 148920 }, { "epoch": 0.35, "grad_norm": 2.0, "learning_rate": 0.0001852256007910532, "loss": 2.2222, "step": 148925 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.00018522463388914173, "loss": 2.0375, "step": 148930 }, { "epoch": 0.35, "grad_norm": 1.921875, "learning_rate": 0.000185223666958116, "loss": 2.0242, "step": 148935 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.0001852226999979763, "loss": 2.1232, "step": 148940 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.000185221733008723, "loss": 2.4309, "step": 148945 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.0001852207659903564, "loss": 2.1554, "step": 148950 }, { "epoch": 0.35, "grad_norm": 2.734375, "learning_rate": 0.0001852197989428768, "loss": 2.0935, "step": 148955 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018521883186628464, "loss": 1.9589, "step": 148960 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018521786476058015, "loss": 2.1533, "step": 148965 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018521689762576369, "loss": 2.3274, "step": 148970 }, { "epoch": 0.35, "grad_norm": 1.953125, "learning_rate": 0.00018521593046183559, "loss": 2.0787, "step": 148975 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.0001852149632687962, "loss": 1.8476, "step": 148980 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018521399604664581, "loss": 2.0736, "step": 148985 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018521302879538477, "loss": 2.2023, "step": 148990 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018521206151501343, "loss": 2.1338, "step": 148995 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018521109420553211, "loss": 2.0632, "step": 149000 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018521012686694113, "loss": 2.0446, "step": 149005 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001852091594992408, "loss": 2.2152, "step": 149010 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018520819210243153, "loss": 2.2577, "step": 149015 }, { "epoch": 0.35, "grad_norm": 3.4375, "learning_rate": 0.00018520722467651354, "loss": 2.2788, "step": 149020 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018520625722148726, "loss": 2.0553, "step": 149025 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018520528973735295, "loss": 2.0384, "step": 149030 }, { "epoch": 0.35, "grad_norm": 2.78125, "learning_rate": 0.00018520432222411098, "loss": 2.1293, "step": 149035 }, { "epoch": 0.35, "grad_norm": 4.65625, "learning_rate": 0.00018520335468176163, "loss": 2.0156, "step": 149040 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018520238711030532, "loss": 2.2516, "step": 149045 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018520141950974234, "loss": 2.0059, "step": 149050 }, { "epoch": 0.35, "grad_norm": 1.703125, "learning_rate": 0.00018520045188007297, "loss": 1.928, "step": 149055 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.0001851994842212976, "loss": 2.2704, "step": 149060 }, { "epoch": 0.35, "grad_norm": 2.75, "learning_rate": 0.00018519851653341651, "loss": 2.2228, "step": 149065 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001851975488164301, "loss": 2.1695, "step": 149070 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018519658107033865, "loss": 2.0769, "step": 149075 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.00018519561329514249, "loss": 2.2349, "step": 149080 }, { "epoch": 0.35, "grad_norm": 3.265625, "learning_rate": 0.00018519464549084197, "loss": 1.9277, "step": 149085 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.0001851936776574374, "loss": 1.9511, "step": 149090 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018519270979492914, "loss": 2.152, "step": 149095 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.0001851917419033175, "loss": 2.2346, "step": 149100 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018519077398260284, "loss": 2.0563, "step": 149105 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018518980603278546, "loss": 2.1227, "step": 149110 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.0001851888380538657, "loss": 2.2227, "step": 149115 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018518787004584387, "loss": 2.2062, "step": 149120 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018518690200872032, "loss": 2.1688, "step": 149125 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.00018518593394249537, "loss": 2.2028, "step": 149130 }, { "epoch": 0.35, "grad_norm": 2.703125, "learning_rate": 0.00018518496584716938, "loss": 2.067, "step": 149135 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018518399772274265, "loss": 2.2198, "step": 149140 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018518302956921552, "loss": 2.1685, "step": 149145 }, { "epoch": 0.35, "grad_norm": 1.96875, "learning_rate": 0.00018518206138658832, "loss": 2.148, "step": 149150 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018518109317486138, "loss": 2.1858, "step": 149155 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.00018518012493403508, "loss": 2.0864, "step": 149160 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018517915666410963, "loss": 2.078, "step": 149165 }, { "epoch": 0.35, "grad_norm": 1.9375, "learning_rate": 0.0001851781883650855, "loss": 2.1424, "step": 149170 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018517722003696293, "loss": 2.1554, "step": 149175 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018517625167974226, "loss": 1.933, "step": 149180 }, { "epoch": 0.35, "grad_norm": 1.9453125, "learning_rate": 0.00018517528329342386, "loss": 2.0153, "step": 149185 }, { "epoch": 0.35, "grad_norm": 1.9921875, "learning_rate": 0.00018517431487800803, "loss": 2.1716, "step": 149190 }, { "epoch": 0.35, "grad_norm": 2.953125, "learning_rate": 0.00018517334643349513, "loss": 2.2504, "step": 149195 }, { "epoch": 0.35, "grad_norm": 2.875, "learning_rate": 0.00018517237795988542, "loss": 2.1146, "step": 149200 }, { "epoch": 0.35, "grad_norm": 1.984375, "learning_rate": 0.00018517140945717935, "loss": 2.0167, "step": 149205 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018517044092537712, "loss": 2.0543, "step": 149210 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018516947236447916, "loss": 2.1937, "step": 149215 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018516850377448574, "loss": 2.2688, "step": 149220 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018516753515539723, "loss": 2.144, "step": 149225 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018516656650721392, "loss": 2.094, "step": 149230 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.0001851655978299362, "loss": 2.1744, "step": 149235 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018516462912356437, "loss": 2.1734, "step": 149240 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018516366038809874, "loss": 2.1878, "step": 149245 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018516269162353966, "loss": 2.1393, "step": 149250 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018516172282988746, "loss": 2.2382, "step": 149255 }, { "epoch": 0.35, "grad_norm": 1.8671875, "learning_rate": 0.00018516075400714244, "loss": 2.1954, "step": 149260 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018515978515530504, "loss": 2.101, "step": 149265 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018515881627437545, "loss": 2.3359, "step": 149270 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.0001851578473643541, "loss": 1.9979, "step": 149275 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018515687842524125, "loss": 2.0786, "step": 149280 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.0001851559094570373, "loss": 1.9251, "step": 149285 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018515494045974254, "loss": 2.059, "step": 149290 }, { "epoch": 0.35, "grad_norm": 2.0, "learning_rate": 0.00018515397143335725, "loss": 2.06, "step": 149295 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.0001851530023778819, "loss": 2.1572, "step": 149300 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001851520332933167, "loss": 2.1083, "step": 149305 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.000185151064179662, "loss": 2.1449, "step": 149310 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001851500950369182, "loss": 2.1659, "step": 149315 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018514912586508556, "loss": 2.033, "step": 149320 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018514815666416444, "loss": 2.0933, "step": 149325 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.00018514718743415517, "loss": 2.0287, "step": 149330 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.00018514621817505806, "loss": 2.2595, "step": 149335 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001851452488868735, "loss": 2.1855, "step": 149340 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018514427956960172, "loss": 1.9656, "step": 149345 }, { "epoch": 0.35, "grad_norm": 1.96875, "learning_rate": 0.00018514331022324312, "loss": 2.1153, "step": 149350 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018514234084779805, "loss": 2.1511, "step": 149355 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.0001851413714432668, "loss": 2.1408, "step": 149360 }, { "epoch": 0.35, "grad_norm": 1.9375, "learning_rate": 0.0001851404020096497, "loss": 2.1242, "step": 149365 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018513943254694712, "loss": 2.0405, "step": 149370 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018513846305515935, "loss": 2.0854, "step": 149375 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018513749353428675, "loss": 2.0663, "step": 149380 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018513652398432964, "loss": 2.2195, "step": 149385 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018513555440528832, "loss": 2.097, "step": 149390 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018513458479716316, "loss": 2.1742, "step": 149395 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018513361515995447, "loss": 2.256, "step": 149400 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018513264549366266, "loss": 2.2757, "step": 149405 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018513167579828794, "loss": 2.0823, "step": 149410 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.00018513070607383068, "loss": 2.2961, "step": 149415 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018512973632029125, "loss": 2.268, "step": 149420 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018512876653766996, "loss": 2.2019, "step": 149425 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018512779672596717, "loss": 2.1434, "step": 149430 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018512682688518313, "loss": 2.1594, "step": 149435 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018512585701531824, "loss": 2.2079, "step": 149440 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001851248871163728, "loss": 2.1155, "step": 149445 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018512391718834717, "loss": 2.0804, "step": 149450 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018512294723124168, "loss": 2.0714, "step": 149455 }, { "epoch": 0.35, "grad_norm": 3.078125, "learning_rate": 0.0001851219772450566, "loss": 2.0716, "step": 149460 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018512100722979237, "loss": 2.0604, "step": 149465 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001851200371854492, "loss": 2.0001, "step": 149470 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018511906711202752, "loss": 2.1269, "step": 149475 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.00018511809700952765, "loss": 2.2311, "step": 149480 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018511712687794982, "loss": 2.2203, "step": 149485 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.0001851161567172945, "loss": 2.057, "step": 149490 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.00018511518652756192, "loss": 2.2611, "step": 149495 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018511421630875246, "loss": 2.1383, "step": 149500 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018511324606086644, "loss": 2.1154, "step": 149505 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001851122757839042, "loss": 2.301, "step": 149510 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018511130547786605, "loss": 2.2959, "step": 149515 }, { "epoch": 0.35, "grad_norm": 2.78125, "learning_rate": 0.00018511033514275233, "loss": 2.036, "step": 149520 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018510936477856343, "loss": 2.1104, "step": 149525 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018510839438529955, "loss": 2.2026, "step": 149530 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018510742396296115, "loss": 2.1521, "step": 149535 }, { "epoch": 0.35, "grad_norm": 3.0625, "learning_rate": 0.0001851064535115485, "loss": 2.0218, "step": 149540 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018510548303106192, "loss": 2.0468, "step": 149545 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018510451252150176, "loss": 2.1086, "step": 149550 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.00018510354198286838, "loss": 2.168, "step": 149555 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.0001851025714151621, "loss": 2.1044, "step": 149560 }, { "epoch": 0.35, "grad_norm": 2.765625, "learning_rate": 0.0001851016008183832, "loss": 2.2505, "step": 149565 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018510063019253208, "loss": 2.1507, "step": 149570 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018509965953760903, "loss": 2.1425, "step": 149575 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018509868885361442, "loss": 2.2801, "step": 149580 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.0001850977181405485, "loss": 2.1819, "step": 149585 }, { "epoch": 0.35, "grad_norm": 2.84375, "learning_rate": 0.00018509674739841168, "loss": 2.1495, "step": 149590 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018509577662720428, "loss": 2.2371, "step": 149595 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018509480582692663, "loss": 2.3386, "step": 149600 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.000185093834997579, "loss": 2.0849, "step": 149605 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018509286413916182, "loss": 2.0449, "step": 149610 }, { "epoch": 0.35, "grad_norm": 1.890625, "learning_rate": 0.00018509189325167537, "loss": 2.0137, "step": 149615 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018509092233511996, "loss": 2.1303, "step": 149620 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018508995138949596, "loss": 2.2168, "step": 149625 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018508898041480368, "loss": 2.1022, "step": 149630 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018508800941104348, "loss": 2.208, "step": 149635 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018508703837821568, "loss": 2.0219, "step": 149640 }, { "epoch": 0.35, "grad_norm": 2.8125, "learning_rate": 0.00018508606731632056, "loss": 1.8955, "step": 149645 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018508509622535855, "loss": 1.9493, "step": 149650 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.0001850841251053299, "loss": 2.1567, "step": 149655 }, { "epoch": 0.35, "grad_norm": 1.71875, "learning_rate": 0.000185083153956235, "loss": 2.1106, "step": 149660 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.0001850821827780741, "loss": 2.1928, "step": 149665 }, { "epoch": 0.35, "grad_norm": 2.734375, "learning_rate": 0.00018508121157084762, "loss": 2.0716, "step": 149670 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018508024033455581, "loss": 2.0439, "step": 149675 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001850792690691991, "loss": 2.1121, "step": 149680 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018507829777477777, "loss": 1.9246, "step": 149685 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.00018507732645129212, "loss": 2.2741, "step": 149690 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018507635509874254, "loss": 2.0547, "step": 149695 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.0001850753837171293, "loss": 2.225, "step": 149700 }, { "epoch": 0.35, "grad_norm": 2.71875, "learning_rate": 0.00018507441230645278, "loss": 2.0478, "step": 149705 }, { "epoch": 0.35, "grad_norm": 1.984375, "learning_rate": 0.0001850734408667133, "loss": 1.9921, "step": 149710 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018507246939791118, "loss": 2.0555, "step": 149715 }, { "epoch": 0.35, "grad_norm": 1.8203125, "learning_rate": 0.0001850714979000468, "loss": 2.0082, "step": 149720 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.0001850705263731204, "loss": 2.1766, "step": 149725 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.0001850695548171324, "loss": 2.2267, "step": 149730 }, { "epoch": 0.35, "grad_norm": 1.78125, "learning_rate": 0.00018506858323208308, "loss": 1.8551, "step": 149735 }, { "epoch": 0.35, "grad_norm": 3.46875, "learning_rate": 0.00018506761161797282, "loss": 2.2828, "step": 149740 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018506663997480186, "loss": 2.0763, "step": 149745 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.00018506566830257066, "loss": 2.0074, "step": 149750 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018506469660127946, "loss": 2.0402, "step": 149755 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.0001850637248709286, "loss": 2.1319, "step": 149760 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018506275311151843, "loss": 2.251, "step": 149765 }, { "epoch": 0.35, "grad_norm": 2.6875, "learning_rate": 0.0001850617813230493, "loss": 2.0086, "step": 149770 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018506080950552153, "loss": 2.1621, "step": 149775 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.0001850598376589354, "loss": 2.1123, "step": 149780 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018505886578329135, "loss": 2.2585, "step": 149785 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001850578938785896, "loss": 1.9339, "step": 149790 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018505692194483055, "loss": 2.3194, "step": 149795 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018505594998201452, "loss": 2.0455, "step": 149800 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001850549779901418, "loss": 2.1124, "step": 149805 }, { "epoch": 0.35, "grad_norm": 3.015625, "learning_rate": 0.00018505400596921282, "loss": 2.1275, "step": 149810 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.0001850530339192278, "loss": 2.1855, "step": 149815 }, { "epoch": 0.35, "grad_norm": 2.78125, "learning_rate": 0.00018505206184018714, "loss": 2.171, "step": 149820 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018505108973209113, "loss": 2.1491, "step": 149825 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.00018505011759494013, "loss": 2.2094, "step": 149830 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.0001850491454287345, "loss": 2.1981, "step": 149835 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018504817323347452, "loss": 2.1476, "step": 149840 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018504720100916052, "loss": 2.0682, "step": 149845 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018504622875579286, "loss": 2.1179, "step": 149850 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.0001850452564733719, "loss": 2.0256, "step": 149855 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.0001850442841618979, "loss": 2.1303, "step": 149860 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018504331182137124, "loss": 1.9559, "step": 149865 }, { "epoch": 0.35, "grad_norm": 3.234375, "learning_rate": 0.00018504233945179226, "loss": 2.1757, "step": 149870 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018504136705316127, "loss": 2.1828, "step": 149875 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.0001850403946254786, "loss": 2.1716, "step": 149880 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018503942216874458, "loss": 2.1023, "step": 149885 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018503844968295957, "loss": 2.1195, "step": 149890 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018503747716812387, "loss": 2.0816, "step": 149895 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018503650462423782, "loss": 2.0897, "step": 149900 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018503553205130179, "loss": 1.9621, "step": 149905 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018503455944931605, "loss": 2.0594, "step": 149910 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018503358681828095, "loss": 2.2002, "step": 149915 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.0001850326141581969, "loss": 2.0695, "step": 149920 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001850316414690641, "loss": 2.3765, "step": 149925 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018503066875088297, "loss": 2.0807, "step": 149930 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018502969600365382, "loss": 2.2961, "step": 149935 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.000185028723227377, "loss": 2.2848, "step": 149940 }, { "epoch": 0.35, "grad_norm": 5.0, "learning_rate": 0.00018502775042205277, "loss": 2.1874, "step": 149945 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018502677758768158, "loss": 2.0866, "step": 149950 }, { "epoch": 0.35, "grad_norm": 1.9296875, "learning_rate": 0.00018502580472426366, "loss": 2.1188, "step": 149955 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018502483183179943, "loss": 2.1321, "step": 149960 }, { "epoch": 0.35, "grad_norm": 1.859375, "learning_rate": 0.00018502385891028914, "loss": 2.2033, "step": 149965 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.00018502288595973317, "loss": 2.0731, "step": 149970 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018502191298013182, "loss": 2.2117, "step": 149975 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018502093997148545, "loss": 2.1038, "step": 149980 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018501996693379442, "loss": 2.0666, "step": 149985 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.000185018993867059, "loss": 2.1095, "step": 149990 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018501802077127954, "loss": 2.3057, "step": 149995 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.0001850170476464564, "loss": 2.248, "step": 150000 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.0001850160744925899, "loss": 2.1032, "step": 150005 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018501510130968036, "loss": 2.1589, "step": 150010 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.0001850141280977281, "loss": 2.1391, "step": 150015 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.0001850131548567335, "loss": 2.1422, "step": 150020 }, { "epoch": 0.35, "grad_norm": 2.0, "learning_rate": 0.00018501218158669685, "loss": 2.0137, "step": 150025 }, { "epoch": 0.35, "grad_norm": 3.296875, "learning_rate": 0.0001850112082876185, "loss": 2.2392, "step": 150030 }, { "epoch": 0.35, "grad_norm": 1.984375, "learning_rate": 0.0001850102349594988, "loss": 2.2595, "step": 150035 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.000185009261602338, "loss": 2.1418, "step": 150040 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018500828821613655, "loss": 2.0654, "step": 150045 }, { "epoch": 0.35, "grad_norm": 2.75, "learning_rate": 0.00018500731480089472, "loss": 2.2607, "step": 150050 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.0001850063413566128, "loss": 2.249, "step": 150055 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018500536788329123, "loss": 2.1647, "step": 150060 }, { "epoch": 0.35, "grad_norm": 2.90625, "learning_rate": 0.00018500439438093026, "loss": 2.0366, "step": 150065 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001850034208495303, "loss": 2.2733, "step": 150070 }, { "epoch": 0.35, "grad_norm": 3.921875, "learning_rate": 0.00018500244728909156, "loss": 2.0671, "step": 150075 }, { "epoch": 0.35, "grad_norm": 1.953125, "learning_rate": 0.00018500147369961446, "loss": 2.2444, "step": 150080 }, { "epoch": 0.35, "grad_norm": 2.578125, "learning_rate": 0.00018500050008109933, "loss": 2.1139, "step": 150085 }, { "epoch": 0.35, "grad_norm": 1.9609375, "learning_rate": 0.00018499952643354648, "loss": 1.9844, "step": 150090 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018499855275695625, "loss": 2.0226, "step": 150095 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018499757905132897, "loss": 2.0795, "step": 150100 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018499660531666497, "loss": 2.094, "step": 150105 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018499563155296463, "loss": 2.2917, "step": 150110 }, { "epoch": 0.35, "grad_norm": 2.765625, "learning_rate": 0.00018499465776022819, "loss": 2.2118, "step": 150115 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018499368393845605, "loss": 2.1218, "step": 150120 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018499271008764852, "loss": 2.2118, "step": 150125 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018499173620780595, "loss": 2.1479, "step": 150130 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018499076229892865, "loss": 2.195, "step": 150135 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018498978836101697, "loss": 2.1155, "step": 150140 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018498881439407122, "loss": 2.3046, "step": 150145 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.0001849878403980918, "loss": 1.9575, "step": 150150 }, { "epoch": 0.35, "grad_norm": 2.75, "learning_rate": 0.00018498686637307893, "loss": 2.1417, "step": 150155 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018498589231903307, "loss": 2.1245, "step": 150160 }, { "epoch": 0.35, "grad_norm": 2.640625, "learning_rate": 0.00018498491823595444, "loss": 2.2078, "step": 150165 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.00018498394412384342, "loss": 2.0549, "step": 150170 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018498296998270037, "loss": 2.2035, "step": 150175 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.0001849819958125256, "loss": 2.1342, "step": 150180 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018498102161331942, "loss": 2.3021, "step": 150185 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.0001849800473850822, "loss": 2.2141, "step": 150190 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.0001849790731278142, "loss": 2.0771, "step": 150195 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018497809884151587, "loss": 2.0154, "step": 150200 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.00018497712452618744, "loss": 2.1109, "step": 150205 }, { "epoch": 0.35, "grad_norm": 1.8671875, "learning_rate": 0.00018497615018182932, "loss": 2.0306, "step": 150210 }, { "epoch": 0.35, "grad_norm": 2.546875, "learning_rate": 0.0001849751758084418, "loss": 2.1466, "step": 150215 }, { "epoch": 0.35, "grad_norm": 2.484375, "learning_rate": 0.00018497420140602518, "loss": 2.1844, "step": 150220 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018497322697457987, "loss": 2.1425, "step": 150225 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018497225251410613, "loss": 2.0748, "step": 150230 }, { "epoch": 0.35, "grad_norm": 1.875, "learning_rate": 0.00018497127802460437, "loss": 2.0776, "step": 150235 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018497030350607486, "loss": 2.164, "step": 150240 }, { "epoch": 0.35, "grad_norm": 2.890625, "learning_rate": 0.00018496932895851792, "loss": 2.1148, "step": 150245 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018496835438193396, "loss": 2.1012, "step": 150250 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018496737977632326, "loss": 2.0863, "step": 150255 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018496640514168618, "loss": 2.141, "step": 150260 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018496543047802298, "loss": 2.0304, "step": 150265 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.0001849644557853341, "loss": 2.0815, "step": 150270 }, { "epoch": 0.35, "grad_norm": 2.65625, "learning_rate": 0.0001849634810636198, "loss": 2.0038, "step": 150275 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018496250631288045, "loss": 2.0416, "step": 150280 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018496153153311636, "loss": 1.9559, "step": 150285 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018496055672432787, "loss": 2.3097, "step": 150290 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.0001849595818865153, "loss": 2.1444, "step": 150295 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018495860701967897, "loss": 2.0185, "step": 150300 }, { "epoch": 0.35, "grad_norm": 1.9609375, "learning_rate": 0.0001849576321238193, "loss": 2.1631, "step": 150305 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018495665719893653, "loss": 2.0633, "step": 150310 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018495568224503103, "loss": 2.0797, "step": 150315 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018495470726210314, "loss": 2.275, "step": 150320 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018495373225015316, "loss": 2.1645, "step": 150325 }, { "epoch": 0.35, "grad_norm": 2.46875, "learning_rate": 0.00018495275720918145, "loss": 2.0645, "step": 150330 }, { "epoch": 0.35, "grad_norm": 1.96875, "learning_rate": 0.00018495178213918836, "loss": 2.1042, "step": 150335 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018495080704017416, "loss": 2.084, "step": 150340 }, { "epoch": 0.35, "grad_norm": 1.9765625, "learning_rate": 0.00018494983191213925, "loss": 2.0922, "step": 150345 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018494885675508394, "loss": 2.0069, "step": 150350 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018494788156900852, "loss": 2.0547, "step": 150355 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.0001849469063539134, "loss": 2.1772, "step": 150360 }, { "epoch": 0.35, "grad_norm": 2.34375, "learning_rate": 0.00018494593110979888, "loss": 2.1825, "step": 150365 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018494495583666528, "loss": 2.3282, "step": 150370 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018494398053451295, "loss": 2.1629, "step": 150375 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.0001849430052033422, "loss": 2.2218, "step": 150380 }, { "epoch": 0.35, "grad_norm": 1.9453125, "learning_rate": 0.0001849420298431534, "loss": 2.0301, "step": 150385 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018494105445394685, "loss": 2.2277, "step": 150390 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001849400790357229, "loss": 2.1994, "step": 150395 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018493910358848188, "loss": 2.0538, "step": 150400 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018493812811222411, "loss": 2.0994, "step": 150405 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.00018493715260694993, "loss": 2.0903, "step": 150410 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.0001849361770726597, "loss": 2.2174, "step": 150415 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018493520150935372, "loss": 1.9695, "step": 150420 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018493422591703233, "loss": 2.1433, "step": 150425 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018493325029569587, "loss": 2.1385, "step": 150430 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018493227464534467, "loss": 2.2604, "step": 150435 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018493129896597908, "loss": 2.3287, "step": 150440 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.0001849303232575994, "loss": 2.0994, "step": 150445 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.000184929347520206, "loss": 1.941, "step": 150450 }, { "epoch": 0.35, "grad_norm": 2.359375, "learning_rate": 0.0001849283717537992, "loss": 2.1495, "step": 150455 }, { "epoch": 0.35, "grad_norm": 1.8046875, "learning_rate": 0.0001849273959583793, "loss": 2.0879, "step": 150460 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018492642013394667, "loss": 2.0829, "step": 150465 }, { "epoch": 0.35, "grad_norm": 2.21875, "learning_rate": 0.00018492544428050164, "loss": 1.9986, "step": 150470 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018492446839804455, "loss": 2.2182, "step": 150475 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018492349248657567, "loss": 2.0476, "step": 150480 }, { "epoch": 0.35, "grad_norm": 1.890625, "learning_rate": 0.00018492251654609544, "loss": 2.0198, "step": 150485 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.00018492154057660412, "loss": 2.102, "step": 150490 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018492056457810205, "loss": 2.2125, "step": 150495 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018491958855058958, "loss": 2.1772, "step": 150500 }, { "epoch": 0.35, "grad_norm": 2.125, "learning_rate": 0.00018491861249406706, "loss": 2.2507, "step": 150505 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018491763640853478, "loss": 2.3065, "step": 150510 }, { "epoch": 0.35, "grad_norm": 2.328125, "learning_rate": 0.0001849166602939931, "loss": 2.0928, "step": 150515 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018491568415044234, "loss": 2.0933, "step": 150520 }, { "epoch": 0.35, "grad_norm": 1.9375, "learning_rate": 0.0001849147079778829, "loss": 2.2692, "step": 150525 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018491373177631498, "loss": 2.1274, "step": 150530 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.000184912755545739, "loss": 2.0535, "step": 150535 }, { "epoch": 0.35, "grad_norm": 1.9140625, "learning_rate": 0.0001849117792861553, "loss": 2.2585, "step": 150540 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018491080299756424, "loss": 2.2836, "step": 150545 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018490982667996605, "loss": 2.0813, "step": 150550 }, { "epoch": 0.35, "grad_norm": 2.625, "learning_rate": 0.00018490885033336114, "loss": 1.9484, "step": 150555 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.0001849078739577498, "loss": 2.089, "step": 150560 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.0001849068975531324, "loss": 2.0125, "step": 150565 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.0001849059211195093, "loss": 2.2717, "step": 150570 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018490494465688077, "loss": 2.2585, "step": 150575 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.0001849039681652472, "loss": 2.141, "step": 150580 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018490299164460887, "loss": 2.2972, "step": 150585 }, { "epoch": 0.35, "grad_norm": 2.28125, "learning_rate": 0.00018490201509496616, "loss": 2.0402, "step": 150590 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.00018490103851631935, "loss": 2.2518, "step": 150595 }, { "epoch": 0.35, "grad_norm": 2.390625, "learning_rate": 0.00018490006190866883, "loss": 2.2185, "step": 150600 }, { "epoch": 0.35, "grad_norm": 2.734375, "learning_rate": 0.0001848990852720149, "loss": 2.1755, "step": 150605 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018489810860635787, "loss": 2.0992, "step": 150610 }, { "epoch": 0.35, "grad_norm": 2.140625, "learning_rate": 0.00018489713191169815, "loss": 2.1736, "step": 150615 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.00018489615518803602, "loss": 2.1645, "step": 150620 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018489517843537184, "loss": 2.1407, "step": 150625 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.0001848942016537059, "loss": 2.0912, "step": 150630 }, { "epoch": 0.35, "grad_norm": 1.8984375, "learning_rate": 0.00018489322484303858, "loss": 1.949, "step": 150635 }, { "epoch": 0.35, "grad_norm": 2.109375, "learning_rate": 0.00018489224800337018, "loss": 1.9675, "step": 150640 }, { "epoch": 0.35, "grad_norm": 2.296875, "learning_rate": 0.00018489127113470105, "loss": 1.87, "step": 150645 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018489029423703152, "loss": 2.1986, "step": 150650 }, { "epoch": 0.35, "grad_norm": 2.40625, "learning_rate": 0.00018488931731036194, "loss": 2.1338, "step": 150655 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.0001848883403546926, "loss": 1.9024, "step": 150660 }, { "epoch": 0.35, "grad_norm": 2.875, "learning_rate": 0.00018488736337002388, "loss": 2.1997, "step": 150665 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.00018488638635635613, "loss": 2.0646, "step": 150670 }, { "epoch": 0.35, "grad_norm": 2.046875, "learning_rate": 0.0001848854093136896, "loss": 2.1606, "step": 150675 }, { "epoch": 0.35, "grad_norm": 2.59375, "learning_rate": 0.0001848844322420247, "loss": 2.1751, "step": 150680 }, { "epoch": 0.35, "grad_norm": 1.96875, "learning_rate": 0.00018488345514136177, "loss": 2.1636, "step": 150685 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018488247801170106, "loss": 2.0425, "step": 150690 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018488150085304295, "loss": 2.1432, "step": 150695 }, { "epoch": 0.35, "grad_norm": 2.03125, "learning_rate": 0.0001848805236653878, "loss": 2.1377, "step": 150700 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018487954644873594, "loss": 2.0282, "step": 150705 }, { "epoch": 0.35, "grad_norm": 2.1875, "learning_rate": 0.00018487856920308769, "loss": 2.2183, "step": 150710 }, { "epoch": 0.35, "grad_norm": 2.0625, "learning_rate": 0.00018487759192844337, "loss": 2.0994, "step": 150715 }, { "epoch": 0.35, "grad_norm": 2.3125, "learning_rate": 0.00018487661462480333, "loss": 1.9364, "step": 150720 }, { "epoch": 0.35, "grad_norm": 2.234375, "learning_rate": 0.00018487563729216788, "loss": 2.3591, "step": 150725 }, { "epoch": 0.35, "grad_norm": 2.265625, "learning_rate": 0.00018487465993053737, "loss": 2.136, "step": 150730 }, { "epoch": 0.35, "grad_norm": 2.078125, "learning_rate": 0.00018487368253991218, "loss": 2.1098, "step": 150735 }, { "epoch": 0.35, "grad_norm": 2.015625, "learning_rate": 0.00018487270512029255, "loss": 2.0697, "step": 150740 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018487172767167888, "loss": 2.0253, "step": 150745 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.0001848707501940715, "loss": 2.051, "step": 150750 }, { "epoch": 0.35, "grad_norm": 1.984375, "learning_rate": 0.00018486977268747072, "loss": 2.252, "step": 150755 }, { "epoch": 0.35, "grad_norm": 2.171875, "learning_rate": 0.0001848687951518769, "loss": 2.2839, "step": 150760 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018486781758729034, "loss": 2.1344, "step": 150765 }, { "epoch": 0.35, "grad_norm": 1.9765625, "learning_rate": 0.00018486683999371142, "loss": 2.1338, "step": 150770 }, { "epoch": 0.35, "grad_norm": 2.15625, "learning_rate": 0.00018486586237114044, "loss": 2.1157, "step": 150775 }, { "epoch": 0.35, "grad_norm": 2.4375, "learning_rate": 0.00018486488471957773, "loss": 2.0136, "step": 150780 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018486390703902368, "loss": 2.0405, "step": 150785 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018486292932947852, "loss": 2.1284, "step": 150790 }, { "epoch": 0.35, "grad_norm": 2.609375, "learning_rate": 0.00018486195159094269, "loss": 2.0587, "step": 150795 }, { "epoch": 0.35, "grad_norm": 2.203125, "learning_rate": 0.00018486097382341645, "loss": 2.1255, "step": 150800 }, { "epoch": 0.35, "grad_norm": 2.421875, "learning_rate": 0.00018485999602690017, "loss": 2.2472, "step": 150805 }, { "epoch": 0.35, "grad_norm": 2.515625, "learning_rate": 0.00018485901820139417, "loss": 2.0256, "step": 150810 }, { "epoch": 0.35, "grad_norm": 2.453125, "learning_rate": 0.0001848580403468988, "loss": 2.2226, "step": 150815 }, { "epoch": 0.35, "grad_norm": 2.09375, "learning_rate": 0.0001848570624634144, "loss": 2.215, "step": 150820 }, { "epoch": 0.35, "grad_norm": 2.78125, "learning_rate": 0.00018485608455094126, "loss": 2.1166, "step": 150825 }, { "epoch": 0.35, "grad_norm": 2.25, "learning_rate": 0.00018485510660947975, "loss": 1.8919, "step": 150830 }, { "epoch": 0.35, "grad_norm": 3.15625, "learning_rate": 0.0001848541286390302, "loss": 2.3692, "step": 150835 }, { "epoch": 0.35, "grad_norm": 2.5, "learning_rate": 0.00018485315063959294, "loss": 1.9863, "step": 150840 }, { "epoch": 0.35, "grad_norm": 2.375, "learning_rate": 0.0001848521726111683, "loss": 2.0988, "step": 150845 }, { "epoch": 0.35, "grad_norm": 2.53125, "learning_rate": 0.00018485119455375663, "loss": 2.2725, "step": 150850 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018485021646735825, "loss": 2.2456, "step": 150855 }, { "epoch": 0.36, "grad_norm": 1.84375, "learning_rate": 0.0001848492383519735, "loss": 2.2107, "step": 150860 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018484826020760271, "loss": 2.1038, "step": 150865 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.0001848472820342462, "loss": 2.0531, "step": 150870 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018484630383190434, "loss": 2.1626, "step": 150875 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018484532560057742, "loss": 2.0647, "step": 150880 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018484434734026582, "loss": 1.9572, "step": 150885 }, { "epoch": 0.36, "grad_norm": 3.125, "learning_rate": 0.00018484336905096984, "loss": 2.0688, "step": 150890 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018484239073268986, "loss": 2.3164, "step": 150895 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018484141238542611, "loss": 2.0316, "step": 150900 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018484043400917904, "loss": 2.2658, "step": 150905 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018483945560394895, "loss": 2.0182, "step": 150910 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018483847716973615, "loss": 2.1354, "step": 150915 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.000184837498706541, "loss": 2.132, "step": 150920 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.0001848365202143638, "loss": 2.2207, "step": 150925 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018483554169320491, "loss": 2.0904, "step": 150930 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018483456314306466, "loss": 2.236, "step": 150935 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.0001848335845639434, "loss": 2.2656, "step": 150940 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018483260595584145, "loss": 2.1396, "step": 150945 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001848316273187591, "loss": 2.0526, "step": 150950 }, { "epoch": 0.36, "grad_norm": 2.859375, "learning_rate": 0.00018483064865269677, "loss": 2.2402, "step": 150955 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.00018482966995765474, "loss": 2.1204, "step": 150960 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018482869123363337, "loss": 2.2458, "step": 150965 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.000184827712480633, "loss": 2.2774, "step": 150970 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018482673369865388, "loss": 2.0563, "step": 150975 }, { "epoch": 0.36, "grad_norm": 2.828125, "learning_rate": 0.00018482575488769644, "loss": 2.1383, "step": 150980 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.000184824776047761, "loss": 2.1114, "step": 150985 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018482379717884787, "loss": 2.1655, "step": 150990 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018482281828095736, "loss": 2.2056, "step": 150995 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018482183935408986, "loss": 2.2021, "step": 151000 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018482086039824571, "loss": 2.0482, "step": 151005 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018481988141342518, "loss": 2.1671, "step": 151010 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018481890239962866, "loss": 2.2041, "step": 151015 }, { "epoch": 0.36, "grad_norm": 2.703125, "learning_rate": 0.00018481792335685646, "loss": 2.0748, "step": 151020 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018481694428510893, "loss": 2.16, "step": 151025 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018481596518438637, "loss": 2.07, "step": 151030 }, { "epoch": 0.36, "grad_norm": 2.84375, "learning_rate": 0.00018481498605468915, "loss": 2.0421, "step": 151035 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018481400689601758, "loss": 2.0664, "step": 151040 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018481302770837204, "loss": 2.1587, "step": 151045 }, { "epoch": 0.36, "grad_norm": 1.859375, "learning_rate": 0.0001848120484917528, "loss": 1.98, "step": 151050 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018481106924616021, "loss": 2.1847, "step": 151055 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018481008997159467, "loss": 2.189, "step": 151060 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018480911066805642, "loss": 2.1427, "step": 151065 }, { "epoch": 0.36, "grad_norm": 1.90625, "learning_rate": 0.00018480813133554586, "loss": 2.1176, "step": 151070 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018480715197406332, "loss": 2.1475, "step": 151075 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018480617258360908, "loss": 2.2932, "step": 151080 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018480519316418353, "loss": 1.9567, "step": 151085 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.000184804213715787, "loss": 2.126, "step": 151090 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018480323423841982, "loss": 2.0398, "step": 151095 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.0001848022547320823, "loss": 1.974, "step": 151100 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.0001848012751967748, "loss": 2.1521, "step": 151105 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018480029563249764, "loss": 2.0086, "step": 151110 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018479931603925117, "loss": 2.2017, "step": 151115 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.0001847983364170357, "loss": 2.2589, "step": 151120 }, { "epoch": 0.36, "grad_norm": 2.734375, "learning_rate": 0.0001847973567658516, "loss": 2.3303, "step": 151125 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018479637708569916, "loss": 2.2828, "step": 151130 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018479539737657877, "loss": 2.2608, "step": 151135 }, { "epoch": 0.36, "grad_norm": 3.265625, "learning_rate": 0.00018479441763849072, "loss": 2.2583, "step": 151140 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018479343787143534, "loss": 2.3459, "step": 151145 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.000184792458075413, "loss": 2.2307, "step": 151150 }, { "epoch": 0.36, "grad_norm": 2.609375, "learning_rate": 0.00018479147825042403, "loss": 2.0349, "step": 151155 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018479049839646873, "loss": 2.3795, "step": 151160 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018478951851354748, "loss": 2.0192, "step": 151165 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018478853860166058, "loss": 1.8094, "step": 151170 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.0001847875586608084, "loss": 1.8437, "step": 151175 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018478657869099122, "loss": 2.2397, "step": 151180 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.00018478559869220944, "loss": 2.2206, "step": 151185 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018478461866446334, "loss": 1.9365, "step": 151190 }, { "epoch": 0.36, "grad_norm": 2.609375, "learning_rate": 0.00018478363860775326, "loss": 2.2796, "step": 151195 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.0001847826585220796, "loss": 1.8857, "step": 151200 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.0001847816784074426, "loss": 2.2293, "step": 151205 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018478069826384266, "loss": 2.1989, "step": 151210 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018477971809128008, "loss": 2.0809, "step": 151215 }, { "epoch": 0.36, "grad_norm": 2.84375, "learning_rate": 0.00018477873788975524, "loss": 2.1868, "step": 151220 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018477775765926842, "loss": 2.0844, "step": 151225 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018477677739982, "loss": 2.1207, "step": 151230 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018477579711141028, "loss": 2.2375, "step": 151235 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001847748167940396, "loss": 2.0721, "step": 151240 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018477383644770833, "loss": 2.2916, "step": 151245 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018477285607241677, "loss": 2.1505, "step": 151250 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018477187566816528, "loss": 2.115, "step": 151255 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018477089523495416, "loss": 2.106, "step": 151260 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018476991477278375, "loss": 2.0889, "step": 151265 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018476893428165446, "loss": 2.0915, "step": 151270 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018476795376156654, "loss": 2.0426, "step": 151275 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.0001847669732125203, "loss": 2.1936, "step": 151280 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018476599263451618, "loss": 2.0723, "step": 151285 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018476501202755442, "loss": 2.2112, "step": 151290 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018476403139163543, "loss": 2.2113, "step": 151295 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018476305072675947, "loss": 2.2208, "step": 151300 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.00018476207003292696, "loss": 2.0934, "step": 151305 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018476108931013817, "loss": 1.8913, "step": 151310 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018476010855839343, "loss": 2.0737, "step": 151315 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018475912777769313, "loss": 2.1208, "step": 151320 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018475814696803758, "loss": 2.0712, "step": 151325 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018475716612942708, "loss": 2.037, "step": 151330 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018475618526186204, "loss": 2.1435, "step": 151335 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.0001847552043653427, "loss": 2.1249, "step": 151340 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.00018475422343986944, "loss": 1.9882, "step": 151345 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018475324248544265, "loss": 2.1128, "step": 151350 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018475226150206262, "loss": 2.1474, "step": 151355 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018475128048972965, "loss": 2.0786, "step": 151360 }, { "epoch": 0.36, "grad_norm": 2.765625, "learning_rate": 0.00018475029944844411, "loss": 2.166, "step": 151365 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018474931837820632, "loss": 2.194, "step": 151370 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018474833727901663, "loss": 2.1098, "step": 151375 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018474735615087536, "loss": 2.1398, "step": 151380 }, { "epoch": 0.36, "grad_norm": 2.765625, "learning_rate": 0.00018474637499378286, "loss": 2.2344, "step": 151385 }, { "epoch": 0.36, "grad_norm": 2.703125, "learning_rate": 0.0001847453938077395, "loss": 2.1576, "step": 151390 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018474441259274556, "loss": 2.3414, "step": 151395 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018474343134880137, "loss": 2.1751, "step": 151400 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.0001847424500759073, "loss": 2.1179, "step": 151405 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018474146877406366, "loss": 2.004, "step": 151410 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018474048744327083, "loss": 2.0668, "step": 151415 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.00018473950608352907, "loss": 2.1154, "step": 151420 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018473852469483876, "loss": 2.26, "step": 151425 }, { "epoch": 0.36, "grad_norm": 3.140625, "learning_rate": 0.00018473754327720028, "loss": 2.1348, "step": 151430 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018473656183061387, "loss": 2.0551, "step": 151435 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018473558035507996, "loss": 2.0241, "step": 151440 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.0001847345988505988, "loss": 2.0822, "step": 151445 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.0001847336173171708, "loss": 2.2209, "step": 151450 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018473263575479623, "loss": 2.2723, "step": 151455 }, { "epoch": 0.36, "grad_norm": 1.9140625, "learning_rate": 0.00018473165416347544, "loss": 2.0598, "step": 151460 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.0001847306725432088, "loss": 1.9099, "step": 151465 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018472969089399662, "loss": 2.0412, "step": 151470 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018472870921583927, "loss": 2.0697, "step": 151475 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018472772750873705, "loss": 2.0955, "step": 151480 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018472674577269026, "loss": 1.8723, "step": 151485 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001847257640076993, "loss": 2.1084, "step": 151490 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.0001847247822137645, "loss": 2.1017, "step": 151495 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018472380039088616, "loss": 2.1236, "step": 151500 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018472281853906464, "loss": 2.1142, "step": 151505 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018472183665830025, "loss": 2.096, "step": 151510 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018472085474859336, "loss": 2.2212, "step": 151515 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018471987280994432, "loss": 2.1509, "step": 151520 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.0001847188908423534, "loss": 2.1212, "step": 151525 }, { "epoch": 0.36, "grad_norm": 1.8359375, "learning_rate": 0.00018471790884582096, "loss": 2.1598, "step": 151530 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018471692682034735, "loss": 2.3283, "step": 151535 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018471594476593292, "loss": 2.2729, "step": 151540 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018471496268257796, "loss": 2.019, "step": 151545 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018471398057028288, "loss": 1.9643, "step": 151550 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018471299842904792, "loss": 2.1294, "step": 151555 }, { "epoch": 0.36, "grad_norm": 1.90625, "learning_rate": 0.0001847120162588735, "loss": 2.0275, "step": 151560 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.0001847110340597599, "loss": 2.053, "step": 151565 }, { "epoch": 0.36, "grad_norm": 1.84375, "learning_rate": 0.00018471005183170748, "loss": 2.2189, "step": 151570 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018470906957471654, "loss": 2.0018, "step": 151575 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.0001847080872887875, "loss": 2.1552, "step": 151580 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.00018470710497392058, "loss": 2.1894, "step": 151585 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018470612263011624, "loss": 1.8705, "step": 151590 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.0001847051402573747, "loss": 1.9939, "step": 151595 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018470415785569637, "loss": 2.1232, "step": 151600 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018470317542508156, "loss": 2.053, "step": 151605 }, { "epoch": 0.36, "grad_norm": 1.828125, "learning_rate": 0.00018470219296553062, "loss": 2.1992, "step": 151610 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018470121047704384, "loss": 2.3399, "step": 151615 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018470022795962163, "loss": 2.1186, "step": 151620 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018469924541326428, "loss": 1.9783, "step": 151625 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018469826283797213, "loss": 2.0084, "step": 151630 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018469728023374552, "loss": 2.1663, "step": 151635 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018469629760058478, "loss": 1.9423, "step": 151640 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018469531493849024, "loss": 2.1127, "step": 151645 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018469433224746224, "loss": 2.0825, "step": 151650 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018469334952750111, "loss": 2.2698, "step": 151655 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018469236677860721, "loss": 2.2378, "step": 151660 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.0001846913840007809, "loss": 2.2204, "step": 151665 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018469040119402244, "loss": 2.0628, "step": 151670 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.0001846894183583322, "loss": 2.1717, "step": 151675 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018468843549371053, "loss": 2.0297, "step": 151680 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018468745260015774, "loss": 2.247, "step": 151685 }, { "epoch": 0.36, "grad_norm": 2.71875, "learning_rate": 0.00018468646967767418, "loss": 2.2014, "step": 151690 }, { "epoch": 0.36, "grad_norm": 2.65625, "learning_rate": 0.0001846854867262602, "loss": 2.0986, "step": 151695 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018468450374591614, "loss": 1.9927, "step": 151700 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018468352073664228, "loss": 2.1869, "step": 151705 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.000184682537698439, "loss": 2.1552, "step": 151710 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018468155463130664, "loss": 2.0776, "step": 151715 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.0001846805715352455, "loss": 2.057, "step": 151720 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.000184679588410256, "loss": 2.2976, "step": 151725 }, { "epoch": 0.36, "grad_norm": 1.703125, "learning_rate": 0.00018467860525633837, "loss": 1.9705, "step": 151730 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.000184677622073493, "loss": 2.0286, "step": 151735 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018467663886172022, "loss": 2.2236, "step": 151740 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.0001846756556210204, "loss": 2.06, "step": 151745 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018467467235139379, "loss": 2.3248, "step": 151750 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.0001846736890528408, "loss": 2.1307, "step": 151755 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.0001846727057253617, "loss": 2.0741, "step": 151760 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.0001846717223689569, "loss": 2.2113, "step": 151765 }, { "epoch": 0.36, "grad_norm": 1.984375, "learning_rate": 0.0001846707389836267, "loss": 1.8819, "step": 151770 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018466975556937146, "loss": 2.1541, "step": 151775 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001846687721261915, "loss": 2.2921, "step": 151780 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.00018466778865408712, "loss": 2.2973, "step": 151785 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018466680515305872, "loss": 2.3453, "step": 151790 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018466582162310658, "loss": 2.0644, "step": 151795 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018466483806423107, "loss": 2.2149, "step": 151800 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018466385447643254, "loss": 2.2058, "step": 151805 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018466287085971126, "loss": 2.15, "step": 151810 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018466188721406762, "loss": 1.9491, "step": 151815 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018466090353950194, "loss": 2.0715, "step": 151820 }, { "epoch": 0.36, "grad_norm": 1.9765625, "learning_rate": 0.00018465991983601456, "loss": 2.1334, "step": 151825 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018465893610360582, "loss": 2.1383, "step": 151830 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018465795234227605, "loss": 2.1132, "step": 151835 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.00018465696855202558, "loss": 1.9459, "step": 151840 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.0001846559847328548, "loss": 2.1247, "step": 151845 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018465500088476398, "loss": 2.0764, "step": 151850 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018465401700775344, "loss": 2.1006, "step": 151855 }, { "epoch": 0.36, "grad_norm": 2.8125, "learning_rate": 0.0001846530331018236, "loss": 2.3872, "step": 151860 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018465204916697468, "loss": 1.9495, "step": 151865 }, { "epoch": 0.36, "grad_norm": 3.25, "learning_rate": 0.00018465106520320715, "loss": 1.9916, "step": 151870 }, { "epoch": 0.36, "grad_norm": 1.9140625, "learning_rate": 0.0001846500812105213, "loss": 2.1947, "step": 151875 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.00018464909718891737, "loss": 2.0227, "step": 151880 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001846481131383958, "loss": 2.1265, "step": 151885 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.0001846471290589569, "loss": 2.2456, "step": 151890 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018464614495060104, "loss": 2.1328, "step": 151895 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018464516081332847, "loss": 2.1327, "step": 151900 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018464417664713963, "loss": 2.0147, "step": 151905 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001846431924520348, "loss": 2.1787, "step": 151910 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018464220822801428, "loss": 2.0962, "step": 151915 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018464122397507846, "loss": 2.229, "step": 151920 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.0001846402396932277, "loss": 2.2924, "step": 151925 }, { "epoch": 0.36, "grad_norm": 3.078125, "learning_rate": 0.00018463925538246225, "loss": 2.1884, "step": 151930 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018463827104278247, "loss": 2.0612, "step": 151935 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018463728667418878, "loss": 2.0787, "step": 151940 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018463630227668145, "loss": 1.9676, "step": 151945 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018463531785026084, "loss": 2.1881, "step": 151950 }, { "epoch": 0.36, "grad_norm": 1.9765625, "learning_rate": 0.00018463433339492723, "loss": 2.2976, "step": 151955 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018463334891068102, "loss": 2.1153, "step": 151960 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018463236439752252, "loss": 2.1863, "step": 151965 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018463137985545207, "loss": 2.1338, "step": 151970 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018463039528447, "loss": 2.1489, "step": 151975 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018462941068457663, "loss": 2.1136, "step": 151980 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018462842605577236, "loss": 2.0597, "step": 151985 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.00018462744139805746, "loss": 2.2294, "step": 151990 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.0001846264567114323, "loss": 2.2863, "step": 151995 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.0001846254719958972, "loss": 2.1495, "step": 152000 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018462448725145252, "loss": 2.137, "step": 152005 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.0001846235024780986, "loss": 1.9529, "step": 152010 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001846225176758357, "loss": 2.1506, "step": 152015 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018462153284466425, "loss": 2.1434, "step": 152020 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018462054798458453, "loss": 2.2652, "step": 152025 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.0001846195630955969, "loss": 2.0426, "step": 152030 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001846185781777017, "loss": 2.1361, "step": 152035 }, { "epoch": 0.36, "grad_norm": 1.953125, "learning_rate": 0.00018461759323089928, "loss": 2.1336, "step": 152040 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018461660825518994, "loss": 2.2768, "step": 152045 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018461562325057401, "loss": 2.0541, "step": 152050 }, { "epoch": 0.36, "grad_norm": 1.984375, "learning_rate": 0.00018461463821705187, "loss": 2.0413, "step": 152055 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.0001846136531546238, "loss": 1.9057, "step": 152060 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018461266806329022, "loss": 2.1286, "step": 152065 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001846116829430514, "loss": 2.1073, "step": 152070 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001846106977939077, "loss": 2.0556, "step": 152075 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018460971261585944, "loss": 1.9541, "step": 152080 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018460872740890699, "loss": 2.1801, "step": 152085 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018460774217305063, "loss": 2.2982, "step": 152090 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018460675690829077, "loss": 2.1414, "step": 152095 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018460577161462767, "loss": 2.2513, "step": 152100 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018460478629206173, "loss": 2.0912, "step": 152105 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018460380094059323, "loss": 1.9888, "step": 152110 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018460281556022256, "loss": 2.0075, "step": 152115 }, { "epoch": 0.36, "grad_norm": 1.875, "learning_rate": 0.00018460183015095005, "loss": 2.2442, "step": 152120 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018460084471277598, "loss": 2.0762, "step": 152125 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018459985924570077, "loss": 2.1259, "step": 152130 }, { "epoch": 0.36, "grad_norm": 3.046875, "learning_rate": 0.00018459887374972466, "loss": 2.0999, "step": 152135 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.0001845978882248481, "loss": 1.9597, "step": 152140 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018459690267107136, "loss": 2.1551, "step": 152145 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.00018459591708839472, "loss": 2.1782, "step": 152150 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018459493147681864, "loss": 2.1106, "step": 152155 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001845939458363434, "loss": 2.1968, "step": 152160 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018459296016696929, "loss": 2.1658, "step": 152165 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.00018459197446869672, "loss": 2.1624, "step": 152170 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018459098874152598, "loss": 2.1963, "step": 152175 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018459000298545747, "loss": 2.0326, "step": 152180 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018458901720049142, "loss": 2.0888, "step": 152185 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018458803138662826, "loss": 2.0508, "step": 152190 }, { "epoch": 0.36, "grad_norm": 1.953125, "learning_rate": 0.0001845870455438683, "loss": 2.1021, "step": 152195 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018458605967221184, "loss": 2.1552, "step": 152200 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018458507377165927, "loss": 2.0729, "step": 152205 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018458408784221092, "loss": 2.0975, "step": 152210 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001845831018838671, "loss": 2.1306, "step": 152215 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018458211589662815, "loss": 2.2888, "step": 152220 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018458112988049442, "loss": 2.2784, "step": 152225 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018458014383546624, "loss": 2.222, "step": 152230 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.00018457915776154396, "loss": 2.0786, "step": 152235 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018457817165872792, "loss": 2.1068, "step": 152240 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.0001845771855270184, "loss": 1.9881, "step": 152245 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018457619936641578, "loss": 2.1664, "step": 152250 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018457521317692043, "loss": 2.2748, "step": 152255 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018457422695853263, "loss": 2.0482, "step": 152260 }, { "epoch": 0.36, "grad_norm": 3.09375, "learning_rate": 0.00018457324071125277, "loss": 2.007, "step": 152265 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.0001845722544350811, "loss": 2.094, "step": 152270 }, { "epoch": 0.36, "grad_norm": 1.9765625, "learning_rate": 0.00018457126813001807, "loss": 2.0391, "step": 152275 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018457028179606394, "loss": 2.1684, "step": 152280 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018456929543321908, "loss": 2.2794, "step": 152285 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018456830904148378, "loss": 2.1061, "step": 152290 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018456732262085845, "loss": 2.2367, "step": 152295 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018456633617134336, "loss": 2.1477, "step": 152300 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018456534969293892, "loss": 2.1649, "step": 152305 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.0001845643631856454, "loss": 2.2366, "step": 152310 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018456337664946314, "loss": 2.1778, "step": 152315 }, { "epoch": 0.36, "grad_norm": 1.7734375, "learning_rate": 0.0001845623900843925, "loss": 2.0784, "step": 152320 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018456140349043382, "loss": 2.0939, "step": 152325 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018456041686758744, "loss": 2.1601, "step": 152330 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018455943021585366, "loss": 2.0969, "step": 152335 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.0001845584435352329, "loss": 2.32, "step": 152340 }, { "epoch": 0.36, "grad_norm": 2.78125, "learning_rate": 0.00018455745682572538, "loss": 1.9748, "step": 152345 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018455647008733156, "loss": 2.0522, "step": 152350 }, { "epoch": 0.36, "grad_norm": 1.71875, "learning_rate": 0.00018455548332005166, "loss": 2.006, "step": 152355 }, { "epoch": 0.36, "grad_norm": 2.625, "learning_rate": 0.00018455449652388613, "loss": 2.0959, "step": 152360 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.0001845535096988352, "loss": 2.104, "step": 152365 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.0001845525228448993, "loss": 2.1231, "step": 152370 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018455153596207868, "loss": 2.2296, "step": 152375 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018455054905037376, "loss": 2.1154, "step": 152380 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.0001845495621097848, "loss": 2.1051, "step": 152385 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.0001845485751403122, "loss": 2.1163, "step": 152390 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018454758814195627, "loss": 2.1083, "step": 152395 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018454660111471736, "loss": 2.0633, "step": 152400 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.00018454561405859582, "loss": 2.054, "step": 152405 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.0001845446269735919, "loss": 1.9988, "step": 152410 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018454363985970606, "loss": 2.2978, "step": 152415 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018454265271693855, "loss": 2.0177, "step": 152420 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018454166554528973, "loss": 2.1709, "step": 152425 }, { "epoch": 0.36, "grad_norm": 2.625, "learning_rate": 0.00018454067834475997, "loss": 2.0841, "step": 152430 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018453969111534958, "loss": 1.9985, "step": 152435 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018453870385705886, "loss": 2.105, "step": 152440 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018453771656988822, "loss": 2.1729, "step": 152445 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018453672925383796, "loss": 2.1128, "step": 152450 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.0001845357419089084, "loss": 2.1141, "step": 152455 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018453475453509993, "loss": 2.1095, "step": 152460 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018453376713241284, "loss": 2.1316, "step": 152465 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018453277970084745, "loss": 2.2386, "step": 152470 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018453179224040418, "loss": 2.1415, "step": 152475 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.0001845308047510833, "loss": 2.114, "step": 152480 }, { "epoch": 0.36, "grad_norm": 1.9765625, "learning_rate": 0.00018452981723288516, "loss": 2.0462, "step": 152485 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.0001845288296858101, "loss": 2.0419, "step": 152490 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018452784210985844, "loss": 2.0689, "step": 152495 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018452685450503058, "loss": 2.0459, "step": 152500 }, { "epoch": 0.36, "grad_norm": 2.625, "learning_rate": 0.00018452586687132678, "loss": 2.162, "step": 152505 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.0001845248792087474, "loss": 2.1533, "step": 152510 }, { "epoch": 0.36, "grad_norm": 1.84375, "learning_rate": 0.00018452389151729285, "loss": 2.1717, "step": 152515 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018452290379696336, "loss": 2.1547, "step": 152520 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.0001845219160477593, "loss": 2.0115, "step": 152525 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018452092826968107, "loss": 2.2718, "step": 152530 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018451994046272892, "loss": 2.0801, "step": 152535 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.00018451895262690322, "loss": 2.015, "step": 152540 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.0001845179647622043, "loss": 2.1339, "step": 152545 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018451697686863255, "loss": 2.0232, "step": 152550 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018451598894618826, "loss": 2.1574, "step": 152555 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018451500099487178, "loss": 2.075, "step": 152560 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001845140130146834, "loss": 2.1308, "step": 152565 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018451302500562355, "loss": 2.2409, "step": 152570 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018451203696769248, "loss": 2.1145, "step": 152575 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.0001845110489008906, "loss": 2.2554, "step": 152580 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.0001845100608052182, "loss": 2.1303, "step": 152585 }, { "epoch": 0.36, "grad_norm": 1.953125, "learning_rate": 0.00018450907268067562, "loss": 2.2126, "step": 152590 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018450808452726323, "loss": 2.0623, "step": 152595 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018450709634498132, "loss": 2.2382, "step": 152600 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018450610813383026, "loss": 2.2256, "step": 152605 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001845051198938104, "loss": 2.1469, "step": 152610 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018450413162492203, "loss": 2.0231, "step": 152615 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018450314332716557, "loss": 2.2185, "step": 152620 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018450215500054125, "loss": 2.1622, "step": 152625 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018450116664504949, "loss": 2.0409, "step": 152630 }, { "epoch": 0.36, "grad_norm": 1.9765625, "learning_rate": 0.00018450017826069054, "loss": 2.1038, "step": 152635 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018449918984746487, "loss": 2.2024, "step": 152640 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.0001844982014053727, "loss": 1.962, "step": 152645 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.00018449721293441445, "loss": 2.2737, "step": 152650 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018449622443459038, "loss": 2.0793, "step": 152655 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.0001844952359059009, "loss": 1.9209, "step": 152660 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018449424734834629, "loss": 2.0662, "step": 152665 }, { "epoch": 0.36, "grad_norm": 1.953125, "learning_rate": 0.00018449325876192694, "loss": 2.0271, "step": 152670 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018449227014664311, "loss": 2.1695, "step": 152675 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018449128150249523, "loss": 2.1508, "step": 152680 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.0001844902928294836, "loss": 2.0514, "step": 152685 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018448930412760854, "loss": 2.0992, "step": 152690 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.0001844883153968704, "loss": 1.9346, "step": 152695 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018448732663726953, "loss": 2.0434, "step": 152700 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018448633784880626, "loss": 2.2249, "step": 152705 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.0001844853490314809, "loss": 2.1428, "step": 152710 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.0001844843601852938, "loss": 2.0699, "step": 152715 }, { "epoch": 0.36, "grad_norm": 2.703125, "learning_rate": 0.00018448337131024534, "loss": 2.0846, "step": 152720 }, { "epoch": 0.36, "grad_norm": 1.8828125, "learning_rate": 0.00018448238240633585, "loss": 2.1128, "step": 152725 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018448139347356562, "loss": 2.0238, "step": 152730 }, { "epoch": 0.36, "grad_norm": 2.859375, "learning_rate": 0.000184480404511935, "loss": 2.0866, "step": 152735 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018447941552144435, "loss": 2.0, "step": 152740 }, { "epoch": 0.36, "grad_norm": 1.9609375, "learning_rate": 0.00018447842650209402, "loss": 2.1531, "step": 152745 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018447743745388432, "loss": 2.2057, "step": 152750 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.0001844764483768156, "loss": 2.2917, "step": 152755 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018447545927088816, "loss": 2.2447, "step": 152760 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.0001844744701361024, "loss": 2.1807, "step": 152765 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.0001844734809724586, "loss": 2.0538, "step": 152770 }, { "epoch": 0.36, "grad_norm": 3.0, "learning_rate": 0.00018447249177995715, "loss": 1.9955, "step": 152775 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018447150255859838, "loss": 2.008, "step": 152780 }, { "epoch": 0.36, "grad_norm": 1.921875, "learning_rate": 0.0001844705133083826, "loss": 2.1669, "step": 152785 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018446952402931014, "loss": 2.1535, "step": 152790 }, { "epoch": 0.36, "grad_norm": 2.6875, "learning_rate": 0.0001844685347213814, "loss": 2.0615, "step": 152795 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018446754538459662, "loss": 2.267, "step": 152800 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018446655601895624, "loss": 2.208, "step": 152805 }, { "epoch": 0.36, "grad_norm": 1.984375, "learning_rate": 0.00018446556662446052, "loss": 2.0003, "step": 152810 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018446457720110987, "loss": 2.1321, "step": 152815 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018446358774890456, "loss": 1.9953, "step": 152820 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018446259826784494, "loss": 2.3683, "step": 152825 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.0001844616087579314, "loss": 2.129, "step": 152830 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018446061921916423, "loss": 2.1462, "step": 152835 }, { "epoch": 0.36, "grad_norm": 2.71875, "learning_rate": 0.00018445962965154379, "loss": 2.1238, "step": 152840 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.00018445864005507042, "loss": 2.16, "step": 152845 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.00018445765042974442, "loss": 2.2864, "step": 152850 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018445666077556615, "loss": 2.3614, "step": 152855 }, { "epoch": 0.36, "grad_norm": 1.984375, "learning_rate": 0.000184455671092536, "loss": 2.0142, "step": 152860 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001844546813806542, "loss": 2.1689, "step": 152865 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001844536916399212, "loss": 2.0597, "step": 152870 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018445270187033726, "loss": 2.0219, "step": 152875 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018445171207190272, "loss": 1.874, "step": 152880 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.000184450722244618, "loss": 2.2367, "step": 152885 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018444973238848332, "loss": 2.1038, "step": 152890 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018444874250349912, "loss": 2.12, "step": 152895 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018444775258966568, "loss": 2.2078, "step": 152900 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018444676264698338, "loss": 2.19, "step": 152905 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018444577267545252, "loss": 2.2938, "step": 152910 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018444478267507349, "loss": 2.1892, "step": 152915 }, { "epoch": 0.36, "grad_norm": 2.765625, "learning_rate": 0.00018444379264584654, "loss": 2.1404, "step": 152920 }, { "epoch": 0.36, "grad_norm": 1.8828125, "learning_rate": 0.00018444280258777206, "loss": 2.0806, "step": 152925 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018444181250085043, "loss": 2.168, "step": 152930 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.0001844408223850819, "loss": 2.1697, "step": 152935 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.0001844398322404669, "loss": 2.0872, "step": 152940 }, { "epoch": 0.36, "grad_norm": 2.609375, "learning_rate": 0.00018443884206700565, "loss": 2.0998, "step": 152945 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018443785186469862, "loss": 2.1704, "step": 152950 }, { "epoch": 0.36, "grad_norm": 2.734375, "learning_rate": 0.00018443686163354607, "loss": 2.1598, "step": 152955 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018443587137354837, "loss": 2.1051, "step": 152960 }, { "epoch": 0.36, "grad_norm": 1.7890625, "learning_rate": 0.00018443488108470582, "loss": 2.087, "step": 152965 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001844338907670188, "loss": 2.1279, "step": 152970 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018443290042048763, "loss": 2.0064, "step": 152975 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018443191004511266, "loss": 2.0039, "step": 152980 }, { "epoch": 0.36, "grad_norm": 1.859375, "learning_rate": 0.0001844309196408942, "loss": 2.0044, "step": 152985 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.0001844299292078326, "loss": 2.0388, "step": 152990 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018442893874592823, "loss": 2.3094, "step": 152995 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018442794825518138, "loss": 2.1552, "step": 153000 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018442695773559242, "loss": 2.1358, "step": 153005 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018442596718716168, "loss": 2.178, "step": 153010 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001844249766098895, "loss": 2.0297, "step": 153015 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018442398600377623, "loss": 2.1739, "step": 153020 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018442299536882217, "loss": 2.1542, "step": 153025 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001844220047050277, "loss": 2.046, "step": 153030 }, { "epoch": 0.36, "grad_norm": 1.7578125, "learning_rate": 0.00018442101401239315, "loss": 2.1575, "step": 153035 }, { "epoch": 0.36, "grad_norm": 1.8984375, "learning_rate": 0.00018442002329091884, "loss": 2.0942, "step": 153040 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018441903254060512, "loss": 2.0653, "step": 153045 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001844180417614523, "loss": 2.2097, "step": 153050 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.0001844170509534608, "loss": 2.1851, "step": 153055 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.00018441606011663088, "loss": 2.2074, "step": 153060 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.0001844150692509629, "loss": 2.1772, "step": 153065 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.0001844140783564572, "loss": 2.1971, "step": 153070 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001844130874331141, "loss": 2.1271, "step": 153075 }, { "epoch": 0.36, "grad_norm": 1.984375, "learning_rate": 0.00018441209648093398, "loss": 2.0641, "step": 153080 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018441110549991717, "loss": 2.0714, "step": 153085 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018441011449006397, "loss": 2.0103, "step": 153090 }, { "epoch": 0.36, "grad_norm": 1.9375, "learning_rate": 0.0001844091234513748, "loss": 2.1638, "step": 153095 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.0001844081323838499, "loss": 2.2, "step": 153100 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.0001844071412874896, "loss": 2.2844, "step": 153105 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018440615016229438, "loss": 1.9323, "step": 153110 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018440515900826443, "loss": 2.1641, "step": 153115 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.0001844041678254002, "loss": 2.0665, "step": 153120 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.0001844031766137019, "loss": 2.0972, "step": 153125 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018440218537316998, "loss": 1.9575, "step": 153130 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018440119410380475, "loss": 1.9551, "step": 153135 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018440020280560652, "loss": 2.3089, "step": 153140 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001843992114785757, "loss": 2.2307, "step": 153145 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018439822012271252, "loss": 1.9997, "step": 153150 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018439722873801745, "loss": 2.0602, "step": 153155 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.0001843962373244907, "loss": 2.2005, "step": 153160 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018439524588213267, "loss": 2.2545, "step": 153165 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001843942544109437, "loss": 2.0798, "step": 153170 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.0001843932629109241, "loss": 2.0597, "step": 153175 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018439227138207428, "loss": 2.0752, "step": 153180 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018439127982439448, "loss": 2.166, "step": 153185 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.0001843902882378851, "loss": 2.2208, "step": 153190 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018438929662254648, "loss": 2.3277, "step": 153195 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018438830497837896, "loss": 2.0828, "step": 153200 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.00018438731330538283, "loss": 2.1313, "step": 153205 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.0001843863216035585, "loss": 2.1397, "step": 153210 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018438532987290624, "loss": 2.0239, "step": 153215 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018438433811342645, "loss": 2.0033, "step": 153220 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.0001843833463251194, "loss": 2.0322, "step": 153225 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018438235450798552, "loss": 2.1249, "step": 153230 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018438136266202504, "loss": 2.3022, "step": 153235 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.0001843803707872384, "loss": 2.0485, "step": 153240 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.0001843793788836259, "loss": 2.1134, "step": 153245 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.00018437838695118782, "loss": 2.1457, "step": 153250 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.0001843773949899246, "loss": 2.2246, "step": 153255 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.0001843764029998365, "loss": 2.1122, "step": 153260 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018437541098092393, "loss": 2.2247, "step": 153265 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018437441893318717, "loss": 2.103, "step": 153270 }, { "epoch": 0.36, "grad_norm": 2.9375, "learning_rate": 0.00018437342685662656, "loss": 2.0293, "step": 153275 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.0001843724347512425, "loss": 2.1838, "step": 153280 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.00018437144261703524, "loss": 2.2078, "step": 153285 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018437045045400518, "loss": 2.0997, "step": 153290 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018436945826215263, "loss": 2.0679, "step": 153295 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.000184368466041478, "loss": 2.0535, "step": 153300 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.0001843674737919815, "loss": 2.2243, "step": 153305 }, { "epoch": 0.36, "grad_norm": 1.8828125, "learning_rate": 0.0001843664815136636, "loss": 2.0932, "step": 153310 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018436548920652455, "loss": 2.1297, "step": 153315 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018436449687056472, "loss": 2.0562, "step": 153320 }, { "epoch": 0.36, "grad_norm": 1.890625, "learning_rate": 0.00018436350450578445, "loss": 2.2125, "step": 153325 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018436251211218407, "loss": 2.1986, "step": 153330 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018436151968976395, "loss": 2.2642, "step": 153335 }, { "epoch": 0.36, "grad_norm": 1.921875, "learning_rate": 0.00018436052723852438, "loss": 2.2068, "step": 153340 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.00018435953475846571, "loss": 2.0494, "step": 153345 }, { "epoch": 0.36, "grad_norm": 1.796875, "learning_rate": 0.00018435854224958833, "loss": 1.9933, "step": 153350 }, { "epoch": 0.36, "grad_norm": 1.8671875, "learning_rate": 0.00018435754971189253, "loss": 2.0993, "step": 153355 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018435655714537868, "loss": 2.1889, "step": 153360 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018435556455004707, "loss": 2.1442, "step": 153365 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018435457192589807, "loss": 2.0518, "step": 153370 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018435357927293203, "loss": 2.1824, "step": 153375 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018435258659114926, "loss": 2.2315, "step": 153380 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.00018435159388055015, "loss": 2.2467, "step": 153385 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018435060114113498, "loss": 2.2403, "step": 153390 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.0001843496083729041, "loss": 2.2231, "step": 153395 }, { "epoch": 0.36, "grad_norm": 1.8203125, "learning_rate": 0.0001843486155758579, "loss": 2.1945, "step": 153400 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.0001843476227499967, "loss": 2.1616, "step": 153405 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018434662989532077, "loss": 2.1591, "step": 153410 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018434563701183053, "loss": 2.2445, "step": 153415 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.0001843446440995263, "loss": 2.1479, "step": 153420 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018434365115840837, "loss": 2.1952, "step": 153425 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018434265818847713, "loss": 2.143, "step": 153430 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.00018434166518973292, "loss": 2.2285, "step": 153435 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018434067216217608, "loss": 2.1255, "step": 153440 }, { "epoch": 0.36, "grad_norm": 1.9375, "learning_rate": 0.0001843396791058069, "loss": 2.1289, "step": 153445 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018433868602062581, "loss": 2.3967, "step": 153450 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018433769290663305, "loss": 2.2583, "step": 153455 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.00018433669976382903, "loss": 2.1355, "step": 153460 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018433570659221404, "loss": 1.8236, "step": 153465 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018433471339178846, "loss": 2.2208, "step": 153470 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018433372016255262, "loss": 1.9647, "step": 153475 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018433272690450685, "loss": 2.2124, "step": 153480 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018433173361765147, "loss": 2.2936, "step": 153485 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018433074030198686, "loss": 2.1711, "step": 153490 }, { "epoch": 0.36, "grad_norm": 1.7578125, "learning_rate": 0.00018432974695751332, "loss": 2.1985, "step": 153495 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018432875358423122, "loss": 2.1141, "step": 153500 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.0001843277601821409, "loss": 2.2152, "step": 153505 }, { "epoch": 0.36, "grad_norm": 2.796875, "learning_rate": 0.00018432676675124267, "loss": 2.1881, "step": 153510 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018432577329153692, "loss": 2.2141, "step": 153515 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018432477980302392, "loss": 2.3033, "step": 153520 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018432378628570407, "loss": 2.0459, "step": 153525 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018432279273957765, "loss": 2.2494, "step": 153530 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018432179916464504, "loss": 2.1928, "step": 153535 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.0001843208055609066, "loss": 2.286, "step": 153540 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018431981192836266, "loss": 2.1632, "step": 153545 }, { "epoch": 0.36, "grad_norm": 1.8515625, "learning_rate": 0.00018431881826701352, "loss": 1.9752, "step": 153550 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018431782457685954, "loss": 2.0725, "step": 153555 }, { "epoch": 0.36, "grad_norm": 1.9609375, "learning_rate": 0.00018431683085790102, "loss": 2.0777, "step": 153560 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018431583711013842, "loss": 2.0896, "step": 153565 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.00018431484333357196, "loss": 2.1298, "step": 153570 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018431384952820204, "loss": 2.0071, "step": 153575 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018431285569402896, "loss": 2.253, "step": 153580 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.0001843118618310531, "loss": 2.1552, "step": 153585 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018431086793927473, "loss": 2.17, "step": 153590 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.0001843098740186943, "loss": 2.0462, "step": 153595 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.00018430888006931205, "loss": 2.0268, "step": 153600 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018430788609112835, "loss": 2.0742, "step": 153605 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018430689208414358, "loss": 2.1347, "step": 153610 }, { "epoch": 0.36, "grad_norm": 1.8671875, "learning_rate": 0.00018430589804835804, "loss": 2.1604, "step": 153615 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018430490398377205, "loss": 2.224, "step": 153620 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.000184303909890386, "loss": 2.177, "step": 153625 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.0001843029157682002, "loss": 2.0593, "step": 153630 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.000184301921617215, "loss": 2.3263, "step": 153635 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018430092743743073, "loss": 1.9848, "step": 153640 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001842999332288477, "loss": 2.1653, "step": 153645 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018429893899146632, "loss": 2.092, "step": 153650 }, { "epoch": 0.36, "grad_norm": 2.765625, "learning_rate": 0.00018429794472528692, "loss": 2.2003, "step": 153655 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018429695043030976, "loss": 2.0058, "step": 153660 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018429595610653526, "loss": 2.315, "step": 153665 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018429496175396373, "loss": 2.0297, "step": 153670 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018429396737259553, "loss": 2.178, "step": 153675 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018429297296243096, "loss": 2.0557, "step": 153680 }, { "epoch": 0.36, "grad_norm": 1.84375, "learning_rate": 0.00018429197852347037, "loss": 2.1904, "step": 153685 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.00018429098405571415, "loss": 2.0193, "step": 153690 }, { "epoch": 0.36, "grad_norm": 1.90625, "learning_rate": 0.00018428998955916255, "loss": 2.0617, "step": 153695 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.000184288995033816, "loss": 2.0082, "step": 153700 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.0001842880004796748, "loss": 2.1547, "step": 153705 }, { "epoch": 0.36, "grad_norm": 1.890625, "learning_rate": 0.00018428700589673928, "loss": 2.0089, "step": 153710 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018428601128500977, "loss": 2.2503, "step": 153715 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.00018428501664448667, "loss": 2.1258, "step": 153720 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018428402197517025, "loss": 2.2601, "step": 153725 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018428302727706088, "loss": 2.2175, "step": 153730 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018428203255015893, "loss": 1.9349, "step": 153735 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.0001842810377944647, "loss": 2.2312, "step": 153740 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018428004300997852, "loss": 2.2302, "step": 153745 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018427904819670076, "loss": 1.8522, "step": 153750 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.00018427805335463173, "loss": 2.3479, "step": 153755 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001842770584837718, "loss": 2.2137, "step": 153760 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.0001842760635841213, "loss": 2.1632, "step": 153765 }, { "epoch": 0.36, "grad_norm": 2.640625, "learning_rate": 0.00018427506865568055, "loss": 1.9895, "step": 153770 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018427407369844994, "loss": 2.0559, "step": 153775 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.0001842730787124298, "loss": 1.94, "step": 153780 }, { "epoch": 0.36, "grad_norm": 1.9375, "learning_rate": 0.0001842720836976204, "loss": 2.3116, "step": 153785 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018427108865402212, "loss": 2.2189, "step": 153790 }, { "epoch": 0.36, "grad_norm": 2.625, "learning_rate": 0.00018427009358163533, "loss": 2.2977, "step": 153795 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018426909848046034, "loss": 2.311, "step": 153800 }, { "epoch": 0.36, "grad_norm": 1.8125, "learning_rate": 0.0001842681033504975, "loss": 1.8082, "step": 153805 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.00018426710819174718, "loss": 2.1391, "step": 153810 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018426611300420963, "loss": 2.0777, "step": 153815 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.0001842651177878853, "loss": 2.2238, "step": 153820 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018426412254277444, "loss": 2.1462, "step": 153825 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018426312726887745, "loss": 2.1684, "step": 153830 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018426213196619465, "loss": 2.0584, "step": 153835 }, { "epoch": 0.36, "grad_norm": 2.71875, "learning_rate": 0.00018426113663472636, "loss": 2.201, "step": 153840 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018426014127447294, "loss": 2.1527, "step": 153845 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.0001842591458854347, "loss": 2.0921, "step": 153850 }, { "epoch": 0.36, "grad_norm": 2.875, "learning_rate": 0.00018425815046761205, "loss": 1.9797, "step": 153855 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018425715502100532, "loss": 2.1343, "step": 153860 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018425615954561475, "loss": 2.0748, "step": 153865 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018425516404144078, "loss": 2.4691, "step": 153870 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.0001842541685084837, "loss": 2.1236, "step": 153875 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001842531729467439, "loss": 2.0994, "step": 153880 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018425217735622168, "loss": 2.2266, "step": 153885 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018425118173691737, "loss": 2.1402, "step": 153890 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001842501860888313, "loss": 2.1741, "step": 153895 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018424919041196388, "loss": 2.0169, "step": 153900 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.0001842481947063154, "loss": 2.125, "step": 153905 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018424719897188624, "loss": 2.1471, "step": 153910 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018424620320867664, "loss": 2.1272, "step": 153915 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.00018424520741668708, "loss": 1.9796, "step": 153920 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018424421159591778, "loss": 2.0661, "step": 153925 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018424321574636915, "loss": 2.1482, "step": 153930 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.0001842422198680415, "loss": 2.1925, "step": 153935 }, { "epoch": 0.36, "grad_norm": 2.796875, "learning_rate": 0.0001842412239609352, "loss": 2.1319, "step": 153940 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018424022802505054, "loss": 2.1315, "step": 153945 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.0001842392320603879, "loss": 2.292, "step": 153950 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018423823606694763, "loss": 2.168, "step": 153955 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018423724004473004, "loss": 2.1228, "step": 153960 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.00018423624399373546, "loss": 2.1379, "step": 153965 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001842352479139643, "loss": 2.2809, "step": 153970 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.0001842342518054168, "loss": 2.1224, "step": 153975 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018423325566809337, "loss": 1.7952, "step": 153980 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018423225950199435, "loss": 2.1153, "step": 153985 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018423126330712006, "loss": 2.1974, "step": 153990 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.0001842302670834708, "loss": 2.1996, "step": 153995 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.000184229270831047, "loss": 2.102, "step": 154000 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018422827454984893, "loss": 2.1762, "step": 154005 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.000184227278239877, "loss": 2.0786, "step": 154010 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018422628190113146, "loss": 2.0711, "step": 154015 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018422528553361267, "loss": 2.259, "step": 154020 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018422428913732105, "loss": 2.0796, "step": 154025 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.00018422329271225685, "loss": 2.2352, "step": 154030 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018422229625842044, "loss": 2.217, "step": 154035 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001842212997758122, "loss": 2.0836, "step": 154040 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018422030326443241, "loss": 2.1773, "step": 154045 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018421930672428145, "loss": 2.0409, "step": 154050 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018421831015535964, "loss": 2.3946, "step": 154055 }, { "epoch": 0.36, "grad_norm": 2.609375, "learning_rate": 0.00018421731355766733, "loss": 2.1199, "step": 154060 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018421631693120487, "loss": 2.3155, "step": 154065 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018421532027597257, "loss": 2.1296, "step": 154070 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.0001842143235919708, "loss": 2.1102, "step": 154075 }, { "epoch": 0.36, "grad_norm": 1.953125, "learning_rate": 0.0001842133268791999, "loss": 2.1323, "step": 154080 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001842123301376602, "loss": 2.2654, "step": 154085 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018421133336735202, "loss": 2.1621, "step": 154090 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.00018421033656827574, "loss": 2.0307, "step": 154095 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018420933974043165, "loss": 2.1308, "step": 154100 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018420834288382016, "loss": 2.0811, "step": 154105 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018420734599844155, "loss": 2.1447, "step": 154110 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018420634908429618, "loss": 2.0586, "step": 154115 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.00018420535214138441, "loss": 2.086, "step": 154120 }, { "epoch": 0.36, "grad_norm": 1.828125, "learning_rate": 0.00018420435516970657, "loss": 2.0605, "step": 154125 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018420335816926296, "loss": 2.0981, "step": 154130 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.000184202361140054, "loss": 2.2229, "step": 154135 }, { "epoch": 0.36, "grad_norm": 2.65625, "learning_rate": 0.00018420136408207996, "loss": 2.2678, "step": 154140 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.0001842003669953412, "loss": 2.0537, "step": 154145 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.0001841993698798381, "loss": 2.2078, "step": 154150 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018419837273557095, "loss": 2.0348, "step": 154155 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.0001841973755625401, "loss": 2.0353, "step": 154160 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018419637836074591, "loss": 2.1738, "step": 154165 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.0001841953811301887, "loss": 2.1299, "step": 154170 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018419438387086884, "loss": 2.0088, "step": 154175 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018419338658278665, "loss": 2.33, "step": 154180 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018419238926594244, "loss": 2.0762, "step": 154185 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.0001841913919203366, "loss": 2.0579, "step": 154190 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018419039454596947, "loss": 2.1174, "step": 154195 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018418939714284137, "loss": 1.9962, "step": 154200 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018418839971095263, "loss": 2.3109, "step": 154205 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 0.0001841874022503036, "loss": 2.1366, "step": 154210 }, { "epoch": 0.36, "grad_norm": 1.9609375, "learning_rate": 0.00018418640476089467, "loss": 2.1528, "step": 154215 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018418540724272607, "loss": 2.1449, "step": 154220 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018418440969579825, "loss": 2.0879, "step": 154225 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.0001841834121201115, "loss": 2.1159, "step": 154230 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.0001841824145156662, "loss": 2.2002, "step": 154235 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.0001841814168824626, "loss": 2.1546, "step": 154240 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018418041922050118, "loss": 2.266, "step": 154245 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018417942152978216, "loss": 2.1586, "step": 154250 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.00018417842381030593, "loss": 2.0464, "step": 154255 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.0001841774260620728, "loss": 1.8626, "step": 154260 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018417642828508316, "loss": 2.0794, "step": 154265 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001841754304793373, "loss": 2.146, "step": 154270 }, { "epoch": 0.36, "grad_norm": 1.9453125, "learning_rate": 0.0001841744326448356, "loss": 2.1828, "step": 154275 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.0001841734347815784, "loss": 2.1617, "step": 154280 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.000184172436889566, "loss": 2.1536, "step": 154285 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.0001841714389687988, "loss": 2.2883, "step": 154290 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.0001841704410192771, "loss": 2.1945, "step": 154295 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018416944304100124, "loss": 2.1216, "step": 154300 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018416844503397158, "loss": 2.1219, "step": 154305 }, { "epoch": 0.36, "grad_norm": 2.0625, "learning_rate": 0.00018416744699818844, "loss": 2.2319, "step": 154310 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018416644893365217, "loss": 2.1912, "step": 154315 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018416545084036314, "loss": 1.8821, "step": 154320 }, { "epoch": 0.36, "grad_norm": 1.7578125, "learning_rate": 0.0001841644527183217, "loss": 2.0357, "step": 154325 }, { "epoch": 0.36, "grad_norm": 1.84375, "learning_rate": 0.00018416345456752806, "loss": 2.057, "step": 154330 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.00018416245638798272, "loss": 2.2111, "step": 154335 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018416145817968596, "loss": 2.185, "step": 154340 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.0001841604599426381, "loss": 2.2377, "step": 154345 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.0001841594616768395, "loss": 2.1373, "step": 154350 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018415846338229051, "loss": 2.1663, "step": 154355 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018415746505899146, "loss": 2.1687, "step": 154360 }, { "epoch": 0.36, "grad_norm": 1.9140625, "learning_rate": 0.0001841564667069427, "loss": 2.1269, "step": 154365 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018415546832614458, "loss": 2.129, "step": 154370 }, { "epoch": 0.36, "grad_norm": 2.40625, "learning_rate": 0.0001841544699165974, "loss": 2.0294, "step": 154375 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018415347147830152, "loss": 2.0065, "step": 154380 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001841524730112573, "loss": 2.1846, "step": 154385 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018415147451546505, "loss": 2.1052, "step": 154390 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018415047599092517, "loss": 2.1277, "step": 154395 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018414947743763794, "loss": 2.066, "step": 154400 }, { "epoch": 0.36, "grad_norm": 2.921875, "learning_rate": 0.00018414847885560374, "loss": 2.1818, "step": 154405 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018414748024482287, "loss": 2.1258, "step": 154410 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001841464816052957, "loss": 2.186, "step": 154415 }, { "epoch": 0.36, "grad_norm": 1.9375, "learning_rate": 0.00018414548293702257, "loss": 2.1584, "step": 154420 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001841444842400038, "loss": 2.1714, "step": 154425 }, { "epoch": 0.36, "grad_norm": 1.9296875, "learning_rate": 0.00018414348551423976, "loss": 2.1499, "step": 154430 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.0001841424867597308, "loss": 1.8261, "step": 154435 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001841414879764772, "loss": 1.9849, "step": 154440 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018414048916447939, "loss": 2.1094, "step": 154445 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018413949032373763, "loss": 2.1368, "step": 154450 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018413849145425227, "loss": 2.0658, "step": 154455 }, { "epoch": 0.36, "grad_norm": 1.9375, "learning_rate": 0.00018413749255602376, "loss": 2.1753, "step": 154460 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018413649362905227, "loss": 2.0625, "step": 154465 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018413549467333827, "loss": 1.8648, "step": 154470 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.00018413449568888205, "loss": 1.9974, "step": 154475 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018413349667568396, "loss": 2.1717, "step": 154480 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018413249763374437, "loss": 2.1524, "step": 154485 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018413149856306356, "loss": 2.1378, "step": 154490 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.0001841304994636419, "loss": 2.2219, "step": 154495 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.00018412950033547972, "loss": 2.1884, "step": 154500 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018412850117857744, "loss": 2.1283, "step": 154505 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.00018412750199293528, "loss": 2.1233, "step": 154510 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018412650277855366, "loss": 2.1706, "step": 154515 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001841255035354329, "loss": 2.149, "step": 154520 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.00018412450426357336, "loss": 2.0213, "step": 154525 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018412350496297534, "loss": 1.959, "step": 154530 }, { "epoch": 0.36, "grad_norm": 2.75, "learning_rate": 0.0001841225056336392, "loss": 2.0659, "step": 154535 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018412150627556532, "loss": 2.1767, "step": 154540 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018412050688875397, "loss": 2.1767, "step": 154545 }, { "epoch": 0.36, "grad_norm": 2.609375, "learning_rate": 0.00018411950747320555, "loss": 2.076, "step": 154550 }, { "epoch": 0.36, "grad_norm": 1.828125, "learning_rate": 0.00018411850802892038, "loss": 1.9833, "step": 154555 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018411750855589878, "loss": 2.1596, "step": 154560 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018411650905414116, "loss": 2.14, "step": 154565 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018411550952364774, "loss": 2.3146, "step": 154570 }, { "epoch": 0.36, "grad_norm": 3.484375, "learning_rate": 0.00018411450996441898, "loss": 2.1517, "step": 154575 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001841135103764552, "loss": 2.1166, "step": 154580 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.0001841125107597567, "loss": 2.1954, "step": 154585 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018411151111432386, "loss": 2.2435, "step": 154590 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.000184110511440157, "loss": 2.3352, "step": 154595 }, { "epoch": 0.36, "grad_norm": 1.6328125, "learning_rate": 0.00018410951173725645, "loss": 2.0559, "step": 154600 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018410851200562254, "loss": 1.9222, "step": 154605 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.00018410751224525566, "loss": 2.2888, "step": 154610 }, { "epoch": 0.36, "grad_norm": 3.0, "learning_rate": 0.0001841065124561561, "loss": 2.1836, "step": 154615 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.0001841055126383243, "loss": 2.4603, "step": 154620 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018410451279176045, "loss": 2.261, "step": 154625 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018410351291646505, "loss": 2.2069, "step": 154630 }, { "epoch": 0.36, "grad_norm": 2.703125, "learning_rate": 0.0001841025130124383, "loss": 2.1333, "step": 154635 }, { "epoch": 0.36, "grad_norm": 2.078125, "learning_rate": 0.00018410151307968065, "loss": 1.9778, "step": 154640 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018410051311819236, "loss": 2.0838, "step": 154645 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018409951312797385, "loss": 2.0073, "step": 154650 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.0001840985131090254, "loss": 2.1097, "step": 154655 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018409751306134738, "loss": 2.034, "step": 154660 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018409651298494014, "loss": 2.1275, "step": 154665 }, { "epoch": 0.36, "grad_norm": 2.671875, "learning_rate": 0.00018409551287980395, "loss": 2.1799, "step": 154670 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018409451274593926, "loss": 2.1086, "step": 154675 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018409351258334632, "loss": 2.1229, "step": 154680 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.00018409251239202555, "loss": 1.959, "step": 154685 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018409151217197724, "loss": 2.0835, "step": 154690 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018409051192320175, "loss": 2.1187, "step": 154695 }, { "epoch": 0.36, "grad_norm": 1.9140625, "learning_rate": 0.0001840895116456994, "loss": 2.2323, "step": 154700 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018408851133947053, "loss": 2.053, "step": 154705 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018408751100451555, "loss": 2.0649, "step": 154710 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001840865106408347, "loss": 2.0424, "step": 154715 }, { "epoch": 0.36, "grad_norm": 2.546875, "learning_rate": 0.00018408551024842843, "loss": 2.1748, "step": 154720 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018408450982729696, "loss": 2.1301, "step": 154725 }, { "epoch": 0.36, "grad_norm": 1.890625, "learning_rate": 0.00018408350937744073, "loss": 2.1941, "step": 154730 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018408250889886008, "loss": 2.066, "step": 154735 }, { "epoch": 0.36, "grad_norm": 2.25, "learning_rate": 0.00018408150839155526, "loss": 2.0237, "step": 154740 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018408050785552672, "loss": 2.0894, "step": 154745 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018407950729077473, "loss": 1.9362, "step": 154750 }, { "epoch": 0.36, "grad_norm": 2.5, "learning_rate": 0.00018407850669729966, "loss": 2.1501, "step": 154755 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018407750607510185, "loss": 2.0804, "step": 154760 }, { "epoch": 0.36, "grad_norm": 2.140625, "learning_rate": 0.00018407650542418162, "loss": 2.0552, "step": 154765 }, { "epoch": 0.36, "grad_norm": 1.9609375, "learning_rate": 0.00018407550474453937, "loss": 2.1976, "step": 154770 }, { "epoch": 0.36, "grad_norm": 1.8515625, "learning_rate": 0.00018407450403617537, "loss": 2.1011, "step": 154775 }, { "epoch": 0.36, "grad_norm": 2.03125, "learning_rate": 0.00018407350329909, "loss": 2.0811, "step": 154780 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.0001840725025332836, "loss": 2.1009, "step": 154785 }, { "epoch": 0.36, "grad_norm": 2.125, "learning_rate": 0.00018407150173875648, "loss": 2.214, "step": 154790 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018407050091550907, "loss": 2.2755, "step": 154795 }, { "epoch": 0.36, "grad_norm": 2.375, "learning_rate": 0.0001840695000635416, "loss": 2.0632, "step": 154800 }, { "epoch": 0.36, "grad_norm": 1.9921875, "learning_rate": 0.0001840684991828545, "loss": 2.0599, "step": 154805 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.00018406749827344805, "loss": 1.9387, "step": 154810 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018406649733532265, "loss": 2.0773, "step": 154815 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.00018406549636847857, "loss": 2.3005, "step": 154820 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018406449537291622, "loss": 2.2712, "step": 154825 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.0001840634943486359, "loss": 2.1398, "step": 154830 }, { "epoch": 0.36, "grad_norm": 1.9765625, "learning_rate": 0.00018406249329563795, "loss": 2.1163, "step": 154835 }, { "epoch": 0.36, "grad_norm": 2.015625, "learning_rate": 0.00018406149221392276, "loss": 2.0636, "step": 154840 }, { "epoch": 0.36, "grad_norm": 3.25, "learning_rate": 0.0001840604911034906, "loss": 2.1696, "step": 154845 }, { "epoch": 0.36, "grad_norm": 2.234375, "learning_rate": 0.0001840594899643419, "loss": 2.2761, "step": 154850 }, { "epoch": 0.36, "grad_norm": 2.578125, "learning_rate": 0.00018405848879647695, "loss": 2.0303, "step": 154855 }, { "epoch": 0.36, "grad_norm": 2.734375, "learning_rate": 0.00018405748759989606, "loss": 2.0349, "step": 154860 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018405648637459963, "loss": 2.4002, "step": 154865 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018405548512058796, "loss": 2.0301, "step": 154870 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018405448383786147, "loss": 2.2898, "step": 154875 }, { "epoch": 0.36, "grad_norm": 2.328125, "learning_rate": 0.00018405348252642037, "loss": 2.2091, "step": 154880 }, { "epoch": 0.36, "grad_norm": 2.390625, "learning_rate": 0.00018405248118626513, "loss": 2.1797, "step": 154885 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.000184051479817396, "loss": 2.0269, "step": 154890 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018405047841981337, "loss": 2.0709, "step": 154895 }, { "epoch": 0.36, "grad_norm": 2.359375, "learning_rate": 0.00018404947699351759, "loss": 2.3321, "step": 154900 }, { "epoch": 0.36, "grad_norm": 2.609375, "learning_rate": 0.00018404847553850896, "loss": 2.2992, "step": 154905 }, { "epoch": 0.36, "grad_norm": 2.0, "learning_rate": 0.00018404747405478786, "loss": 2.0683, "step": 154910 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.00018404647254235463, "loss": 2.1169, "step": 154915 }, { "epoch": 0.36, "grad_norm": 3.234375, "learning_rate": 0.00018404547100120956, "loss": 1.9735, "step": 154920 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.0001840444694313531, "loss": 2.1446, "step": 154925 }, { "epoch": 0.36, "grad_norm": 2.21875, "learning_rate": 0.00018404346783278545, "loss": 2.0621, "step": 154930 }, { "epoch": 0.36, "grad_norm": 1.96875, "learning_rate": 0.0001840424662055071, "loss": 2.2413, "step": 154935 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018404146454951826, "loss": 2.1107, "step": 154940 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018404046286481934, "loss": 2.1501, "step": 154945 }, { "epoch": 0.36, "grad_norm": 2.4375, "learning_rate": 0.0001840394611514107, "loss": 2.0992, "step": 154950 }, { "epoch": 0.36, "grad_norm": 2.265625, "learning_rate": 0.00018403845940929265, "loss": 2.2374, "step": 154955 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018403745763846555, "loss": 1.9969, "step": 154960 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.0001840364558389297, "loss": 2.0287, "step": 154965 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001840354540106855, "loss": 2.2361, "step": 154970 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018403445215373323, "loss": 2.0365, "step": 154975 }, { "epoch": 0.36, "grad_norm": 2.765625, "learning_rate": 0.0001840334502680733, "loss": 2.1115, "step": 154980 }, { "epoch": 0.36, "grad_norm": 2.15625, "learning_rate": 0.000184032448353706, "loss": 2.0119, "step": 154985 }, { "epoch": 0.36, "grad_norm": 2.296875, "learning_rate": 0.0001840314464106317, "loss": 2.1434, "step": 154990 }, { "epoch": 0.36, "grad_norm": 2.109375, "learning_rate": 0.00018403044443885072, "loss": 2.0341, "step": 154995 }, { "epoch": 0.36, "grad_norm": 2.046875, "learning_rate": 0.00018402944243836342, "loss": 2.1146, "step": 155000 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018402844040917016, "loss": 2.1732, "step": 155005 }, { "epoch": 0.36, "grad_norm": 2.421875, "learning_rate": 0.00018402743835127126, "loss": 2.1934, "step": 155010 }, { "epoch": 0.36, "grad_norm": 2.515625, "learning_rate": 0.00018402643626466705, "loss": 2.1556, "step": 155015 }, { "epoch": 0.36, "grad_norm": 2.1875, "learning_rate": 0.00018402543414935786, "loss": 2.1004, "step": 155020 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001840244320053441, "loss": 2.1428, "step": 155025 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018402342983262605, "loss": 2.2842, "step": 155030 }, { "epoch": 0.36, "grad_norm": 2.171875, "learning_rate": 0.00018402242763120407, "loss": 2.2231, "step": 155035 }, { "epoch": 0.36, "grad_norm": 2.3125, "learning_rate": 0.00018402142540107852, "loss": 2.3409, "step": 155040 }, { "epoch": 0.36, "grad_norm": 2.28125, "learning_rate": 0.00018402042314224972, "loss": 2.0572, "step": 155045 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.00018401942085471802, "loss": 2.1975, "step": 155050 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.00018401841853848375, "loss": 1.9564, "step": 155055 }, { "epoch": 0.36, "grad_norm": 2.203125, "learning_rate": 0.0001840174161935473, "loss": 2.0779, "step": 155060 }, { "epoch": 0.36, "grad_norm": 2.71875, "learning_rate": 0.00018401641381990893, "loss": 1.9205, "step": 155065 }, { "epoch": 0.36, "grad_norm": 2.453125, "learning_rate": 0.00018401541141756906, "loss": 1.9971, "step": 155070 }, { "epoch": 0.36, "grad_norm": 2.46875, "learning_rate": 0.00018401440898652797, "loss": 2.095, "step": 155075 }, { "epoch": 0.36, "grad_norm": 2.59375, "learning_rate": 0.0001840134065267861, "loss": 2.2716, "step": 155080 }, { "epoch": 0.36, "grad_norm": 2.09375, "learning_rate": 0.00018401240403834366, "loss": 2.0359, "step": 155085 }, { "epoch": 0.36, "grad_norm": 2.53125, "learning_rate": 0.00018401140152120106, "loss": 2.1766, "step": 155090 }, { "epoch": 0.36, "grad_norm": 2.5625, "learning_rate": 0.0001840103989753587, "loss": 2.1194, "step": 155095 }, { "epoch": 0.36, "grad_norm": 2.34375, "learning_rate": 0.0001840093964008168, "loss": 2.182, "step": 155100 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.0001840083937975758, "loss": 2.1338, "step": 155105 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018400739116563602, "loss": 2.2112, "step": 155110 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018400638850499777, "loss": 2.1424, "step": 155115 }, { "epoch": 0.37, "grad_norm": 1.765625, "learning_rate": 0.0001840053858156614, "loss": 2.012, "step": 155120 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.0001840043830976273, "loss": 2.1904, "step": 155125 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018400338035089576, "loss": 2.3415, "step": 155130 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.00018400237757546715, "loss": 2.0458, "step": 155135 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018400137477134182, "loss": 2.2799, "step": 155140 }, { "epoch": 0.37, "grad_norm": 1.890625, "learning_rate": 0.00018400037193852008, "loss": 2.0919, "step": 155145 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.0001839993690770023, "loss": 2.0141, "step": 155150 }, { "epoch": 0.37, "grad_norm": 1.90625, "learning_rate": 0.0001839983661867888, "loss": 2.1731, "step": 155155 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018399736326787992, "loss": 1.9685, "step": 155160 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018399636032027602, "loss": 2.1624, "step": 155165 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018399535734397746, "loss": 2.1541, "step": 155170 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.00018399435433898455, "loss": 2.287, "step": 155175 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018399335130529765, "loss": 2.3647, "step": 155180 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018399234824291712, "loss": 2.1274, "step": 155185 }, { "epoch": 0.37, "grad_norm": 1.9453125, "learning_rate": 0.00018399134515184324, "loss": 2.3459, "step": 155190 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001839903420320764, "loss": 2.0632, "step": 155195 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018398933888361696, "loss": 2.2732, "step": 155200 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018398833570646523, "loss": 2.3543, "step": 155205 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018398733250062156, "loss": 2.2398, "step": 155210 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001839863292660863, "loss": 2.2139, "step": 155215 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018398532600285975, "loss": 2.1094, "step": 155220 }, { "epoch": 0.37, "grad_norm": 1.921875, "learning_rate": 0.00018398432271094235, "loss": 2.1182, "step": 155225 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018398331939033433, "loss": 2.0608, "step": 155230 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001839823160410361, "loss": 2.099, "step": 155235 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.000183981312663048, "loss": 1.9382, "step": 155240 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001839803092563704, "loss": 1.9501, "step": 155245 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018397930582100353, "loss": 2.1767, "step": 155250 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001839783023569478, "loss": 2.0363, "step": 155255 }, { "epoch": 0.37, "grad_norm": 1.90625, "learning_rate": 0.00018397729886420364, "loss": 2.1786, "step": 155260 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018397629534277124, "loss": 2.0696, "step": 155265 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018397529179265104, "loss": 2.0698, "step": 155270 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018397428821384336, "loss": 2.1189, "step": 155275 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018397328460634853, "loss": 2.3051, "step": 155280 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001839722809701669, "loss": 2.087, "step": 155285 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018397127730529885, "loss": 2.2253, "step": 155290 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018397027361174463, "loss": 2.1258, "step": 155295 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018396926988950468, "loss": 2.0987, "step": 155300 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.0001839682661385793, "loss": 2.1178, "step": 155305 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018396726235896886, "loss": 2.1851, "step": 155310 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018396625855067363, "loss": 2.1496, "step": 155315 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018396525471369403, "loss": 2.1502, "step": 155320 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001839642508480304, "loss": 2.0928, "step": 155325 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018396324695368302, "loss": 2.284, "step": 155330 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018396224303065227, "loss": 2.2588, "step": 155335 }, { "epoch": 0.37, "grad_norm": 1.9375, "learning_rate": 0.00018396123907893854, "loss": 2.2264, "step": 155340 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.0001839602350985421, "loss": 2.274, "step": 155345 }, { "epoch": 0.37, "grad_norm": 2.859375, "learning_rate": 0.00018395923108946333, "loss": 2.1893, "step": 155350 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018395822705170257, "loss": 2.0521, "step": 155355 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001839572229852601, "loss": 2.2271, "step": 155360 }, { "epoch": 0.37, "grad_norm": 1.8203125, "learning_rate": 0.0001839562188901364, "loss": 1.9422, "step": 155365 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001839552147663317, "loss": 2.137, "step": 155370 }, { "epoch": 0.37, "grad_norm": 2.671875, "learning_rate": 0.00018395421061384636, "loss": 2.16, "step": 155375 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018395320643268076, "loss": 2.2384, "step": 155380 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018395220222283521, "loss": 2.1574, "step": 155385 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.00018395119798431006, "loss": 2.2036, "step": 155390 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018395019371710567, "loss": 2.0522, "step": 155395 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018394918942122237, "loss": 2.2, "step": 155400 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001839481850966605, "loss": 2.2384, "step": 155405 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.0001839471807434204, "loss": 2.3543, "step": 155410 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018394617636150243, "loss": 2.0525, "step": 155415 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018394517195090694, "loss": 2.0841, "step": 155420 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018394416751163424, "loss": 2.0676, "step": 155425 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.0001839431630436847, "loss": 2.1643, "step": 155430 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018394215854705863, "loss": 2.1675, "step": 155435 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018394115402175641, "loss": 2.1167, "step": 155440 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018394014946777838, "loss": 1.9719, "step": 155445 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018393914488512485, "loss": 2.1163, "step": 155450 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001839381402737962, "loss": 2.067, "step": 155455 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.00018393713563379276, "loss": 2.0544, "step": 155460 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018393613096511486, "loss": 2.126, "step": 155465 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018393512626776288, "loss": 2.1729, "step": 155470 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018393412154173711, "loss": 2.2324, "step": 155475 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018393311678703794, "loss": 2.2773, "step": 155480 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.0001839321120036657, "loss": 2.2515, "step": 155485 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018393110719162073, "loss": 2.1585, "step": 155490 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018393010235090334, "loss": 1.9311, "step": 155495 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018392909748151393, "loss": 1.979, "step": 155500 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.00018392809258345282, "loss": 2.2471, "step": 155505 }, { "epoch": 0.37, "grad_norm": 1.921875, "learning_rate": 0.00018392708765672033, "loss": 2.1279, "step": 155510 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018392608270131685, "loss": 2.1104, "step": 155515 }, { "epoch": 0.37, "grad_norm": 1.78125, "learning_rate": 0.00018392507771724267, "loss": 1.9584, "step": 155520 }, { "epoch": 0.37, "grad_norm": 2.921875, "learning_rate": 0.00018392407270449818, "loss": 2.3054, "step": 155525 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018392306766308372, "loss": 2.0468, "step": 155530 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001839220625929996, "loss": 2.1829, "step": 155535 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018392105749424618, "loss": 2.1844, "step": 155540 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.0001839200523668238, "loss": 2.2765, "step": 155545 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018391904721073283, "loss": 2.1491, "step": 155550 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018391804202597359, "loss": 1.9682, "step": 155555 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001839170368125464, "loss": 2.1981, "step": 155560 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018391603157045164, "loss": 2.0471, "step": 155565 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018391502629968963, "loss": 2.2245, "step": 155570 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018391402100026075, "loss": 2.0693, "step": 155575 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.0001839130156721653, "loss": 2.13, "step": 155580 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018391201031540364, "loss": 2.1715, "step": 155585 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018391100492997614, "loss": 2.019, "step": 155590 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001839099995158831, "loss": 2.2259, "step": 155595 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018390899407312488, "loss": 2.3533, "step": 155600 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.00018390798860170183, "loss": 2.2127, "step": 155605 }, { "epoch": 0.37, "grad_norm": 1.9296875, "learning_rate": 0.0001839069831016143, "loss": 2.3392, "step": 155610 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.0001839059775728626, "loss": 2.0476, "step": 155615 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.00018390497201544711, "loss": 1.9977, "step": 155620 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018390396642936816, "loss": 2.2517, "step": 155625 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.0001839029608146261, "loss": 1.9686, "step": 155630 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018390195517122124, "loss": 2.1896, "step": 155635 }, { "epoch": 0.37, "grad_norm": 1.96875, "learning_rate": 0.000183900949499154, "loss": 2.117, "step": 155640 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018389994379842462, "loss": 2.0679, "step": 155645 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018389893806903352, "loss": 2.0097, "step": 155650 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018389793231098103, "loss": 2.0756, "step": 155655 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018389692652426748, "loss": 1.9221, "step": 155660 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018389592070889324, "loss": 2.3184, "step": 155665 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.0001838949148648586, "loss": 2.084, "step": 155670 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018389390899216395, "loss": 2.3309, "step": 155675 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.0001838929030908096, "loss": 2.1467, "step": 155680 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018389189716079592, "loss": 2.035, "step": 155685 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018389089120212325, "loss": 2.1862, "step": 155690 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018388988521479194, "loss": 2.1098, "step": 155695 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.0001838888791988023, "loss": 2.1873, "step": 155700 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018388787315415473, "loss": 1.9808, "step": 155705 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018388686708084952, "loss": 2.2871, "step": 155710 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.000183885860978887, "loss": 2.1503, "step": 155715 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018388485484826763, "loss": 2.0132, "step": 155720 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018388384868899162, "loss": 2.0243, "step": 155725 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018388284250105938, "loss": 2.2795, "step": 155730 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018388183628447125, "loss": 2.1319, "step": 155735 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018388083003922754, "loss": 2.2158, "step": 155740 }, { "epoch": 0.37, "grad_norm": 1.90625, "learning_rate": 0.00018387982376532862, "loss": 2.0898, "step": 155745 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018387881746277486, "loss": 2.2287, "step": 155750 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.00018387781113156652, "loss": 2.2836, "step": 155755 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018387680477170406, "loss": 2.3073, "step": 155760 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018387579838318772, "loss": 2.0774, "step": 155765 }, { "epoch": 0.37, "grad_norm": 2.8125, "learning_rate": 0.00018387479196601788, "loss": 1.956, "step": 155770 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018387378552019493, "loss": 1.9926, "step": 155775 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018387277904571915, "loss": 2.1816, "step": 155780 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001838717725425909, "loss": 1.9342, "step": 155785 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018387076601081054, "loss": 2.1847, "step": 155790 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.0001838697594503784, "loss": 2.2074, "step": 155795 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018386875286129485, "loss": 2.1421, "step": 155800 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001838677462435602, "loss": 2.0834, "step": 155805 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.0001838667395971748, "loss": 2.0673, "step": 155810 }, { "epoch": 0.37, "grad_norm": 1.9921875, "learning_rate": 0.000183865732922139, "loss": 2.1162, "step": 155815 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018386472621845316, "loss": 2.0183, "step": 155820 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018386371948611761, "loss": 2.2187, "step": 155825 }, { "epoch": 0.37, "grad_norm": 1.7734375, "learning_rate": 0.00018386271272513268, "loss": 2.0444, "step": 155830 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018386170593549873, "loss": 2.2105, "step": 155835 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.0001838606991172161, "loss": 2.1355, "step": 155840 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018385969227028515, "loss": 2.1384, "step": 155845 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018385868539470617, "loss": 2.0426, "step": 155850 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018385767849047956, "loss": 2.2061, "step": 155855 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018385667155760565, "loss": 2.1094, "step": 155860 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018385566459608478, "loss": 2.0345, "step": 155865 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001838546576059173, "loss": 2.1879, "step": 155870 }, { "epoch": 0.37, "grad_norm": 1.890625, "learning_rate": 0.00018385365058710352, "loss": 2.1837, "step": 155875 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018385264353964383, "loss": 2.238, "step": 155880 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.00018385163646353857, "loss": 1.9673, "step": 155885 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.0001838506293587881, "loss": 2.041, "step": 155890 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018384962222539266, "loss": 2.1253, "step": 155895 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018384861506335273, "loss": 2.1577, "step": 155900 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018384760787266857, "loss": 2.0477, "step": 155905 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018384660065334053, "loss": 2.152, "step": 155910 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018384559340536898, "loss": 2.2344, "step": 155915 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018384458612875427, "loss": 1.9933, "step": 155920 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018384357882349672, "loss": 2.0748, "step": 155925 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018384257148959665, "loss": 2.1675, "step": 155930 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.0001838415641270545, "loss": 2.177, "step": 155935 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.0001838405567358705, "loss": 2.2346, "step": 155940 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018383954931604506, "loss": 2.2233, "step": 155945 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018383854186757853, "loss": 2.1495, "step": 155950 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.0001838375343904712, "loss": 2.3037, "step": 155955 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018383652688472348, "loss": 2.108, "step": 155960 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018383551935033564, "loss": 2.0458, "step": 155965 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.0001838345117873081, "loss": 2.0304, "step": 155970 }, { "epoch": 0.37, "grad_norm": 2.78125, "learning_rate": 0.00018383350419564116, "loss": 2.0701, "step": 155975 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.0001838324965753352, "loss": 2.0979, "step": 155980 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.0001838314889263905, "loss": 2.2195, "step": 155985 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018383048124880747, "loss": 1.82, "step": 155990 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018382947354258643, "loss": 1.9942, "step": 155995 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018382846580772768, "loss": 2.1195, "step": 156000 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018382745804423163, "loss": 1.9789, "step": 156005 }, { "epoch": 0.37, "grad_norm": 2.625, "learning_rate": 0.0001838264502520986, "loss": 2.1881, "step": 156010 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018382544243132894, "loss": 2.2606, "step": 156015 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018382443458192299, "loss": 2.1105, "step": 156020 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.0001838234267038811, "loss": 2.1327, "step": 156025 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018382241879720358, "loss": 2.207, "step": 156030 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018382141086189084, "loss": 2.0836, "step": 156035 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018382040289794316, "loss": 2.1656, "step": 156040 }, { "epoch": 0.37, "grad_norm": 2.875, "learning_rate": 0.0001838193949053609, "loss": 2.1494, "step": 156045 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018381838688414444, "loss": 1.9524, "step": 156050 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018381737883429409, "loss": 2.0136, "step": 156055 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.0001838163707558102, "loss": 2.2012, "step": 156060 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.0001838153626486931, "loss": 2.2862, "step": 156065 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001838143545129432, "loss": 2.1046, "step": 156070 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018381334634856074, "loss": 2.0983, "step": 156075 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018381233815554615, "loss": 2.1775, "step": 156080 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018381132993389976, "loss": 2.2461, "step": 156085 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018381032168362187, "loss": 2.3408, "step": 156090 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018380931340471288, "loss": 2.0863, "step": 156095 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018380830509717307, "loss": 2.0616, "step": 156100 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018380729676100287, "loss": 2.3131, "step": 156105 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018380628839620254, "loss": 2.1083, "step": 156110 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018380528000277248, "loss": 2.2408, "step": 156115 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018380427158071303, "loss": 1.9917, "step": 156120 }, { "epoch": 0.37, "grad_norm": 2.5625, "learning_rate": 0.00018380326313002449, "loss": 2.0962, "step": 156125 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018380225465070726, "loss": 2.1073, "step": 156130 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018380124614276167, "loss": 2.1608, "step": 156135 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.000183800237606188, "loss": 2.0067, "step": 156140 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.0001837992290409867, "loss": 2.2149, "step": 156145 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018379822044715805, "loss": 1.9822, "step": 156150 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.0001837972118247024, "loss": 2.0028, "step": 156155 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001837962031736201, "loss": 2.17, "step": 156160 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.0001837951944939115, "loss": 2.1429, "step": 156165 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018379418578557695, "loss": 2.2582, "step": 156170 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001837931770486168, "loss": 2.0897, "step": 156175 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018379216828303137, "loss": 2.1415, "step": 156180 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018379115948882098, "loss": 2.2274, "step": 156185 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001837901506659861, "loss": 2.0092, "step": 156190 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.0001837891418145269, "loss": 2.1501, "step": 156195 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018378813293444383, "loss": 2.1782, "step": 156200 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018378712402573722, "loss": 2.288, "step": 156205 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018378611508840738, "loss": 2.2064, "step": 156210 }, { "epoch": 0.37, "grad_norm": 2.765625, "learning_rate": 0.00018378510612245474, "loss": 2.1983, "step": 156215 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018378409712787957, "loss": 2.0758, "step": 156220 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.0001837830881046822, "loss": 2.0005, "step": 156225 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018378207905286303, "loss": 2.1717, "step": 156230 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018378106997242238, "loss": 2.1915, "step": 156235 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018378006086336064, "loss": 2.2107, "step": 156240 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018377905172567807, "loss": 2.1091, "step": 156245 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018377804255937505, "loss": 2.2245, "step": 156250 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018377703336445192, "loss": 2.2168, "step": 156255 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018377602414090908, "loss": 2.0424, "step": 156260 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.0001837750148887468, "loss": 2.1225, "step": 156265 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018377400560796545, "loss": 2.2388, "step": 156270 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001837729962985654, "loss": 2.1387, "step": 156275 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018377198696054698, "loss": 2.1831, "step": 156280 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.0001837709775939105, "loss": 2.1299, "step": 156285 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018376996819865636, "loss": 2.211, "step": 156290 }, { "epoch": 0.37, "grad_norm": 1.8046875, "learning_rate": 0.00018376895877478489, "loss": 2.0443, "step": 156295 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018376794932229638, "loss": 2.2553, "step": 156300 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018376693984119126, "loss": 2.1972, "step": 156305 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.00018376593033146983, "loss": 2.1405, "step": 156310 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.0001837649207931324, "loss": 2.0205, "step": 156315 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.0001837639112261794, "loss": 2.0821, "step": 156320 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.00018376290163061112, "loss": 2.0178, "step": 156325 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.0001837618920064279, "loss": 2.0841, "step": 156330 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.0001837608823536301, "loss": 2.0549, "step": 156335 }, { "epoch": 0.37, "grad_norm": 2.9375, "learning_rate": 0.00018375987267221807, "loss": 2.0741, "step": 156340 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018375886296219217, "loss": 2.2158, "step": 156345 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018375785322355268, "loss": 1.9967, "step": 156350 }, { "epoch": 0.37, "grad_norm": 1.8671875, "learning_rate": 0.0001837568434563, "loss": 1.9296, "step": 156355 }, { "epoch": 0.37, "grad_norm": 2.71875, "learning_rate": 0.00018375583366043449, "loss": 2.1516, "step": 156360 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018375482383595643, "loss": 2.1956, "step": 156365 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018375381398286623, "loss": 2.5103, "step": 156370 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0001837528041011642, "loss": 2.0165, "step": 156375 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.00018375179419085068, "loss": 2.0637, "step": 156380 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018375078425192604, "loss": 2.0205, "step": 156385 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018374977428439065, "loss": 2.1325, "step": 156390 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018374876428824477, "loss": 2.2918, "step": 156395 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.0001837477542634888, "loss": 2.1543, "step": 156400 }, { "epoch": 0.37, "grad_norm": 1.9296875, "learning_rate": 0.0001837467442101231, "loss": 2.0917, "step": 156405 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018374573412814797, "loss": 2.3152, "step": 156410 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018374472401756377, "loss": 2.0577, "step": 156415 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018374371387837088, "loss": 2.081, "step": 156420 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.0001837427037105696, "loss": 2.2787, "step": 156425 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001837416935141603, "loss": 2.061, "step": 156430 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018374068328914334, "loss": 2.2049, "step": 156435 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.000183739673035519, "loss": 2.0845, "step": 156440 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018373866275328774, "loss": 2.2528, "step": 156445 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018373765244244976, "loss": 2.0607, "step": 156450 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018373664210300552, "loss": 2.1164, "step": 156455 }, { "epoch": 0.37, "grad_norm": 1.96875, "learning_rate": 0.00018373563173495528, "loss": 2.1173, "step": 156460 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018373462133829948, "loss": 2.0963, "step": 156465 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.0001837336109130384, "loss": 2.1509, "step": 156470 }, { "epoch": 0.37, "grad_norm": 2.8125, "learning_rate": 0.0001837326004591724, "loss": 2.2018, "step": 156475 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018373158997670182, "loss": 2.0152, "step": 156480 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.000183730579465627, "loss": 1.9526, "step": 156485 }, { "epoch": 0.37, "grad_norm": 2.640625, "learning_rate": 0.0001837295689259483, "loss": 1.9352, "step": 156490 }, { "epoch": 0.37, "grad_norm": 2.71875, "learning_rate": 0.0001837285583576661, "loss": 2.0791, "step": 156495 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018372754776078067, "loss": 2.2337, "step": 156500 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0001837265371352924, "loss": 2.0743, "step": 156505 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.0001837255264812016, "loss": 2.2799, "step": 156510 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018372451579850866, "loss": 2.1033, "step": 156515 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018372350508721392, "loss": 2.1142, "step": 156520 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.0001837224943473177, "loss": 2.2707, "step": 156525 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018372148357882035, "loss": 2.1832, "step": 156530 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018372047278172223, "loss": 1.9948, "step": 156535 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018371946195602367, "loss": 2.1713, "step": 156540 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018371845110172504, "loss": 2.0485, "step": 156545 }, { "epoch": 0.37, "grad_norm": 1.9765625, "learning_rate": 0.00018371744021882669, "loss": 2.0933, "step": 156550 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018371642930732892, "loss": 2.0911, "step": 156555 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0001837154183672321, "loss": 2.0991, "step": 156560 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018371440739853656, "loss": 2.2918, "step": 156565 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018371339640124265, "loss": 2.2229, "step": 156570 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018371238537535076, "loss": 2.1605, "step": 156575 }, { "epoch": 0.37, "grad_norm": 2.609375, "learning_rate": 0.0001837113743208612, "loss": 2.1907, "step": 156580 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018371036323777432, "loss": 2.1572, "step": 156585 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001837093521260904, "loss": 2.0226, "step": 156590 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.00018370834098580994, "loss": 2.0081, "step": 156595 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018370732981693312, "loss": 1.7894, "step": 156600 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018370631861946038, "loss": 2.1219, "step": 156605 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018370530739339204, "loss": 2.0771, "step": 156610 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018370429613872848, "loss": 2.1517, "step": 156615 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.00018370328485546998, "loss": 2.1471, "step": 156620 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018370227354361695, "loss": 1.975, "step": 156625 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018370126220316967, "loss": 1.9447, "step": 156630 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018370025083412855, "loss": 2.1967, "step": 156635 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001836992394364939, "loss": 2.2034, "step": 156640 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018369822801026607, "loss": 2.2997, "step": 156645 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.0001836972165554454, "loss": 1.9578, "step": 156650 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018369620507203223, "loss": 2.148, "step": 156655 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018369519356002694, "loss": 2.2445, "step": 156660 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018369418201942984, "loss": 2.1566, "step": 156665 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001836931704502413, "loss": 2.1532, "step": 156670 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018369215885246165, "loss": 1.9725, "step": 156675 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018369114722609125, "loss": 2.0423, "step": 156680 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001836901355711304, "loss": 2.1839, "step": 156685 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018368912388757954, "loss": 2.2058, "step": 156690 }, { "epoch": 0.37, "grad_norm": 1.9765625, "learning_rate": 0.00018368811217543893, "loss": 2.074, "step": 156695 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018368710043470895, "loss": 1.9587, "step": 156700 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018368608866538993, "loss": 2.1746, "step": 156705 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018368507686748222, "loss": 2.0408, "step": 156710 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018368406504098617, "loss": 2.1193, "step": 156715 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018368305318590213, "loss": 2.3054, "step": 156720 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018368204130223044, "loss": 1.9518, "step": 156725 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018368102938997146, "loss": 2.0929, "step": 156730 }, { "epoch": 0.37, "grad_norm": 1.9765625, "learning_rate": 0.0001836800174491255, "loss": 2.2357, "step": 156735 }, { "epoch": 0.37, "grad_norm": 1.921875, "learning_rate": 0.00018367900547969294, "loss": 2.2282, "step": 156740 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.0001836779934816741, "loss": 2.1659, "step": 156745 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.00018367698145506936, "loss": 2.0386, "step": 156750 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018367596939987903, "loss": 2.3138, "step": 156755 }, { "epoch": 0.37, "grad_norm": 2.71875, "learning_rate": 0.0001836749573161035, "loss": 2.0162, "step": 156760 }, { "epoch": 0.37, "grad_norm": 3.546875, "learning_rate": 0.00018367394520374308, "loss": 2.1885, "step": 156765 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018367293306279813, "loss": 2.1463, "step": 156770 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018367192089326895, "loss": 2.1993, "step": 156775 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018367090869515594, "loss": 2.0331, "step": 156780 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018366989646845944, "loss": 2.0923, "step": 156785 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018366888421317978, "loss": 2.1942, "step": 156790 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001836678719293173, "loss": 2.0075, "step": 156795 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018366685961687237, "loss": 2.2228, "step": 156800 }, { "epoch": 0.37, "grad_norm": 1.8125, "learning_rate": 0.00018366584727584533, "loss": 2.2216, "step": 156805 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.0001836648349062365, "loss": 2.0895, "step": 156810 }, { "epoch": 0.37, "grad_norm": 1.8046875, "learning_rate": 0.00018366382250804627, "loss": 2.1893, "step": 156815 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018366281008127495, "loss": 2.207, "step": 156820 }, { "epoch": 0.37, "grad_norm": 1.7890625, "learning_rate": 0.00018366179762592289, "loss": 2.093, "step": 156825 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018366078514199044, "loss": 1.9762, "step": 156830 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018365977262947795, "loss": 2.0231, "step": 156835 }, { "epoch": 0.37, "grad_norm": 2.765625, "learning_rate": 0.0001836587600883858, "loss": 2.1187, "step": 156840 }, { "epoch": 0.37, "grad_norm": 3.34375, "learning_rate": 0.00018365774751871424, "loss": 2.1296, "step": 156845 }, { "epoch": 0.37, "grad_norm": 2.671875, "learning_rate": 0.0001836567349204637, "loss": 2.1837, "step": 156850 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.00018365572229363456, "loss": 2.2048, "step": 156855 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.000183654709638227, "loss": 2.0982, "step": 156860 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018365369695424154, "loss": 2.1635, "step": 156865 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018365268424167848, "loss": 2.0619, "step": 156870 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.0001836516715005381, "loss": 2.0675, "step": 156875 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.0001836506587308208, "loss": 2.1578, "step": 156880 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018364964593252692, "loss": 2.1498, "step": 156885 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018364863310565684, "loss": 2.0887, "step": 156890 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018364762025021084, "loss": 2.0195, "step": 156895 }, { "epoch": 0.37, "grad_norm": 2.890625, "learning_rate": 0.00018364660736618928, "loss": 2.0208, "step": 156900 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018364559445359255, "loss": 2.1318, "step": 156905 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018364458151242093, "loss": 2.1812, "step": 156910 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018364356854267487, "loss": 2.0486, "step": 156915 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001836425555443546, "loss": 2.1445, "step": 156920 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018364154251746052, "loss": 2.217, "step": 156925 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.000183640529461993, "loss": 2.126, "step": 156930 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018363951637795231, "loss": 2.0438, "step": 156935 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001836385032653389, "loss": 2.0661, "step": 156940 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018363749012415302, "loss": 2.2006, "step": 156945 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018363647695439505, "loss": 2.2197, "step": 156950 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018363546375606537, "loss": 1.9171, "step": 156955 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018363445052916428, "loss": 1.9123, "step": 156960 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018363343727369219, "loss": 2.1935, "step": 156965 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018363242398964933, "loss": 2.2805, "step": 156970 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018363141067703618, "loss": 2.092, "step": 156975 }, { "epoch": 0.37, "grad_norm": 3.015625, "learning_rate": 0.000183630397335853, "loss": 2.2265, "step": 156980 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.0001836293839661001, "loss": 2.101, "step": 156985 }, { "epoch": 0.37, "grad_norm": 1.859375, "learning_rate": 0.00018362837056777799, "loss": 2.0643, "step": 156990 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018362735714088686, "loss": 1.972, "step": 156995 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.00018362634368542712, "loss": 2.124, "step": 157000 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001836253302013991, "loss": 2.1765, "step": 157005 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001836243166888031, "loss": 2.0254, "step": 157010 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001836233031476396, "loss": 2.0169, "step": 157015 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0001836222895779088, "loss": 2.1444, "step": 157020 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018362127597961118, "loss": 2.165, "step": 157025 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.00018362026235274693, "loss": 1.9706, "step": 157030 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018361924869731656, "loss": 2.2614, "step": 157035 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018361823501332028, "loss": 2.1875, "step": 157040 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018361722130075852, "loss": 2.3624, "step": 157045 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001836162075596316, "loss": 2.0034, "step": 157050 }, { "epoch": 0.37, "grad_norm": 2.828125, "learning_rate": 0.00018361519378993983, "loss": 1.9991, "step": 157055 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018361417999168363, "loss": 2.2421, "step": 157060 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018361316616486331, "loss": 2.0445, "step": 157065 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018361215230947923, "loss": 1.93, "step": 157070 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.0001836111384255317, "loss": 2.1971, "step": 157075 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.0001836101245130211, "loss": 2.1579, "step": 157080 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018360911057194775, "loss": 1.9914, "step": 157085 }, { "epoch": 0.37, "grad_norm": 2.859375, "learning_rate": 0.00018360809660231201, "loss": 2.0805, "step": 157090 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.00018360708260411427, "loss": 2.1508, "step": 157095 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0001836060685773548, "loss": 2.1423, "step": 157100 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.00018360505452203398, "loss": 2.0613, "step": 157105 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018360404043815215, "loss": 2.0714, "step": 157110 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018360302632570966, "loss": 2.0649, "step": 157115 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.0001836020121847069, "loss": 2.0915, "step": 157120 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018360099801514415, "loss": 2.2012, "step": 157125 }, { "epoch": 0.37, "grad_norm": 1.96875, "learning_rate": 0.00018359998381702177, "loss": 2.127, "step": 157130 }, { "epoch": 0.37, "grad_norm": 2.71875, "learning_rate": 0.00018359896959034013, "loss": 2.1903, "step": 157135 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018359795533509957, "loss": 1.9505, "step": 157140 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018359694105130042, "loss": 2.2159, "step": 157145 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018359592673894307, "loss": 2.1785, "step": 157150 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001835949123980278, "loss": 2.0281, "step": 157155 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018359389802855502, "loss": 2.0959, "step": 157160 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018359288363052503, "loss": 2.2508, "step": 157165 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.0001835918692039382, "loss": 2.0634, "step": 157170 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018359085474879488, "loss": 2.1133, "step": 157175 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018358984026509538, "loss": 1.946, "step": 157180 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018358882575284012, "loss": 2.1443, "step": 157185 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.00018358781121202936, "loss": 2.1692, "step": 157190 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018358679664266353, "loss": 2.2511, "step": 157195 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018358578204474287, "loss": 2.1848, "step": 157200 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018358476741826783, "loss": 2.1225, "step": 157205 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018358375276323873, "loss": 2.0376, "step": 157210 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018358273807965588, "loss": 2.2614, "step": 157215 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018358172336751965, "loss": 2.2566, "step": 157220 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018358070862683042, "loss": 2.0868, "step": 157225 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018357969385758848, "loss": 2.2354, "step": 157230 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001835786790597942, "loss": 2.1664, "step": 157235 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.0001835776642334479, "loss": 2.1856, "step": 157240 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018357664937855, "loss": 2.1229, "step": 157245 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018357563449510078, "loss": 2.1524, "step": 157250 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018357461958310063, "loss": 2.0809, "step": 157255 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018357360464254984, "loss": 2.1937, "step": 157260 }, { "epoch": 0.37, "grad_norm": 1.9453125, "learning_rate": 0.0001835725896734488, "loss": 2.0734, "step": 157265 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018357157467579786, "loss": 2.1283, "step": 157270 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018357055964959737, "loss": 2.0789, "step": 157275 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018356954459484764, "loss": 2.2112, "step": 157280 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018356852951154906, "loss": 1.914, "step": 157285 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.0001835675143997019, "loss": 2.093, "step": 157290 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.0001835664992593066, "loss": 2.2387, "step": 157295 }, { "epoch": 0.37, "grad_norm": 2.734375, "learning_rate": 0.00018356548409036348, "loss": 2.1484, "step": 157300 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018356446889287287, "loss": 2.1162, "step": 157305 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001835634536668351, "loss": 2.1407, "step": 157310 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018356243841225056, "loss": 2.1877, "step": 157315 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018356142312911955, "loss": 2.0773, "step": 157320 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018356040781744245, "loss": 2.1561, "step": 157325 }, { "epoch": 0.37, "grad_norm": 3.046875, "learning_rate": 0.0001835593924772196, "loss": 2.1148, "step": 157330 }, { "epoch": 0.37, "grad_norm": 1.8203125, "learning_rate": 0.00018355837710845135, "loss": 2.131, "step": 157335 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.00018355736171113806, "loss": 2.1452, "step": 157340 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018355634628528005, "loss": 2.3112, "step": 157345 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018355533083087768, "loss": 2.2981, "step": 157350 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018355431534793128, "loss": 2.1544, "step": 157355 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001835532998364412, "loss": 2.2506, "step": 157360 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018355228429640783, "loss": 2.0461, "step": 157365 }, { "epoch": 0.37, "grad_norm": 2.640625, "learning_rate": 0.00018355126872783147, "loss": 2.2251, "step": 157370 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018355025313071248, "loss": 2.1272, "step": 157375 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001835492375050512, "loss": 1.9779, "step": 157380 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.000183548221850848, "loss": 2.3195, "step": 157385 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.0001835472061681032, "loss": 2.2101, "step": 157390 }, { "epoch": 0.37, "grad_norm": 1.765625, "learning_rate": 0.00018354619045681715, "loss": 2.2049, "step": 157395 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018354517471699024, "loss": 2.149, "step": 157400 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018354415894862277, "loss": 2.3455, "step": 157405 }, { "epoch": 0.37, "grad_norm": 2.5625, "learning_rate": 0.00018354314315171508, "loss": 1.9754, "step": 157410 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018354212732626756, "loss": 2.2511, "step": 157415 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018354111147228053, "loss": 2.0345, "step": 157420 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018354009558975434, "loss": 2.1975, "step": 157425 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018353907967868932, "loss": 2.046, "step": 157430 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.00018353806373908586, "loss": 2.0403, "step": 157435 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018353704777094427, "loss": 2.1396, "step": 157440 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018353603177426493, "loss": 2.1523, "step": 157445 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.0001835350157490481, "loss": 2.0347, "step": 157450 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018353399969529428, "loss": 1.916, "step": 157455 }, { "epoch": 0.37, "grad_norm": 1.953125, "learning_rate": 0.00018353298361300369, "loss": 2.0984, "step": 157460 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018353196750217672, "loss": 2.1827, "step": 157465 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.0001835309513628137, "loss": 2.2307, "step": 157470 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018352993519491504, "loss": 1.8772, "step": 157475 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.000183528918998481, "loss": 2.1699, "step": 157480 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018352790277351196, "loss": 2.0907, "step": 157485 }, { "epoch": 0.37, "grad_norm": 2.625, "learning_rate": 0.0001835268865200083, "loss": 1.9419, "step": 157490 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001835258702379703, "loss": 2.0554, "step": 157495 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.0001835248539273984, "loss": 2.1988, "step": 157500 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018352383758829286, "loss": 2.4507, "step": 157505 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018352282122065407, "loss": 2.0999, "step": 157510 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.0001835218048244824, "loss": 2.1843, "step": 157515 }, { "epoch": 0.37, "grad_norm": 1.9375, "learning_rate": 0.00018352078839977814, "loss": 2.0527, "step": 157520 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018351977194654164, "loss": 2.2446, "step": 157525 }, { "epoch": 0.37, "grad_norm": 2.921875, "learning_rate": 0.0001835187554647733, "loss": 2.242, "step": 157530 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018351773895447342, "loss": 2.1633, "step": 157535 }, { "epoch": 0.37, "grad_norm": 1.9296875, "learning_rate": 0.00018351672241564239, "loss": 2.1761, "step": 157540 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.0001835157058482805, "loss": 2.1167, "step": 157545 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018351468925238817, "loss": 2.1748, "step": 157550 }, { "epoch": 0.37, "grad_norm": 2.671875, "learning_rate": 0.00018351367262796567, "loss": 2.1186, "step": 157555 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018351265597501342, "loss": 2.2704, "step": 157560 }, { "epoch": 0.37, "grad_norm": 2.984375, "learning_rate": 0.0001835116392935317, "loss": 2.4042, "step": 157565 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018351062258352093, "loss": 2.0498, "step": 157570 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.0001835096058449814, "loss": 2.2909, "step": 157575 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018350858907791346, "loss": 2.2565, "step": 157580 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018350757228231747, "loss": 2.0096, "step": 157585 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.0001835065554581938, "loss": 1.9906, "step": 157590 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018350553860554275, "loss": 2.1295, "step": 157595 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018350452172436472, "loss": 2.1863, "step": 157600 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018350350481466, "loss": 2.1567, "step": 157605 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.000183502487876429, "loss": 1.9401, "step": 157610 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018350147090967202, "loss": 1.9157, "step": 157615 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.0001835004539143894, "loss": 2.1684, "step": 157620 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018349943689058155, "loss": 2.1474, "step": 157625 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018349841983824879, "loss": 1.8232, "step": 157630 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018349740275739142, "loss": 2.1466, "step": 157635 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018349638564800982, "loss": 2.1145, "step": 157640 }, { "epoch": 0.37, "grad_norm": 1.890625, "learning_rate": 0.00018349536851010437, "loss": 1.9804, "step": 157645 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018349435134367537, "loss": 1.8761, "step": 157650 }, { "epoch": 0.37, "grad_norm": 2.765625, "learning_rate": 0.0001834933341487232, "loss": 1.9854, "step": 157655 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018349231692524814, "loss": 2.1304, "step": 157660 }, { "epoch": 0.37, "grad_norm": 1.9140625, "learning_rate": 0.00018349129967325066, "loss": 2.1751, "step": 157665 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.000183490282392731, "loss": 2.3705, "step": 157670 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018348926508368954, "loss": 2.0321, "step": 157675 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018348824774612666, "loss": 2.0489, "step": 157680 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018348723038004265, "loss": 2.2539, "step": 157685 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018348621298543792, "loss": 1.9998, "step": 157690 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018348519556231273, "loss": 2.1915, "step": 157695 }, { "epoch": 0.37, "grad_norm": 3.28125, "learning_rate": 0.00018348417811066753, "loss": 2.1101, "step": 157700 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018348316063050262, "loss": 2.2279, "step": 157705 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018348214312181833, "loss": 2.121, "step": 157710 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018348112558461504, "loss": 2.1296, "step": 157715 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.0001834801080188931, "loss": 2.0671, "step": 157720 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018347909042465283, "loss": 2.3241, "step": 157725 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018347807280189455, "loss": 2.0951, "step": 157730 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018347705515061867, "loss": 2.0836, "step": 157735 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018347603747082553, "loss": 2.1739, "step": 157740 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018347501976251546, "loss": 2.205, "step": 157745 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018347400202568882, "loss": 2.137, "step": 157750 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018347298426034592, "loss": 2.1571, "step": 157755 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018347196646648715, "loss": 2.0079, "step": 157760 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.0001834709486441128, "loss": 2.2289, "step": 157765 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018346993079322333, "loss": 2.0224, "step": 157770 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.00018346891291381895, "loss": 2.2263, "step": 157775 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.0001834678950059001, "loss": 1.9494, "step": 157780 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018346687706946715, "loss": 1.9123, "step": 157785 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018346585910452036, "loss": 2.2275, "step": 157790 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018346484111106013, "loss": 2.1118, "step": 157795 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.00018346382308908677, "loss": 2.1461, "step": 157800 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018346280503860068, "loss": 2.1722, "step": 157805 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018346178695960216, "loss": 1.9701, "step": 157810 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001834607688520916, "loss": 2.1502, "step": 157815 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018345975071606933, "loss": 2.2618, "step": 157820 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018345873255153568, "loss": 1.965, "step": 157825 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018345771435849105, "loss": 2.0909, "step": 157830 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.0001834566961369357, "loss": 2.1231, "step": 157835 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018345567788687005, "loss": 2.1699, "step": 157840 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018345465960829444, "loss": 2.2187, "step": 157845 }, { "epoch": 0.37, "grad_norm": 1.984375, "learning_rate": 0.0001834536413012092, "loss": 2.0936, "step": 157850 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018345262296561466, "loss": 2.1147, "step": 157855 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018345160460151122, "loss": 2.1683, "step": 157860 }, { "epoch": 0.37, "grad_norm": 1.6640625, "learning_rate": 0.0001834505862088992, "loss": 1.9966, "step": 157865 }, { "epoch": 0.37, "grad_norm": 1.984375, "learning_rate": 0.0001834495677877789, "loss": 1.9594, "step": 157870 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018344854933815077, "loss": 2.0266, "step": 157875 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018344753086001507, "loss": 2.1915, "step": 157880 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001834465123533722, "loss": 2.2468, "step": 157885 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018344549381822244, "loss": 2.0508, "step": 157890 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018344447525456622, "loss": 2.0881, "step": 157895 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018344345666240387, "loss": 2.2029, "step": 157900 }, { "epoch": 0.37, "grad_norm": 1.984375, "learning_rate": 0.00018344243804173572, "loss": 2.2471, "step": 157905 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001834414193925621, "loss": 2.02, "step": 157910 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018344040071488339, "loss": 2.2521, "step": 157915 }, { "epoch": 0.37, "grad_norm": 1.9375, "learning_rate": 0.00018343938200869993, "loss": 2.0863, "step": 157920 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018343836327401202, "loss": 2.2559, "step": 157925 }, { "epoch": 0.37, "grad_norm": 1.9921875, "learning_rate": 0.0001834373445108201, "loss": 2.0087, "step": 157930 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018343632571912445, "loss": 1.9938, "step": 157935 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018343530689892546, "loss": 2.0882, "step": 157940 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018343428805022344, "loss": 2.0707, "step": 157945 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018343326917301874, "loss": 2.1946, "step": 157950 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018343225026731176, "loss": 2.2903, "step": 157955 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018343123133310275, "loss": 2.0974, "step": 157960 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018343021237039218, "loss": 2.0552, "step": 157965 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018342919337918033, "loss": 1.9654, "step": 157970 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.0001834281743594675, "loss": 2.2411, "step": 157975 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018342715531125415, "loss": 2.091, "step": 157980 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018342613623454054, "loss": 1.9505, "step": 157985 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018342511712932707, "loss": 2.0542, "step": 157990 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018342409799561408, "loss": 2.1325, "step": 157995 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018342307883340185, "loss": 2.273, "step": 158000 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.0001834220596426908, "loss": 2.0543, "step": 158005 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.00018342104042348129, "loss": 2.0681, "step": 158010 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018342002117577365, "loss": 2.1225, "step": 158015 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.0001834190018995682, "loss": 2.0535, "step": 158020 }, { "epoch": 0.37, "grad_norm": 1.9765625, "learning_rate": 0.00018341798259486526, "loss": 2.1447, "step": 158025 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018341696326166528, "loss": 2.1516, "step": 158030 }, { "epoch": 0.37, "grad_norm": 1.984375, "learning_rate": 0.00018341594389996853, "loss": 2.0492, "step": 158035 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018341492450977536, "loss": 2.2667, "step": 158040 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018341390509108617, "loss": 2.3008, "step": 158045 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.0001834128856439013, "loss": 1.9782, "step": 158050 }, { "epoch": 0.37, "grad_norm": 3.328125, "learning_rate": 0.00018341186616822105, "loss": 2.0445, "step": 158055 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018341084666404576, "loss": 2.0083, "step": 158060 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018340982713137586, "loss": 2.0361, "step": 158065 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018340880757021163, "loss": 2.0341, "step": 158070 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018340778798055343, "loss": 2.2187, "step": 158075 }, { "epoch": 0.37, "grad_norm": 2.5625, "learning_rate": 0.00018340676836240164, "loss": 2.1009, "step": 158080 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018340574871575659, "loss": 2.1796, "step": 158085 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.0001834047290406186, "loss": 2.3043, "step": 158090 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.00018340370933698805, "loss": 2.0246, "step": 158095 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018340268960486528, "loss": 2.0953, "step": 158100 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018340166984425065, "loss": 2.1356, "step": 158105 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.0001834006500551445, "loss": 2.1528, "step": 158110 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018339963023754712, "loss": 1.9352, "step": 158115 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018339861039145898, "loss": 1.8909, "step": 158120 }, { "epoch": 0.37, "grad_norm": 1.9921875, "learning_rate": 0.00018339759051688031, "loss": 2.1264, "step": 158125 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018339657061381158, "loss": 2.0452, "step": 158130 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018339555068225301, "loss": 2.1323, "step": 158135 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018339453072220503, "loss": 2.0855, "step": 158140 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.00018339351073366796, "loss": 2.0115, "step": 158145 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018339249071664215, "loss": 2.1164, "step": 158150 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018339147067112797, "loss": 2.063, "step": 158155 }, { "epoch": 0.37, "grad_norm": 1.8984375, "learning_rate": 0.00018339045059712574, "loss": 2.0573, "step": 158160 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.0001833894304946358, "loss": 2.1286, "step": 158165 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018338841036365854, "loss": 2.1419, "step": 158170 }, { "epoch": 0.37, "grad_norm": 1.90625, "learning_rate": 0.00018338739020419428, "loss": 2.0482, "step": 158175 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.0001833863700162434, "loss": 1.9985, "step": 158180 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001833853497998062, "loss": 2.0125, "step": 158185 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018338432955488306, "loss": 2.0328, "step": 158190 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018338330928147428, "loss": 2.1266, "step": 158195 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.0001833822889795803, "loss": 2.0134, "step": 158200 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001833812686492014, "loss": 2.1928, "step": 158205 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018338024829033797, "loss": 2.103, "step": 158210 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018337922790299032, "loss": 2.0633, "step": 158215 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.0001833782074871588, "loss": 2.1919, "step": 158220 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018337718704284378, "loss": 1.9925, "step": 158225 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018337616657004556, "loss": 2.228, "step": 158230 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018337514606876459, "loss": 2.3799, "step": 158235 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018337412553900116, "loss": 2.0978, "step": 158240 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018337310498075558, "loss": 2.1008, "step": 158245 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018337208439402826, "loss": 2.1964, "step": 158250 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001833710637788195, "loss": 2.1887, "step": 158255 }, { "epoch": 0.37, "grad_norm": 1.890625, "learning_rate": 0.0001833700431351297, "loss": 2.0213, "step": 158260 }, { "epoch": 0.37, "grad_norm": 1.75, "learning_rate": 0.00018336902246295919, "loss": 1.8798, "step": 158265 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018336800176230826, "loss": 2.1133, "step": 158270 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018336698103317732, "loss": 2.2573, "step": 158275 }, { "epoch": 0.37, "grad_norm": 1.8125, "learning_rate": 0.00018336596027556675, "loss": 2.0903, "step": 158280 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018336493948947682, "loss": 2.2634, "step": 158285 }, { "epoch": 0.37, "grad_norm": 2.78125, "learning_rate": 0.00018336391867490793, "loss": 2.1982, "step": 158290 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.0001833628978318604, "loss": 2.0722, "step": 158295 }, { "epoch": 0.37, "grad_norm": 1.8984375, "learning_rate": 0.0001833618769603346, "loss": 2.0786, "step": 158300 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018336085606033086, "loss": 2.083, "step": 158305 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018335983513184954, "loss": 2.1467, "step": 158310 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018335881417489103, "loss": 2.3151, "step": 158315 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018335779318945562, "loss": 2.1488, "step": 158320 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018335677217554364, "loss": 2.2353, "step": 158325 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001833557511331555, "loss": 2.0429, "step": 158330 }, { "epoch": 0.37, "grad_norm": 1.9765625, "learning_rate": 0.0001833547300622915, "loss": 2.0117, "step": 158335 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018335370896295204, "loss": 2.2322, "step": 158340 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018335268783513744, "loss": 2.0519, "step": 158345 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018335166667884803, "loss": 2.0377, "step": 158350 }, { "epoch": 0.37, "grad_norm": 2.75, "learning_rate": 0.00018335064549408422, "loss": 2.1957, "step": 158355 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.0001833496242808463, "loss": 2.1417, "step": 158360 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.0001833486030391346, "loss": 2.1881, "step": 158365 }, { "epoch": 0.37, "grad_norm": 2.625, "learning_rate": 0.00018334758176894955, "loss": 2.0208, "step": 158370 }, { "epoch": 0.37, "grad_norm": 2.8125, "learning_rate": 0.00018334656047029144, "loss": 1.9711, "step": 158375 }, { "epoch": 0.37, "grad_norm": 1.9921875, "learning_rate": 0.00018334553914316063, "loss": 2.2927, "step": 158380 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001833445177875575, "loss": 2.1221, "step": 158385 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018334349640348232, "loss": 2.0041, "step": 158390 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018334247499093552, "loss": 2.1019, "step": 158395 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018334145354991744, "loss": 2.2229, "step": 158400 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018334043208042834, "loss": 2.2341, "step": 158405 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018333941058246871, "loss": 2.1181, "step": 158410 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018333838905603878, "loss": 2.2229, "step": 158415 }, { "epoch": 0.37, "grad_norm": 1.796875, "learning_rate": 0.00018333736750113897, "loss": 2.0234, "step": 158420 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.0001833363459177696, "loss": 2.1087, "step": 158425 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.000183335324305931, "loss": 1.9844, "step": 158430 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018333430266562358, "loss": 2.3185, "step": 158435 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018333328099684763, "loss": 2.1022, "step": 158440 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.0001833322592996035, "loss": 2.0527, "step": 158445 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.0001833312375738916, "loss": 2.1211, "step": 158450 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.0001833302158197122, "loss": 2.0574, "step": 158455 }, { "epoch": 0.37, "grad_norm": 1.9453125, "learning_rate": 0.0001833291940370657, "loss": 2.063, "step": 158460 }, { "epoch": 0.37, "grad_norm": 2.828125, "learning_rate": 0.00018332817222595246, "loss": 2.0727, "step": 158465 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018332715038637276, "loss": 1.9189, "step": 158470 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018332612851832702, "loss": 2.2227, "step": 158475 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018332510662181557, "loss": 2.2745, "step": 158480 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.00018332408469683874, "loss": 2.1558, "step": 158485 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018332306274339692, "loss": 2.0678, "step": 158490 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.0001833220407614904, "loss": 2.1584, "step": 158495 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018332101875111957, "loss": 2.0642, "step": 158500 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.00018331999671228477, "loss": 2.1378, "step": 158505 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018331897464498632, "loss": 2.1979, "step": 158510 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018331795254922462, "loss": 1.8212, "step": 158515 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018331693042500003, "loss": 2.2545, "step": 158520 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018331590827231283, "loss": 2.2118, "step": 158525 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018331488609116338, "loss": 2.2416, "step": 158530 }, { "epoch": 0.37, "grad_norm": 2.703125, "learning_rate": 0.0001833138638815521, "loss": 2.2611, "step": 158535 }, { "epoch": 0.37, "grad_norm": 2.5625, "learning_rate": 0.00018331284164347926, "loss": 2.0649, "step": 158540 }, { "epoch": 0.37, "grad_norm": 1.9296875, "learning_rate": 0.00018331181937694525, "loss": 2.0866, "step": 158545 }, { "epoch": 0.37, "grad_norm": 2.5625, "learning_rate": 0.00018331079708195042, "loss": 2.3034, "step": 158550 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018330977475849514, "loss": 2.221, "step": 158555 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018330875240657967, "loss": 2.0458, "step": 158560 }, { "epoch": 0.37, "grad_norm": 1.8828125, "learning_rate": 0.00018330773002620446, "loss": 2.0612, "step": 158565 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.0001833067076173698, "loss": 2.1763, "step": 158570 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018330568518007607, "loss": 2.2003, "step": 158575 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.0001833046627143236, "loss": 2.1553, "step": 158580 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018330364022011274, "loss": 2.0322, "step": 158585 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.00018330261769744385, "loss": 2.0546, "step": 158590 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.0001833015951463173, "loss": 1.9995, "step": 158595 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018330057256673338, "loss": 2.2911, "step": 158600 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018329954995869249, "loss": 2.1118, "step": 158605 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018329852732219498, "loss": 2.0884, "step": 158610 }, { "epoch": 0.37, "grad_norm": 2.484375, "learning_rate": 0.00018329750465724117, "loss": 2.2179, "step": 158615 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018329648196383137, "loss": 2.1261, "step": 158620 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018329545924196606, "loss": 2.2316, "step": 158625 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018329443649164545, "loss": 2.0082, "step": 158630 }, { "epoch": 0.37, "grad_norm": 2.796875, "learning_rate": 0.00018329341371287, "loss": 2.0834, "step": 158635 }, { "epoch": 0.37, "grad_norm": 1.96875, "learning_rate": 0.00018329239090563996, "loss": 2.1407, "step": 158640 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018329136806995578, "loss": 1.9911, "step": 158645 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018329034520581772, "loss": 2.1728, "step": 158650 }, { "epoch": 0.37, "grad_norm": 1.8984375, "learning_rate": 0.00018328932231322618, "loss": 2.2233, "step": 158655 }, { "epoch": 0.37, "grad_norm": 2.75, "learning_rate": 0.0001832882993921815, "loss": 2.0777, "step": 158660 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.000183287276442684, "loss": 2.015, "step": 158665 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001832862534647341, "loss": 2.2152, "step": 158670 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018328523045833207, "loss": 2.1862, "step": 158675 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018328420742347833, "loss": 2.166, "step": 158680 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018328318436017316, "loss": 2.0945, "step": 158685 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.00018328216126841697, "loss": 1.9825, "step": 158690 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018328113814821006, "loss": 2.2282, "step": 158695 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.0001832801149995528, "loss": 1.9268, "step": 158700 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001832790918224456, "loss": 2.177, "step": 158705 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018327806861688866, "loss": 2.064, "step": 158710 }, { "epoch": 0.37, "grad_norm": 2.609375, "learning_rate": 0.00018327704538288248, "loss": 2.1586, "step": 158715 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018327602212042735, "loss": 2.1182, "step": 158720 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001832749988295236, "loss": 2.0797, "step": 158725 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001832739755101716, "loss": 2.2005, "step": 158730 }, { "epoch": 0.37, "grad_norm": 2.671875, "learning_rate": 0.00018327295216237174, "loss": 2.1673, "step": 158735 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001832719287861243, "loss": 2.1432, "step": 158740 }, { "epoch": 0.37, "grad_norm": 2.0, "learning_rate": 0.00018327090538142966, "loss": 2.1161, "step": 158745 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018326988194828815, "loss": 1.9411, "step": 158750 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018326885848670018, "loss": 1.8914, "step": 158755 }, { "epoch": 0.37, "grad_norm": 2.234375, "learning_rate": 0.00018326783499666603, "loss": 2.06, "step": 158760 }, { "epoch": 0.37, "grad_norm": 4.25, "learning_rate": 0.00018326681147818604, "loss": 2.1889, "step": 158765 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018326578793126066, "loss": 2.0773, "step": 158770 }, { "epoch": 0.37, "grad_norm": 2.625, "learning_rate": 0.00018326476435589014, "loss": 2.2484, "step": 158775 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018326374075207488, "loss": 2.1711, "step": 158780 }, { "epoch": 0.37, "grad_norm": 2.8125, "learning_rate": 0.00018326271711981523, "loss": 2.1102, "step": 158785 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.0001832616934591115, "loss": 2.1366, "step": 158790 }, { "epoch": 0.37, "grad_norm": 2.546875, "learning_rate": 0.0001832606697699641, "loss": 2.0433, "step": 158795 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018325964605237332, "loss": 2.1024, "step": 158800 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018325862230633954, "loss": 2.0632, "step": 158805 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.0001832575985318631, "loss": 2.1425, "step": 158810 }, { "epoch": 0.37, "grad_norm": 1.921875, "learning_rate": 0.00018325657472894436, "loss": 2.0071, "step": 158815 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018325555089758363, "loss": 2.0968, "step": 158820 }, { "epoch": 0.37, "grad_norm": 2.515625, "learning_rate": 0.00018325452703778135, "loss": 2.0732, "step": 158825 }, { "epoch": 0.37, "grad_norm": 2.578125, "learning_rate": 0.00018325350314953777, "loss": 2.0355, "step": 158830 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.0001832524792328533, "loss": 2.195, "step": 158835 }, { "epoch": 0.37, "grad_norm": 2.6875, "learning_rate": 0.00018325145528772828, "loss": 2.1447, "step": 158840 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018325043131416305, "loss": 2.1915, "step": 158845 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018324940731215795, "loss": 2.2601, "step": 158850 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001832483832817134, "loss": 2.0789, "step": 158855 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.0001832473592228296, "loss": 2.147, "step": 158860 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018324633513550707, "loss": 2.1211, "step": 158865 }, { "epoch": 0.37, "grad_norm": 2.015625, "learning_rate": 0.00018324531101974602, "loss": 2.0909, "step": 158870 }, { "epoch": 0.37, "grad_norm": 2.171875, "learning_rate": 0.00018324428687554691, "loss": 2.1365, "step": 158875 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018324326270291002, "loss": 2.1598, "step": 158880 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018324223850183573, "loss": 2.2118, "step": 158885 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018324121427232436, "loss": 2.146, "step": 158890 }, { "epoch": 0.37, "grad_norm": 3.171875, "learning_rate": 0.00018324019001437632, "loss": 1.959, "step": 158895 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.0001832391657279919, "loss": 2.1254, "step": 158900 }, { "epoch": 0.37, "grad_norm": 1.734375, "learning_rate": 0.0001832381414131715, "loss": 2.072, "step": 158905 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001832371170699154, "loss": 2.11, "step": 158910 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.000183236092698224, "loss": 2.034, "step": 158915 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018323506829809764, "loss": 2.1669, "step": 158920 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018323404386953667, "loss": 2.3558, "step": 158925 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.00018323301941254144, "loss": 2.0769, "step": 158930 }, { "epoch": 0.37, "grad_norm": 2.5, "learning_rate": 0.0001832319949271123, "loss": 1.9928, "step": 158935 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018323097041324964, "loss": 2.2775, "step": 158940 }, { "epoch": 0.37, "grad_norm": 2.46875, "learning_rate": 0.00018322994587095375, "loss": 2.0592, "step": 158945 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.00018322892130022498, "loss": 2.1558, "step": 158950 }, { "epoch": 0.37, "grad_norm": 1.984375, "learning_rate": 0.00018322789670106372, "loss": 2.2402, "step": 158955 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018322687207347028, "loss": 2.0952, "step": 158960 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018322584741744505, "loss": 2.2065, "step": 158965 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018322482273298835, "loss": 2.3111, "step": 158970 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018322379802010054, "loss": 2.1342, "step": 158975 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.000183222773278782, "loss": 2.1374, "step": 158980 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.000183221748509033, "loss": 1.9173, "step": 158985 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.000183220723710854, "loss": 2.2731, "step": 158990 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018321969888424527, "loss": 2.065, "step": 158995 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018321867402920717, "loss": 2.1387, "step": 159000 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018321764914574007, "loss": 2.0734, "step": 159005 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018321662423384432, "loss": 2.0255, "step": 159010 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018321559929352025, "loss": 2.1248, "step": 159015 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.0001832145743247682, "loss": 1.9818, "step": 159020 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018321354932758857, "loss": 2.0576, "step": 159025 }, { "epoch": 0.37, "grad_norm": 3.203125, "learning_rate": 0.0001832125243019817, "loss": 1.9929, "step": 159030 }, { "epoch": 0.37, "grad_norm": 2.0625, "learning_rate": 0.0001832114992479479, "loss": 2.2205, "step": 159035 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.0001832104741654875, "loss": 2.0301, "step": 159040 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018320944905460095, "loss": 2.1856, "step": 159045 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018320842391528853, "loss": 2.0178, "step": 159050 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.0001832073987475506, "loss": 2.0354, "step": 159055 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018320637355138752, "loss": 2.0704, "step": 159060 }, { "epoch": 0.37, "grad_norm": 2.265625, "learning_rate": 0.00018320534832679962, "loss": 1.9762, "step": 159065 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001832043230737873, "loss": 2.2181, "step": 159070 }, { "epoch": 0.37, "grad_norm": 2.53125, "learning_rate": 0.00018320329779235084, "loss": 2.182, "step": 159075 }, { "epoch": 0.37, "grad_norm": 2.09375, "learning_rate": 0.00018320227248249063, "loss": 1.9731, "step": 159080 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.000183201247144207, "loss": 2.0007, "step": 159085 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018320022177750037, "loss": 2.2993, "step": 159090 }, { "epoch": 0.37, "grad_norm": 2.078125, "learning_rate": 0.00018319919638237097, "loss": 2.278, "step": 159095 }, { "epoch": 0.37, "grad_norm": 2.75, "learning_rate": 0.00018319817095881926, "loss": 2.3297, "step": 159100 }, { "epoch": 0.37, "grad_norm": 2.453125, "learning_rate": 0.00018319714550684552, "loss": 2.1004, "step": 159105 }, { "epoch": 0.37, "grad_norm": 1.8828125, "learning_rate": 0.00018319612002645012, "loss": 2.1928, "step": 159110 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018319509451763345, "loss": 2.0774, "step": 159115 }, { "epoch": 0.37, "grad_norm": 2.203125, "learning_rate": 0.0001831940689803958, "loss": 2.1902, "step": 159120 }, { "epoch": 0.37, "grad_norm": 2.1875, "learning_rate": 0.00018319304341473755, "loss": 2.1096, "step": 159125 }, { "epoch": 0.37, "grad_norm": 2.03125, "learning_rate": 0.00018319201782065906, "loss": 2.1583, "step": 159130 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.00018319099219816067, "loss": 2.1082, "step": 159135 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018318996654724268, "loss": 2.0447, "step": 159140 }, { "epoch": 0.37, "grad_norm": 2.25, "learning_rate": 0.00018318894086790554, "loss": 2.2154, "step": 159145 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.00018318791516014956, "loss": 2.2458, "step": 159150 }, { "epoch": 0.37, "grad_norm": 2.21875, "learning_rate": 0.00018318688942397504, "loss": 2.024, "step": 159155 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.0001831858636593824, "loss": 2.1641, "step": 159160 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018318483786637196, "loss": 2.1942, "step": 159165 }, { "epoch": 0.37, "grad_norm": 2.046875, "learning_rate": 0.00018318381204494405, "loss": 2.0277, "step": 159170 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.00018318278619509904, "loss": 2.0907, "step": 159175 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.0001831817603168373, "loss": 2.1093, "step": 159180 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018318073441015912, "loss": 2.0145, "step": 159185 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.00018317970847506497, "loss": 1.9693, "step": 159190 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.00018317868251155504, "loss": 2.0734, "step": 159195 }, { "epoch": 0.37, "grad_norm": 2.28125, "learning_rate": 0.0001831776565196298, "loss": 1.9161, "step": 159200 }, { "epoch": 0.37, "grad_norm": 2.359375, "learning_rate": 0.0001831766304992896, "loss": 2.1219, "step": 159205 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001831756044505347, "loss": 2.1412, "step": 159210 }, { "epoch": 0.37, "grad_norm": 1.9921875, "learning_rate": 0.00018317457837336554, "loss": 2.2279, "step": 159215 }, { "epoch": 0.37, "grad_norm": 2.65625, "learning_rate": 0.00018317355226778238, "loss": 2.1202, "step": 159220 }, { "epoch": 0.37, "grad_norm": 2.3125, "learning_rate": 0.00018317252613378568, "loss": 2.1686, "step": 159225 }, { "epoch": 0.37, "grad_norm": 2.5625, "learning_rate": 0.00018317149997137575, "loss": 2.1459, "step": 159230 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.0001831704737805529, "loss": 2.0908, "step": 159235 }, { "epoch": 0.37, "grad_norm": 2.59375, "learning_rate": 0.0001831694475613175, "loss": 2.1209, "step": 159240 }, { "epoch": 0.37, "grad_norm": 2.4375, "learning_rate": 0.00018316842131366992, "loss": 2.3094, "step": 159245 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018316739503761048, "loss": 1.9778, "step": 159250 }, { "epoch": 0.37, "grad_norm": 2.125, "learning_rate": 0.00018316636873313958, "loss": 2.1594, "step": 159255 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.0001831653424002575, "loss": 2.1411, "step": 159260 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018316431603896468, "loss": 2.0553, "step": 159265 }, { "epoch": 0.37, "grad_norm": 2.390625, "learning_rate": 0.00018316328964926138, "loss": 2.1568, "step": 159270 }, { "epoch": 0.37, "grad_norm": 2.328125, "learning_rate": 0.000183162263231148, "loss": 2.0134, "step": 159275 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018316123678462494, "loss": 2.286, "step": 159280 }, { "epoch": 0.37, "grad_norm": 1.96875, "learning_rate": 0.0001831602103096924, "loss": 2.2181, "step": 159285 }, { "epoch": 0.37, "grad_norm": 2.34375, "learning_rate": 0.00018315918380635088, "loss": 2.1999, "step": 159290 }, { "epoch": 0.37, "grad_norm": 2.40625, "learning_rate": 0.00018315815727460067, "loss": 2.1164, "step": 159295 }, { "epoch": 0.37, "grad_norm": 2.109375, "learning_rate": 0.00018315713071444213, "loss": 2.1907, "step": 159300 }, { "epoch": 0.37, "grad_norm": 1.9609375, "learning_rate": 0.00018315610412587558, "loss": 2.2153, "step": 159305 }, { "epoch": 0.37, "grad_norm": 2.421875, "learning_rate": 0.00018315507750890143, "loss": 2.0548, "step": 159310 }, { "epoch": 0.37, "grad_norm": 2.609375, "learning_rate": 0.00018315405086351998, "loss": 2.1013, "step": 159315 }, { "epoch": 0.37, "grad_norm": 2.375, "learning_rate": 0.0001831530241897316, "loss": 1.994, "step": 159320 }, { "epoch": 0.37, "grad_norm": 1.9765625, "learning_rate": 0.00018315199748753663, "loss": 2.0845, "step": 159325 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018315097075693543, "loss": 2.05, "step": 159330 }, { "epoch": 0.37, "grad_norm": 2.15625, "learning_rate": 0.00018314994399792837, "loss": 2.1588, "step": 159335 }, { "epoch": 0.37, "grad_norm": 2.140625, "learning_rate": 0.00018314891721051577, "loss": 2.2338, "step": 159340 }, { "epoch": 0.37, "grad_norm": 2.296875, "learning_rate": 0.00018314789039469798, "loss": 2.2177, "step": 159345 }, { "epoch": 0.38, "grad_norm": 1.9296875, "learning_rate": 0.0001831468635504754, "loss": 2.171, "step": 159350 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018314583667784827, "loss": 2.0984, "step": 159355 }, { "epoch": 0.38, "grad_norm": 2.59375, "learning_rate": 0.0001831448097768171, "loss": 2.1076, "step": 159360 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018314378284738208, "loss": 2.1654, "step": 159365 }, { "epoch": 0.38, "grad_norm": 1.9140625, "learning_rate": 0.00018314275588954368, "loss": 2.01, "step": 159370 }, { "epoch": 0.38, "grad_norm": 2.625, "learning_rate": 0.00018314172890330222, "loss": 2.1956, "step": 159375 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.000183140701888658, "loss": 1.9593, "step": 159380 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018313967484561144, "loss": 2.1797, "step": 159385 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018313864777416282, "loss": 2.0124, "step": 159390 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018313762067431256, "loss": 2.1833, "step": 159395 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.000183136593546061, "loss": 2.2116, "step": 159400 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001831355663894084, "loss": 2.1763, "step": 159405 }, { "epoch": 0.38, "grad_norm": 1.765625, "learning_rate": 0.00018313453920435526, "loss": 2.0082, "step": 159410 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.0001831335119909018, "loss": 2.1452, "step": 159415 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018313248474904846, "loss": 2.1822, "step": 159420 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018313145747879555, "loss": 2.0798, "step": 159425 }, { "epoch": 0.38, "grad_norm": 1.9375, "learning_rate": 0.00018313043018014342, "loss": 2.2715, "step": 159430 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001831294028530924, "loss": 2.2729, "step": 159435 }, { "epoch": 0.38, "grad_norm": 2.75, "learning_rate": 0.0001831283754976429, "loss": 2.077, "step": 159440 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018312734811379527, "loss": 2.3049, "step": 159445 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018312632070154977, "loss": 2.2907, "step": 159450 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018312529326090684, "loss": 2.1407, "step": 159455 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018312426579186682, "loss": 2.166, "step": 159460 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018312323829443, "loss": 2.1423, "step": 159465 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.0001831222107685968, "loss": 2.0205, "step": 159470 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018312118321436754, "loss": 2.123, "step": 159475 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018312015563174257, "loss": 2.1406, "step": 159480 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018311912802072226, "loss": 2.2338, "step": 159485 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018311810038130694, "loss": 2.104, "step": 159490 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018311707271349698, "loss": 2.1516, "step": 159495 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018311604501729272, "loss": 2.0915, "step": 159500 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018311501729269448, "loss": 2.0273, "step": 159505 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018311398953970267, "loss": 2.1616, "step": 159510 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001831129617583176, "loss": 2.0951, "step": 159515 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.00018311193394853967, "loss": 1.9585, "step": 159520 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018311090611036916, "loss": 2.1643, "step": 159525 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018310987824380647, "loss": 2.0823, "step": 159530 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018310885034885196, "loss": 2.0877, "step": 159535 }, { "epoch": 0.38, "grad_norm": 2.75, "learning_rate": 0.00018310782242550593, "loss": 2.2858, "step": 159540 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018310679447376877, "loss": 2.0481, "step": 159545 }, { "epoch": 0.38, "grad_norm": 2.84375, "learning_rate": 0.0001831057664936408, "loss": 1.8385, "step": 159550 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001831047384851224, "loss": 2.1845, "step": 159555 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018310371044821396, "loss": 2.0737, "step": 159560 }, { "epoch": 0.38, "grad_norm": 1.9765625, "learning_rate": 0.00018310268238291575, "loss": 2.1969, "step": 159565 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018310165428922815, "loss": 2.1139, "step": 159570 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018310062616715153, "loss": 2.1762, "step": 159575 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.0001830995980166862, "loss": 2.1155, "step": 159580 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.00018309856983783257, "loss": 2.1865, "step": 159585 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018309754163059098, "loss": 2.0702, "step": 159590 }, { "epoch": 0.38, "grad_norm": 1.8984375, "learning_rate": 0.0001830965133949617, "loss": 1.954, "step": 159595 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018309548513094522, "loss": 2.1845, "step": 159600 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018309445683854176, "loss": 2.1022, "step": 159605 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018309342851775178, "loss": 2.0897, "step": 159610 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.0001830924001685755, "loss": 2.0068, "step": 159615 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018309137179101342, "loss": 2.0961, "step": 159620 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018309034338506578, "loss": 2.1932, "step": 159625 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018308931495073297, "loss": 2.1216, "step": 159630 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018308828648801536, "loss": 1.9884, "step": 159635 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018308725799691332, "loss": 2.1192, "step": 159640 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001830862294774271, "loss": 2.1836, "step": 159645 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018308520092955716, "loss": 2.0533, "step": 159650 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.0001830841723533038, "loss": 2.0918, "step": 159655 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018308314374866738, "loss": 2.2124, "step": 159660 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018308211511564824, "loss": 2.2126, "step": 159665 }, { "epoch": 0.38, "grad_norm": 1.9453125, "learning_rate": 0.00018308108645424676, "loss": 2.0997, "step": 159670 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018308005776446323, "loss": 1.9424, "step": 159675 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018307902904629808, "loss": 2.1702, "step": 159680 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018307800029975164, "loss": 2.073, "step": 159685 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018307697152482422, "loss": 2.3032, "step": 159690 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.0001830759427215162, "loss": 2.0462, "step": 159695 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018307491388982797, "loss": 2.168, "step": 159700 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001830738850297598, "loss": 2.1622, "step": 159705 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001830728561413121, "loss": 2.0056, "step": 159710 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.0001830718272244852, "loss": 2.1159, "step": 159715 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018307079827927948, "loss": 2.235, "step": 159720 }, { "epoch": 0.38, "grad_norm": 2.796875, "learning_rate": 0.00018306976930569527, "loss": 2.0872, "step": 159725 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018306874030373286, "loss": 2.2964, "step": 159730 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018306771127339272, "loss": 2.1695, "step": 159735 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018306668221467514, "loss": 2.1762, "step": 159740 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018306565312758047, "loss": 1.999, "step": 159745 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018306462401210906, "loss": 2.2526, "step": 159750 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018306359486826125, "loss": 2.1931, "step": 159755 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018306256569603743, "loss": 2.0247, "step": 159760 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018306153649543793, "loss": 1.9489, "step": 159765 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.0001830605072664631, "loss": 2.1791, "step": 159770 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018305947800911329, "loss": 2.3196, "step": 159775 }, { "epoch": 0.38, "grad_norm": 1.921875, "learning_rate": 0.0001830584487233889, "loss": 2.1168, "step": 159780 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018305741940929018, "loss": 2.2593, "step": 159785 }, { "epoch": 0.38, "grad_norm": 2.59375, "learning_rate": 0.00018305639006681754, "loss": 2.2837, "step": 159790 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018305536069597136, "loss": 1.9663, "step": 159795 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018305433129675195, "loss": 2.0478, "step": 159800 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018305330186915968, "loss": 2.1722, "step": 159805 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.0001830522724131949, "loss": 2.0979, "step": 159810 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018305124292885793, "loss": 2.1152, "step": 159815 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018305021341614918, "loss": 2.2602, "step": 159820 }, { "epoch": 0.38, "grad_norm": 3.03125, "learning_rate": 0.00018304918387506894, "loss": 2.256, "step": 159825 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018304815430561763, "loss": 2.2305, "step": 159830 }, { "epoch": 0.38, "grad_norm": 2.703125, "learning_rate": 0.00018304712470779553, "loss": 2.2179, "step": 159835 }, { "epoch": 0.38, "grad_norm": 2.875, "learning_rate": 0.00018304609508160304, "loss": 2.1987, "step": 159840 }, { "epoch": 0.38, "grad_norm": 2.765625, "learning_rate": 0.0001830450654270405, "loss": 2.0206, "step": 159845 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018304403574410827, "loss": 2.2143, "step": 159850 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.0001830430060328067, "loss": 2.0945, "step": 159855 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001830419762931361, "loss": 2.2628, "step": 159860 }, { "epoch": 0.38, "grad_norm": 1.8359375, "learning_rate": 0.00018304094652509684, "loss": 2.1026, "step": 159865 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018303991672868932, "loss": 2.1093, "step": 159870 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018303888690391387, "loss": 2.1108, "step": 159875 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018303785705077078, "loss": 2.193, "step": 159880 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018303682716926052, "loss": 2.0498, "step": 159885 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018303579725938333, "loss": 2.1331, "step": 159890 }, { "epoch": 0.38, "grad_norm": 1.7578125, "learning_rate": 0.00018303476732113962, "loss": 2.0136, "step": 159895 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001830337373545297, "loss": 2.3867, "step": 159900 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.000183032707359554, "loss": 2.2562, "step": 159905 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018303167733621278, "loss": 2.1682, "step": 159910 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018303064728450644, "loss": 2.1978, "step": 159915 }, { "epoch": 0.38, "grad_norm": 1.75, "learning_rate": 0.00018302961720443535, "loss": 2.1505, "step": 159920 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.0001830285870959998, "loss": 2.0152, "step": 159925 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.0001830275569592002, "loss": 2.2029, "step": 159930 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001830265267940369, "loss": 2.1154, "step": 159935 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018302549660051022, "loss": 2.2472, "step": 159940 }, { "epoch": 0.38, "grad_norm": 2.5, "learning_rate": 0.00018302446637862048, "loss": 2.1606, "step": 159945 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018302343612836813, "loss": 2.1181, "step": 159950 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018302240584975343, "loss": 2.1612, "step": 159955 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018302137554277682, "loss": 2.0878, "step": 159960 }, { "epoch": 0.38, "grad_norm": 2.90625, "learning_rate": 0.00018302034520743855, "loss": 2.17, "step": 159965 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018301931484373906, "loss": 2.2653, "step": 159970 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018301828445167864, "loss": 2.2485, "step": 159975 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001830172540312577, "loss": 1.919, "step": 159980 }, { "epoch": 0.38, "grad_norm": 2.953125, "learning_rate": 0.00018301622358247655, "loss": 2.0127, "step": 159985 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018301519310533554, "loss": 2.0806, "step": 159990 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018301416259983502, "loss": 2.1249, "step": 159995 }, { "epoch": 0.38, "grad_norm": 1.765625, "learning_rate": 0.0001830131320659754, "loss": 1.9913, "step": 160000 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018301210150375695, "loss": 2.2443, "step": 160005 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018301107091318007, "loss": 2.2263, "step": 160010 }, { "epoch": 0.38, "grad_norm": 1.734375, "learning_rate": 0.00018301004029424512, "loss": 1.8704, "step": 160015 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018300900964695244, "loss": 2.296, "step": 160020 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018300797897130236, "loss": 2.1405, "step": 160025 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018300694826729524, "loss": 2.1888, "step": 160030 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018300591753493147, "loss": 1.9601, "step": 160035 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018300488677421134, "loss": 2.1153, "step": 160040 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018300385598513525, "loss": 2.0865, "step": 160045 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018300282516770354, "loss": 2.1899, "step": 160050 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018300179432191653, "loss": 1.935, "step": 160055 }, { "epoch": 0.38, "grad_norm": 2.96875, "learning_rate": 0.00018300076344777465, "loss": 2.1614, "step": 160060 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018299973254527817, "loss": 2.0149, "step": 160065 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.0001829987016144275, "loss": 2.1383, "step": 160070 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018299767065522296, "loss": 2.1743, "step": 160075 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018299663966766486, "loss": 2.057, "step": 160080 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018299560865175368, "loss": 1.9534, "step": 160085 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018299457760748966, "loss": 2.1783, "step": 160090 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018299354653487319, "loss": 2.2623, "step": 160095 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018299251543390464, "loss": 2.1926, "step": 160100 }, { "epoch": 0.38, "grad_norm": 1.875, "learning_rate": 0.0001829914843045843, "loss": 2.1767, "step": 160105 }, { "epoch": 0.38, "grad_norm": 2.796875, "learning_rate": 0.00018299045314691258, "loss": 2.2946, "step": 160110 }, { "epoch": 0.38, "grad_norm": 2.625, "learning_rate": 0.0001829894219608898, "loss": 2.1533, "step": 160115 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018298839074651636, "loss": 2.0923, "step": 160120 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018298735950379258, "loss": 2.1228, "step": 160125 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001829863282327188, "loss": 2.0334, "step": 160130 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.0001829852969332954, "loss": 2.2621, "step": 160135 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001829842656055227, "loss": 2.1336, "step": 160140 }, { "epoch": 0.38, "grad_norm": 1.953125, "learning_rate": 0.00018298323424940108, "loss": 2.0202, "step": 160145 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018298220286493087, "loss": 2.2703, "step": 160150 }, { "epoch": 0.38, "grad_norm": 2.921875, "learning_rate": 0.00018298117145211246, "loss": 2.2081, "step": 160155 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.00018298014001094616, "loss": 2.362, "step": 160160 }, { "epoch": 0.38, "grad_norm": 2.71875, "learning_rate": 0.00018297910854143235, "loss": 2.1457, "step": 160165 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.00018297807704357135, "loss": 2.2447, "step": 160170 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018297704551736353, "loss": 2.0495, "step": 160175 }, { "epoch": 0.38, "grad_norm": 1.8984375, "learning_rate": 0.00018297601396280927, "loss": 2.0537, "step": 160180 }, { "epoch": 0.38, "grad_norm": 1.7578125, "learning_rate": 0.0001829749823799089, "loss": 2.1723, "step": 160185 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018297395076866276, "loss": 2.1133, "step": 160190 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018297291912907122, "loss": 2.1839, "step": 160195 }, { "epoch": 0.38, "grad_norm": 2.640625, "learning_rate": 0.00018297188746113462, "loss": 2.1555, "step": 160200 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.0001829708557648533, "loss": 2.196, "step": 160205 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.00018296982404022763, "loss": 2.1659, "step": 160210 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.000182968792287258, "loss": 2.0816, "step": 160215 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018296776050594473, "loss": 2.0478, "step": 160220 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018296672869628813, "loss": 2.1158, "step": 160225 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018296569685828862, "loss": 2.1642, "step": 160230 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018296466499194652, "loss": 2.1367, "step": 160235 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018296363309726216, "loss": 2.1582, "step": 160240 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018296260117423594, "loss": 2.0902, "step": 160245 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018296156922286818, "loss": 2.1057, "step": 160250 }, { "epoch": 0.38, "grad_norm": 2.5, "learning_rate": 0.00018296053724315927, "loss": 2.0457, "step": 160255 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.0001829595052351095, "loss": 2.1924, "step": 160260 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.0001829584731987193, "loss": 2.0038, "step": 160265 }, { "epoch": 0.38, "grad_norm": 2.890625, "learning_rate": 0.00018295744113398893, "loss": 2.0037, "step": 160270 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018295640904091883, "loss": 2.1999, "step": 160275 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018295537691950927, "loss": 2.3345, "step": 160280 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001829543447697607, "loss": 1.9443, "step": 160285 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001829533125916734, "loss": 2.2244, "step": 160290 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018295228038524777, "loss": 2.1365, "step": 160295 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018295124815048412, "loss": 2.052, "step": 160300 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018295021588738282, "loss": 2.0361, "step": 160305 }, { "epoch": 0.38, "grad_norm": 2.59375, "learning_rate": 0.0001829491835959442, "loss": 2.0136, "step": 160310 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018294815127616865, "loss": 2.1081, "step": 160315 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018294711892805648, "loss": 2.1832, "step": 160320 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018294608655160813, "loss": 2.0539, "step": 160325 }, { "epoch": 0.38, "grad_norm": 3.484375, "learning_rate": 0.00018294505414682386, "loss": 2.1868, "step": 160330 }, { "epoch": 0.38, "grad_norm": 2.6875, "learning_rate": 0.00018294402171370407, "loss": 2.115, "step": 160335 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018294298925224908, "loss": 2.032, "step": 160340 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018294195676245924, "loss": 1.993, "step": 160345 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018294092424433496, "loss": 2.25, "step": 160350 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018293989169787655, "loss": 2.1903, "step": 160355 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018293885912308436, "loss": 2.0416, "step": 160360 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018293782651995874, "loss": 2.1701, "step": 160365 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001829367938885001, "loss": 2.1339, "step": 160370 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001829357612287087, "loss": 1.9244, "step": 160375 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018293472854058495, "loss": 2.2609, "step": 160380 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.0001829336958241292, "loss": 2.0654, "step": 160385 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.0001829326630793418, "loss": 2.0738, "step": 160390 }, { "epoch": 0.38, "grad_norm": 1.84375, "learning_rate": 0.0001829316303062231, "loss": 2.263, "step": 160395 }, { "epoch": 0.38, "grad_norm": 1.9453125, "learning_rate": 0.00018293059750477343, "loss": 1.9223, "step": 160400 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018292956467499318, "loss": 2.0659, "step": 160405 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001829285318168827, "loss": 2.218, "step": 160410 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.0001829274989304423, "loss": 2.179, "step": 160415 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018292646601567237, "loss": 2.1046, "step": 160420 }, { "epoch": 0.38, "grad_norm": 1.953125, "learning_rate": 0.00018292543307257326, "loss": 1.9626, "step": 160425 }, { "epoch": 0.38, "grad_norm": 2.640625, "learning_rate": 0.00018292440010114533, "loss": 2.0828, "step": 160430 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.00018292336710138888, "loss": 2.0683, "step": 160435 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018292233407330433, "loss": 2.2205, "step": 160440 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018292130101689203, "loss": 2.3325, "step": 160445 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.0001829202679321523, "loss": 1.9862, "step": 160450 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018291923481908548, "loss": 2.125, "step": 160455 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018291820167769197, "loss": 2.2542, "step": 160460 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018291716850797206, "loss": 2.2297, "step": 160465 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001829161353099262, "loss": 2.0575, "step": 160470 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018291510208355466, "loss": 2.0053, "step": 160475 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.0001829140688288578, "loss": 2.3183, "step": 160480 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018291303554583598, "loss": 2.1088, "step": 160485 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001829120022344896, "loss": 2.1916, "step": 160490 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018291096889481894, "loss": 2.3205, "step": 160495 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018290993552682444, "loss": 2.3901, "step": 160500 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018290890213050638, "loss": 2.1576, "step": 160505 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001829078687058651, "loss": 2.07, "step": 160510 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018290683525290104, "loss": 2.248, "step": 160515 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018290580177161448, "loss": 1.995, "step": 160520 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018290476826200578, "loss": 2.0107, "step": 160525 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018290373472407533, "loss": 2.122, "step": 160530 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018290270115782343, "loss": 1.932, "step": 160535 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.0001829016675632505, "loss": 2.3978, "step": 160540 }, { "epoch": 0.38, "grad_norm": 1.90625, "learning_rate": 0.00018290063394035683, "loss": 2.0727, "step": 160545 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018289960028914281, "loss": 2.1007, "step": 160550 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001828985666096088, "loss": 2.1957, "step": 160555 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018289753290175512, "loss": 2.1801, "step": 160560 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018289649916558212, "loss": 2.0727, "step": 160565 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.0001828954654010902, "loss": 1.9547, "step": 160570 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018289443160827968, "loss": 2.053, "step": 160575 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.0001828933977871509, "loss": 1.9755, "step": 160580 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018289236393770426, "loss": 2.015, "step": 160585 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018289133005994007, "loss": 2.1389, "step": 160590 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001828902961538587, "loss": 2.2544, "step": 160595 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.0001828892622194605, "loss": 2.1896, "step": 160600 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018288822825674586, "loss": 2.2033, "step": 160605 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018288719426571505, "loss": 2.0705, "step": 160610 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018288616024636847, "loss": 2.0838, "step": 160615 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001828851261987065, "loss": 2.2631, "step": 160620 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.00018288409212272947, "loss": 2.2402, "step": 160625 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018288305801843771, "loss": 2.0082, "step": 160630 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018288202388583161, "loss": 2.1888, "step": 160635 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.0001828809897249115, "loss": 2.158, "step": 160640 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018287995553567776, "loss": 2.1735, "step": 160645 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018287892131813068, "loss": 1.9987, "step": 160650 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.0001828778870722707, "loss": 2.3035, "step": 160655 }, { "epoch": 0.38, "grad_norm": 2.671875, "learning_rate": 0.0001828768527980981, "loss": 2.2127, "step": 160660 }, { "epoch": 0.38, "grad_norm": 2.609375, "learning_rate": 0.00018287581849561329, "loss": 2.0244, "step": 160665 }, { "epoch": 0.38, "grad_norm": 1.953125, "learning_rate": 0.00018287478416481657, "loss": 1.9326, "step": 160670 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018287374980570836, "loss": 2.2271, "step": 160675 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018287271541828895, "loss": 2.2923, "step": 160680 }, { "epoch": 0.38, "grad_norm": 1.8984375, "learning_rate": 0.0001828716810025587, "loss": 2.2354, "step": 160685 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018287064655851803, "loss": 2.1227, "step": 160690 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.0001828696120861672, "loss": 2.2287, "step": 160695 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018286857758550662, "loss": 2.1737, "step": 160700 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.0001828675430565366, "loss": 2.1996, "step": 160705 }, { "epoch": 0.38, "grad_norm": 2.671875, "learning_rate": 0.00018286650849925757, "loss": 2.1443, "step": 160710 }, { "epoch": 0.38, "grad_norm": 2.84375, "learning_rate": 0.00018286547391366983, "loss": 1.9803, "step": 160715 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018286443929977375, "loss": 2.2589, "step": 160720 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018286340465756965, "loss": 2.2311, "step": 160725 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018286236998705792, "loss": 1.9381, "step": 160730 }, { "epoch": 0.38, "grad_norm": 1.84375, "learning_rate": 0.0001828613352882389, "loss": 2.1167, "step": 160735 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018286030056111297, "loss": 2.0568, "step": 160740 }, { "epoch": 0.38, "grad_norm": 1.859375, "learning_rate": 0.00018285926580568042, "loss": 2.1602, "step": 160745 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018285823102194164, "loss": 2.1064, "step": 160750 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018285719620989702, "loss": 2.1929, "step": 160755 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018285616136954683, "loss": 2.0686, "step": 160760 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018285512650089155, "loss": 2.333, "step": 160765 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.0001828540916039314, "loss": 1.8868, "step": 160770 }, { "epoch": 0.38, "grad_norm": 1.9921875, "learning_rate": 0.00018285305667866677, "loss": 2.0034, "step": 160775 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018285202172509807, "loss": 2.1968, "step": 160780 }, { "epoch": 0.38, "grad_norm": 2.703125, "learning_rate": 0.00018285098674322562, "loss": 2.2316, "step": 160785 }, { "epoch": 0.38, "grad_norm": 1.84375, "learning_rate": 0.00018284995173304975, "loss": 2.1263, "step": 160790 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018284891669457086, "loss": 2.0548, "step": 160795 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018284788162778925, "loss": 2.0855, "step": 160800 }, { "epoch": 0.38, "grad_norm": 1.9453125, "learning_rate": 0.0001828468465327053, "loss": 1.9548, "step": 160805 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001828458114093194, "loss": 2.0491, "step": 160810 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018284477625763184, "loss": 1.9692, "step": 160815 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.000182843741077643, "loss": 2.0347, "step": 160820 }, { "epoch": 0.38, "grad_norm": 2.8125, "learning_rate": 0.00018284270586935324, "loss": 2.2094, "step": 160825 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.0001828416706327629, "loss": 2.2245, "step": 160830 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018284063536787236, "loss": 2.0467, "step": 160835 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018283960007468196, "loss": 2.1705, "step": 160840 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018283856475319205, "loss": 1.983, "step": 160845 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018283752940340296, "loss": 2.3017, "step": 160850 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001828364940253151, "loss": 2.1688, "step": 160855 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018283545861892876, "loss": 1.8749, "step": 160860 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018283442318424437, "loss": 2.0988, "step": 160865 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.0001828333877212622, "loss": 2.2259, "step": 160870 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018283235222998264, "loss": 2.1283, "step": 160875 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018283131671040605, "loss": 2.1129, "step": 160880 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018283028116253277, "loss": 2.1464, "step": 160885 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.0001828292455863632, "loss": 2.2465, "step": 160890 }, { "epoch": 0.38, "grad_norm": 2.640625, "learning_rate": 0.00018282820998189765, "loss": 2.01, "step": 160895 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018282717434913646, "loss": 1.9622, "step": 160900 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018282613868808, "loss": 1.9936, "step": 160905 }, { "epoch": 0.38, "grad_norm": 2.640625, "learning_rate": 0.00018282510299872864, "loss": 2.1816, "step": 160910 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018282406728108275, "loss": 2.3268, "step": 160915 }, { "epoch": 0.38, "grad_norm": 1.8671875, "learning_rate": 0.00018282303153514261, "loss": 2.0483, "step": 160920 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018282199576090866, "loss": 2.3195, "step": 160925 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018282095995838117, "loss": 2.1779, "step": 160930 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018281992412756058, "loss": 2.0029, "step": 160935 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.0001828188882684472, "loss": 2.2208, "step": 160940 }, { "epoch": 0.38, "grad_norm": 2.8125, "learning_rate": 0.00018281785238104137, "loss": 2.358, "step": 160945 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018281681646534345, "loss": 2.5191, "step": 160950 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.0001828157805213538, "loss": 2.0631, "step": 160955 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018281474454907281, "loss": 2.1001, "step": 160960 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018281370854850078, "loss": 2.1496, "step": 160965 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001828126725196381, "loss": 1.8636, "step": 160970 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.0001828116364624851, "loss": 1.941, "step": 160975 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018281060037704213, "loss": 2.323, "step": 160980 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018280956426330958, "loss": 2.2329, "step": 160985 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018280852812128777, "loss": 1.9984, "step": 160990 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018280749195097705, "loss": 2.2558, "step": 160995 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018280645575237782, "loss": 2.1511, "step": 161000 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001828054195254904, "loss": 2.0597, "step": 161005 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018280438327031512, "loss": 1.9912, "step": 161010 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018280334698685238, "loss": 2.1123, "step": 161015 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001828023106751025, "loss": 2.0296, "step": 161020 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018280127433506585, "loss": 2.1633, "step": 161025 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018280023796674283, "loss": 2.1559, "step": 161030 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.0001827992015701337, "loss": 2.0576, "step": 161035 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018279816514523887, "loss": 2.2214, "step": 161040 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018279712869205867, "loss": 2.0438, "step": 161045 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018279609221059348, "loss": 2.1312, "step": 161050 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018279505570084365, "loss": 2.0477, "step": 161055 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.0001827940191628095, "loss": 2.1571, "step": 161060 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018279298259649146, "loss": 2.0599, "step": 161065 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018279194600188983, "loss": 2.2765, "step": 161070 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018279090937900496, "loss": 1.9574, "step": 161075 }, { "epoch": 0.38, "grad_norm": 2.8125, "learning_rate": 0.0001827898727278372, "loss": 1.9629, "step": 161080 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018278883604838693, "loss": 2.1833, "step": 161085 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018278779934065447, "loss": 2.1794, "step": 161090 }, { "epoch": 0.38, "grad_norm": 1.9921875, "learning_rate": 0.00018278676260464024, "loss": 2.0738, "step": 161095 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001827857258403445, "loss": 2.3109, "step": 161100 }, { "epoch": 0.38, "grad_norm": 2.5, "learning_rate": 0.00018278468904776767, "loss": 2.2596, "step": 161105 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.0001827836522269101, "loss": 2.0146, "step": 161110 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018278261537777214, "loss": 2.2378, "step": 161115 }, { "epoch": 0.38, "grad_norm": 1.8828125, "learning_rate": 0.00018278157850035412, "loss": 1.9681, "step": 161120 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018278054159465642, "loss": 2.0875, "step": 161125 }, { "epoch": 0.38, "grad_norm": 1.9921875, "learning_rate": 0.00018277950466067937, "loss": 2.1598, "step": 161130 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018277846769842334, "loss": 2.3493, "step": 161135 }, { "epoch": 0.38, "grad_norm": 1.9765625, "learning_rate": 0.00018277743070788872, "loss": 2.1207, "step": 161140 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001827763936890758, "loss": 1.9743, "step": 161145 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018277535664198497, "loss": 2.1066, "step": 161150 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.0001827743195666166, "loss": 1.9409, "step": 161155 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.000182773282462971, "loss": 2.3016, "step": 161160 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018277224533104852, "loss": 1.8943, "step": 161165 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018277120817084957, "loss": 2.2544, "step": 161170 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018277017098237446, "loss": 1.9151, "step": 161175 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018276913376562358, "loss": 2.0968, "step": 161180 }, { "epoch": 0.38, "grad_norm": 1.8046875, "learning_rate": 0.00018276809652059724, "loss": 1.9936, "step": 161185 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018276705924729584, "loss": 2.1146, "step": 161190 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001827660219457197, "loss": 2.1901, "step": 161195 }, { "epoch": 0.38, "grad_norm": 1.8828125, "learning_rate": 0.00018276498461586917, "loss": 2.3057, "step": 161200 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.0001827639472577446, "loss": 2.2232, "step": 161205 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018276290987134642, "loss": 2.0923, "step": 161210 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018276187245667492, "loss": 2.2755, "step": 161215 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018276083501373043, "loss": 2.1751, "step": 161220 }, { "epoch": 0.38, "grad_norm": 2.78125, "learning_rate": 0.00018275979754251334, "loss": 2.1773, "step": 161225 }, { "epoch": 0.38, "grad_norm": 1.9296875, "learning_rate": 0.00018275876004302406, "loss": 2.1618, "step": 161230 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018275772251526284, "loss": 2.1783, "step": 161235 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018275668495923008, "loss": 2.2301, "step": 161240 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018275564737492615, "loss": 2.124, "step": 161245 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018275460976235138, "loss": 2.1744, "step": 161250 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001827535721215061, "loss": 2.15, "step": 161255 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018275253445239076, "loss": 2.2012, "step": 161260 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018275149675500566, "loss": 2.234, "step": 161265 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001827504590293511, "loss": 2.0508, "step": 161270 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001827494212754275, "loss": 2.1736, "step": 161275 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001827483834932352, "loss": 2.3194, "step": 161280 }, { "epoch": 0.38, "grad_norm": 1.7734375, "learning_rate": 0.00018274734568277454, "loss": 2.0165, "step": 161285 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.0001827463078440459, "loss": 2.2702, "step": 161290 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001827452699770496, "loss": 2.2669, "step": 161295 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018274423208178604, "loss": 2.2107, "step": 161300 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018274319415825555, "loss": 2.156, "step": 161305 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018274215620645847, "loss": 2.048, "step": 161310 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018274111822639516, "loss": 2.1744, "step": 161315 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018274008021806602, "loss": 2.2281, "step": 161320 }, { "epoch": 0.38, "grad_norm": 2.703125, "learning_rate": 0.00018273904218147133, "loss": 2.1692, "step": 161325 }, { "epoch": 0.38, "grad_norm": 2.640625, "learning_rate": 0.0001827380041166115, "loss": 2.2019, "step": 161330 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018273696602348686, "loss": 2.1322, "step": 161335 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.0001827359279020978, "loss": 2.1932, "step": 161340 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018273488975244462, "loss": 2.0458, "step": 161345 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.0001827338515745277, "loss": 1.9734, "step": 161350 }, { "epoch": 0.38, "grad_norm": 2.71875, "learning_rate": 0.00018273281336834738, "loss": 2.2283, "step": 161355 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018273177513390408, "loss": 2.238, "step": 161360 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018273073687119807, "loss": 1.917, "step": 161365 }, { "epoch": 0.38, "grad_norm": 1.5, "learning_rate": 0.00018272969858022973, "loss": 2.1464, "step": 161370 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018272866026099944, "loss": 2.1691, "step": 161375 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018272762191350755, "loss": 2.2482, "step": 161380 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018272658353775438, "loss": 2.2256, "step": 161385 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018272554513374032, "loss": 1.9475, "step": 161390 }, { "epoch": 0.38, "grad_norm": 2.703125, "learning_rate": 0.00018272450670146572, "loss": 2.11, "step": 161395 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018272346824093095, "loss": 2.3202, "step": 161400 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.0001827224297521363, "loss": 2.1726, "step": 161405 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018272139123508217, "loss": 2.0245, "step": 161410 }, { "epoch": 0.38, "grad_norm": 1.828125, "learning_rate": 0.00018272035268976895, "loss": 2.1975, "step": 161415 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018271931411619692, "loss": 1.9609, "step": 161420 }, { "epoch": 0.38, "grad_norm": 4.15625, "learning_rate": 0.0001827182755143665, "loss": 2.2226, "step": 161425 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.000182717236884278, "loss": 2.3753, "step": 161430 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018271619822593177, "loss": 2.1577, "step": 161435 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018271515953932824, "loss": 2.0778, "step": 161440 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018271412082446767, "loss": 2.0064, "step": 161445 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018271308208135046, "loss": 2.1459, "step": 161450 }, { "epoch": 0.38, "grad_norm": 2.71875, "learning_rate": 0.00018271204330997696, "loss": 2.2385, "step": 161455 }, { "epoch": 0.38, "grad_norm": 2.71875, "learning_rate": 0.00018271100451034756, "loss": 2.3959, "step": 161460 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018270996568246254, "loss": 1.9546, "step": 161465 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018270892682632233, "loss": 1.9059, "step": 161470 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018270788794192723, "loss": 2.2312, "step": 161475 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018270684902927764, "loss": 2.2308, "step": 161480 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018270581008837384, "loss": 2.2144, "step": 161485 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018270477111921628, "loss": 2.0362, "step": 161490 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018270373212180525, "loss": 2.1272, "step": 161495 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.0001827026930961411, "loss": 2.061, "step": 161500 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018270165404222428, "loss": 2.1407, "step": 161505 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018270061496005502, "loss": 2.2799, "step": 161510 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018269957584963374, "loss": 2.096, "step": 161515 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018269853671096082, "loss": 2.2865, "step": 161520 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018269749754403655, "loss": 2.1158, "step": 161525 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018269645834886132, "loss": 2.1997, "step": 161530 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018269541912543547, "loss": 2.1252, "step": 161535 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018269437987375936, "loss": 1.9923, "step": 161540 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018269334059383335, "loss": 2.0622, "step": 161545 }, { "epoch": 0.38, "grad_norm": 5.5625, "learning_rate": 0.00018269230128565783, "loss": 1.993, "step": 161550 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.0001826912619492331, "loss": 2.2121, "step": 161555 }, { "epoch": 0.38, "grad_norm": 1.8359375, "learning_rate": 0.00018269022258455954, "loss": 1.9842, "step": 161560 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001826891831916375, "loss": 2.027, "step": 161565 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.0001826881437704673, "loss": 2.1098, "step": 161570 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018268710432104937, "loss": 2.0527, "step": 161575 }, { "epoch": 0.38, "grad_norm": 2.75, "learning_rate": 0.000182686064843384, "loss": 1.9059, "step": 161580 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018268502533747157, "loss": 1.8119, "step": 161585 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018268398580331246, "loss": 2.0912, "step": 161590 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018268294624090698, "loss": 2.1686, "step": 161595 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018268190665025552, "loss": 2.1536, "step": 161600 }, { "epoch": 0.38, "grad_norm": 2.71875, "learning_rate": 0.00018268086703135842, "loss": 2.2034, "step": 161605 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018267982738421603, "loss": 2.1542, "step": 161610 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018267878770882875, "loss": 2.26, "step": 161615 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018267774800519683, "loss": 2.136, "step": 161620 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018267670827332074, "loss": 2.282, "step": 161625 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018267566851320078, "loss": 2.1277, "step": 161630 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018267462872483727, "loss": 2.0563, "step": 161635 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018267358890823067, "loss": 2.0697, "step": 161640 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018267254906338123, "loss": 1.8993, "step": 161645 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018267150919028935, "loss": 2.0665, "step": 161650 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018267046928895542, "loss": 2.0731, "step": 161655 }, { "epoch": 0.38, "grad_norm": 2.609375, "learning_rate": 0.00018266942935937972, "loss": 2.1261, "step": 161660 }, { "epoch": 0.38, "grad_norm": 2.609375, "learning_rate": 0.00018266838940156265, "loss": 2.2368, "step": 161665 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018266734941550455, "loss": 2.2028, "step": 161670 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001826663094012058, "loss": 2.079, "step": 161675 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018266526935866676, "loss": 2.1199, "step": 161680 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018266422928788775, "loss": 1.9063, "step": 161685 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018266318918886914, "loss": 2.2902, "step": 161690 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018266214906161125, "loss": 2.018, "step": 161695 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001826611089061145, "loss": 1.7967, "step": 161700 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018266006872237923, "loss": 2.1276, "step": 161705 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018265902851040576, "loss": 2.0999, "step": 161710 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018265798827019446, "loss": 2.2523, "step": 161715 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018265694800174574, "loss": 2.1638, "step": 161720 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018265590770505984, "loss": 2.0724, "step": 161725 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018265486738013723, "loss": 2.0333, "step": 161730 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018265382702697822, "loss": 2.2603, "step": 161735 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.00018265278664558314, "loss": 2.196, "step": 161740 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018265174623595238, "loss": 2.1485, "step": 161745 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018265070579808628, "loss": 2.1246, "step": 161750 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018264966533198523, "loss": 2.2211, "step": 161755 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018264862483764952, "loss": 2.1581, "step": 161760 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018264758431507953, "loss": 2.193, "step": 161765 }, { "epoch": 0.38, "grad_norm": 3.0, "learning_rate": 0.00018264654376427564, "loss": 2.2847, "step": 161770 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018264550318523818, "loss": 2.1937, "step": 161775 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018264446257796756, "loss": 2.1769, "step": 161780 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018264342194246406, "loss": 1.9709, "step": 161785 }, { "epoch": 0.38, "grad_norm": 1.921875, "learning_rate": 0.00018264238127872808, "loss": 2.2059, "step": 161790 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018264134058675995, "loss": 2.2602, "step": 161795 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018264029986656005, "loss": 2.1512, "step": 161800 }, { "epoch": 0.38, "grad_norm": 2.65625, "learning_rate": 0.00018263925911812874, "loss": 1.9918, "step": 161805 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018263821834146632, "loss": 2.1841, "step": 161810 }, { "epoch": 0.38, "grad_norm": 1.7890625, "learning_rate": 0.0001826371775365732, "loss": 1.9473, "step": 161815 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018263613670344974, "loss": 2.1037, "step": 161820 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018263509584209628, "loss": 2.172, "step": 161825 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018263405495251315, "loss": 1.8939, "step": 161830 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018263301403470075, "loss": 2.1137, "step": 161835 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018263197308865937, "loss": 2.1595, "step": 161840 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018263093211438948, "loss": 2.0707, "step": 161845 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018262989111189132, "loss": 2.2607, "step": 161850 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001826288500811653, "loss": 2.1562, "step": 161855 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018262780902221176, "loss": 2.1565, "step": 161860 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018262676793503105, "loss": 1.9874, "step": 161865 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018262572681962356, "loss": 2.1121, "step": 161870 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018262468567598964, "loss": 1.9444, "step": 161875 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.0001826236445041296, "loss": 2.3054, "step": 161880 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001826226033040438, "loss": 2.1052, "step": 161885 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018262156207573266, "loss": 2.1263, "step": 161890 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018262052081919648, "loss": 2.2497, "step": 161895 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018261947953443565, "loss": 2.2239, "step": 161900 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.0001826184382214505, "loss": 2.2031, "step": 161905 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.0001826173968802414, "loss": 2.0581, "step": 161910 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018261635551080867, "loss": 2.0238, "step": 161915 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.0001826153141131527, "loss": 2.2003, "step": 161920 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018261427268727385, "loss": 2.2627, "step": 161925 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018261323123317248, "loss": 2.1244, "step": 161930 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.0001826121897508489, "loss": 2.3235, "step": 161935 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018261114824030355, "loss": 2.1851, "step": 161940 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018261010670153667, "loss": 1.9265, "step": 161945 }, { "epoch": 0.38, "grad_norm": 1.828125, "learning_rate": 0.00018260906513454871, "loss": 2.2703, "step": 161950 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018260802353934, "loss": 2.1641, "step": 161955 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.0001826069819159109, "loss": 2.1507, "step": 161960 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018260594026426173, "loss": 1.9243, "step": 161965 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.0001826048985843929, "loss": 2.2403, "step": 161970 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018260385687630473, "loss": 2.1389, "step": 161975 }, { "epoch": 0.38, "grad_norm": 1.78125, "learning_rate": 0.00018260281513999757, "loss": 2.1015, "step": 161980 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.0001826017733754718, "loss": 1.9035, "step": 161985 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018260073158272774, "loss": 2.2366, "step": 161990 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.0001825996897617658, "loss": 2.2382, "step": 161995 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.0001825986479125863, "loss": 2.3593, "step": 162000 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018259760603518962, "loss": 2.1034, "step": 162005 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001825965641295761, "loss": 2.0114, "step": 162010 }, { "epoch": 0.38, "grad_norm": 2.953125, "learning_rate": 0.00018259552219574609, "loss": 2.137, "step": 162015 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018259448023369993, "loss": 2.1242, "step": 162020 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018259343824343803, "loss": 2.1557, "step": 162025 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.0001825923962249607, "loss": 2.0697, "step": 162030 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.0001825913541782683, "loss": 2.2032, "step": 162035 }, { "epoch": 0.38, "grad_norm": 2.703125, "learning_rate": 0.0001825903121033612, "loss": 2.1099, "step": 162040 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018258927000023976, "loss": 2.1161, "step": 162045 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018258822786890432, "loss": 2.1579, "step": 162050 }, { "epoch": 0.38, "grad_norm": 2.625, "learning_rate": 0.00018258718570935523, "loss": 2.0057, "step": 162055 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001825861435215929, "loss": 2.2132, "step": 162060 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018258510130561762, "loss": 2.0829, "step": 162065 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018258405906142975, "loss": 2.0063, "step": 162070 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018258301678902972, "loss": 2.0121, "step": 162075 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.0001825819744884178, "loss": 2.2429, "step": 162080 }, { "epoch": 0.38, "grad_norm": 1.765625, "learning_rate": 0.00018258093215959436, "loss": 2.0371, "step": 162085 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018257988980255983, "loss": 1.8888, "step": 162090 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018257884741731446, "loss": 1.8977, "step": 162095 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001825778050038587, "loss": 2.1775, "step": 162100 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018257676256219283, "loss": 2.0918, "step": 162105 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018257572009231724, "loss": 2.1829, "step": 162110 }, { "epoch": 0.38, "grad_norm": 1.984375, "learning_rate": 0.00018257467759423231, "loss": 2.1551, "step": 162115 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018257363506793837, "loss": 2.0247, "step": 162120 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018257259251343574, "loss": 2.272, "step": 162125 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018257154993072487, "loss": 2.1096, "step": 162130 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018257050731980602, "loss": 2.4359, "step": 162135 }, { "epoch": 0.38, "grad_norm": 1.96875, "learning_rate": 0.00018256946468067962, "loss": 2.3471, "step": 162140 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018256842201334599, "loss": 2.149, "step": 162145 }, { "epoch": 0.38, "grad_norm": 3.25, "learning_rate": 0.00018256737931780545, "loss": 2.2885, "step": 162150 }, { "epoch": 0.38, "grad_norm": 1.7890625, "learning_rate": 0.00018256633659405845, "loss": 2.2272, "step": 162155 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018256529384210525, "loss": 2.1708, "step": 162160 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018256425106194627, "loss": 2.0383, "step": 162165 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018256320825358184, "loss": 2.145, "step": 162170 }, { "epoch": 0.38, "grad_norm": 1.8125, "learning_rate": 0.00018256216541701233, "loss": 2.1693, "step": 162175 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018256112255223805, "loss": 2.0404, "step": 162180 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018256007965925943, "loss": 2.1882, "step": 162185 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018255903673807677, "loss": 2.2739, "step": 162190 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018255799378869044, "loss": 2.032, "step": 162195 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018255695081110084, "loss": 2.2358, "step": 162200 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018255590780530824, "loss": 2.0692, "step": 162205 }, { "epoch": 0.38, "grad_norm": 1.953125, "learning_rate": 0.0001825548647713131, "loss": 2.0648, "step": 162210 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.0001825538217091157, "loss": 2.0649, "step": 162215 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.0001825527786187164, "loss": 2.331, "step": 162220 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018255173550011557, "loss": 2.1546, "step": 162225 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018255069235331356, "loss": 2.2165, "step": 162230 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001825496491783108, "loss": 2.1016, "step": 162235 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001825486059751075, "loss": 1.9674, "step": 162240 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018254756274370415, "loss": 2.178, "step": 162245 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018254651948410104, "loss": 1.9986, "step": 162250 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018254547619629856, "loss": 2.0546, "step": 162255 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018254443288029704, "loss": 2.0964, "step": 162260 }, { "epoch": 0.38, "grad_norm": 3.015625, "learning_rate": 0.00018254338953609684, "loss": 1.9621, "step": 162265 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018254234616369832, "loss": 2.1813, "step": 162270 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018254130276310182, "loss": 1.9294, "step": 162275 }, { "epoch": 0.38, "grad_norm": 2.5, "learning_rate": 0.00018254025933430774, "loss": 2.1455, "step": 162280 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018253921587731638, "loss": 2.1886, "step": 162285 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.00018253817239212818, "loss": 2.0719, "step": 162290 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.0001825371288787434, "loss": 1.9543, "step": 162295 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.00018253608533716246, "loss": 2.0203, "step": 162300 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.0001825350417673857, "loss": 2.0989, "step": 162305 }, { "epoch": 0.38, "grad_norm": 1.8125, "learning_rate": 0.00018253399816941347, "loss": 2.0708, "step": 162310 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.0001825329545432461, "loss": 2.0744, "step": 162315 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.000182531910888884, "loss": 2.2674, "step": 162320 }, { "epoch": 0.38, "grad_norm": 1.96875, "learning_rate": 0.0001825308672063275, "loss": 2.1285, "step": 162325 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018252982349557697, "loss": 2.059, "step": 162330 }, { "epoch": 0.38, "grad_norm": 1.9375, "learning_rate": 0.00018252877975663274, "loss": 2.139, "step": 162335 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018252773598949516, "loss": 2.1768, "step": 162340 }, { "epoch": 0.38, "grad_norm": 2.65625, "learning_rate": 0.00018252669219416466, "loss": 1.9791, "step": 162345 }, { "epoch": 0.38, "grad_norm": 2.734375, "learning_rate": 0.00018252564837064152, "loss": 2.1868, "step": 162350 }, { "epoch": 0.38, "grad_norm": 1.859375, "learning_rate": 0.00018252460451892612, "loss": 2.2054, "step": 162355 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001825235606390188, "loss": 2.0508, "step": 162360 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018252251673091997, "loss": 2.236, "step": 162365 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018252147279462994, "loss": 2.0725, "step": 162370 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018252042883014908, "loss": 2.1105, "step": 162375 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.0001825193848374777, "loss": 2.0852, "step": 162380 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018251834081661628, "loss": 2.1849, "step": 162385 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018251729676756507, "loss": 1.9868, "step": 162390 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018251625269032445, "loss": 2.116, "step": 162395 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018251520858489478, "loss": 2.2374, "step": 162400 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001825141644512764, "loss": 1.9992, "step": 162405 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001825131202894697, "loss": 2.1149, "step": 162410 }, { "epoch": 0.38, "grad_norm": 1.875, "learning_rate": 0.00018251207609947502, "loss": 2.2024, "step": 162415 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018251103188129272, "loss": 2.0992, "step": 162420 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018250998763492317, "loss": 2.0899, "step": 162425 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.0001825089433603667, "loss": 2.051, "step": 162430 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.00018250789905762367, "loss": 2.1596, "step": 162435 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018250685472669445, "loss": 1.8257, "step": 162440 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.0001825058103675794, "loss": 2.1191, "step": 162445 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018250476598027885, "loss": 2.148, "step": 162450 }, { "epoch": 0.38, "grad_norm": 2.625, "learning_rate": 0.0001825037215647932, "loss": 2.2904, "step": 162455 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018250267712112276, "loss": 1.9989, "step": 162460 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.0001825016326492679, "loss": 2.0838, "step": 162465 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018250058814922904, "loss": 2.0618, "step": 162470 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018249954362100642, "loss": 2.0936, "step": 162475 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001824984990646005, "loss": 2.2149, "step": 162480 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001824974544800116, "loss": 2.1815, "step": 162485 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018249640986724004, "loss": 2.277, "step": 162490 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018249536522628622, "loss": 2.101, "step": 162495 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.0001824943205571505, "loss": 2.2369, "step": 162500 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018249327585983323, "loss": 2.1524, "step": 162505 }, { "epoch": 0.38, "grad_norm": 1.828125, "learning_rate": 0.00018249223113433476, "loss": 2.0405, "step": 162510 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018249118638065541, "loss": 2.3022, "step": 162515 }, { "epoch": 0.38, "grad_norm": 1.640625, "learning_rate": 0.00018249014159879562, "loss": 2.0308, "step": 162520 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018248909678875567, "loss": 2.1764, "step": 162525 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018248805195053596, "loss": 2.1104, "step": 162530 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018248700708413683, "loss": 2.2141, "step": 162535 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018248596218955866, "loss": 2.1402, "step": 162540 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018248491726680178, "loss": 2.1574, "step": 162545 }, { "epoch": 0.38, "grad_norm": 1.9140625, "learning_rate": 0.00018248387231586656, "loss": 2.2596, "step": 162550 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018248282733675334, "loss": 2.4067, "step": 162555 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018248178232946251, "loss": 2.1719, "step": 162560 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018248073729399439, "loss": 2.2377, "step": 162565 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.00018247969223034936, "loss": 2.1031, "step": 162570 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001824786471385278, "loss": 2.1999, "step": 162575 }, { "epoch": 0.38, "grad_norm": 2.65625, "learning_rate": 0.00018247760201852998, "loss": 2.2362, "step": 162580 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018247655687035635, "loss": 2.0883, "step": 162585 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018247551169400724, "loss": 2.3188, "step": 162590 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.000182474466489483, "loss": 2.0708, "step": 162595 }, { "epoch": 0.38, "grad_norm": 1.75, "learning_rate": 0.00018247342125678399, "loss": 2.2089, "step": 162600 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018247237599591052, "loss": 2.2231, "step": 162605 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.000182471330706863, "loss": 2.0936, "step": 162610 }, { "epoch": 0.38, "grad_norm": 2.5, "learning_rate": 0.0001824702853896418, "loss": 2.216, "step": 162615 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.00018246924004424727, "loss": 2.3262, "step": 162620 }, { "epoch": 0.38, "grad_norm": 1.9453125, "learning_rate": 0.00018246819467067972, "loss": 2.0624, "step": 162625 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018246714926893956, "loss": 2.1758, "step": 162630 }, { "epoch": 0.38, "grad_norm": 1.5859375, "learning_rate": 0.0001824661038390271, "loss": 2.071, "step": 162635 }, { "epoch": 0.38, "grad_norm": 2.625, "learning_rate": 0.00018246505838094278, "loss": 2.2167, "step": 162640 }, { "epoch": 0.38, "grad_norm": 1.828125, "learning_rate": 0.00018246401289468683, "loss": 2.2232, "step": 162645 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001824629673802597, "loss": 1.9438, "step": 162650 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018246192183766176, "loss": 2.0877, "step": 162655 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018246087626689329, "loss": 2.135, "step": 162660 }, { "epoch": 0.38, "grad_norm": 2.0, "learning_rate": 0.00018245983066795472, "loss": 2.266, "step": 162665 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018245878504084636, "loss": 2.165, "step": 162670 }, { "epoch": 0.38, "grad_norm": 1.890625, "learning_rate": 0.00018245773938556858, "loss": 1.9933, "step": 162675 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018245669370212176, "loss": 2.2999, "step": 162680 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018245564799050624, "loss": 2.2333, "step": 162685 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018245460225072233, "loss": 2.2778, "step": 162690 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018245355648277046, "loss": 2.0464, "step": 162695 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018245251068665097, "loss": 2.1422, "step": 162700 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018245146486236418, "loss": 2.1072, "step": 162705 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001824504190099105, "loss": 2.193, "step": 162710 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018244937312929027, "loss": 2.1965, "step": 162715 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018244832722050382, "loss": 2.1359, "step": 162720 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018244728128355154, "loss": 2.1592, "step": 162725 }, { "epoch": 0.38, "grad_norm": 1.8984375, "learning_rate": 0.00018244623531843375, "loss": 1.9165, "step": 162730 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018244518932515085, "loss": 2.1895, "step": 162735 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018244414330370316, "loss": 2.1063, "step": 162740 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018244309725409108, "loss": 2.1092, "step": 162745 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018244205117631492, "loss": 2.1748, "step": 162750 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018244100507037507, "loss": 2.1678, "step": 162755 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.0001824399589362719, "loss": 2.1369, "step": 162760 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001824389127740057, "loss": 2.0729, "step": 162765 }, { "epoch": 0.38, "grad_norm": 2.65625, "learning_rate": 0.0001824378665835769, "loss": 2.1561, "step": 162770 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.0001824368203649858, "loss": 2.1013, "step": 162775 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018243577411823284, "loss": 2.0519, "step": 162780 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018243472784331828, "loss": 2.1263, "step": 162785 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018243368154024253, "loss": 2.1669, "step": 162790 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018243263520900595, "loss": 2.0936, "step": 162795 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018243158884960893, "loss": 2.2986, "step": 162800 }, { "epoch": 0.38, "grad_norm": 1.984375, "learning_rate": 0.0001824305424620517, "loss": 2.0879, "step": 162805 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018242949604633474, "loss": 2.0734, "step": 162810 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018242844960245837, "loss": 2.1355, "step": 162815 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018242740313042296, "loss": 2.2764, "step": 162820 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018242635663022884, "loss": 2.1459, "step": 162825 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001824253101018764, "loss": 2.1657, "step": 162830 }, { "epoch": 0.38, "grad_norm": 1.84375, "learning_rate": 0.00018242426354536594, "loss": 2.1367, "step": 162835 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018242321696069786, "loss": 2.2306, "step": 162840 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018242217034787256, "loss": 2.0163, "step": 162845 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018242112370689032, "loss": 2.0573, "step": 162850 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018242007703775154, "loss": 1.9452, "step": 162855 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018241903034045656, "loss": 2.1624, "step": 162860 }, { "epoch": 0.38, "grad_norm": 1.8671875, "learning_rate": 0.00018241798361500573, "loss": 1.9091, "step": 162865 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.00018241693686139944, "loss": 2.1091, "step": 162870 }, { "epoch": 0.38, "grad_norm": 1.8984375, "learning_rate": 0.00018241589007963804, "loss": 2.173, "step": 162875 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018241484326972186, "loss": 2.0297, "step": 162880 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001824137964316513, "loss": 2.3112, "step": 162885 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018241274956542664, "loss": 2.235, "step": 162890 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018241170267104833, "loss": 2.1737, "step": 162895 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018241065574851667, "loss": 2.2296, "step": 162900 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018240960879783205, "loss": 2.0325, "step": 162905 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.0001824085618189948, "loss": 2.0829, "step": 162910 }, { "epoch": 0.38, "grad_norm": 2.765625, "learning_rate": 0.0001824075148120053, "loss": 2.0583, "step": 162915 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.0001824064677768639, "loss": 2.1973, "step": 162920 }, { "epoch": 0.38, "grad_norm": 2.8125, "learning_rate": 0.00018240542071357095, "loss": 2.1397, "step": 162925 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.0001824043736221268, "loss": 2.0495, "step": 162930 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018240332650253186, "loss": 2.0214, "step": 162935 }, { "epoch": 0.38, "grad_norm": 2.640625, "learning_rate": 0.00018240227935478642, "loss": 1.9942, "step": 162940 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018240123217889086, "loss": 2.0364, "step": 162945 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.00018240018497484557, "loss": 2.1616, "step": 162950 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018239913774265086, "loss": 2.2153, "step": 162955 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018239809048230715, "loss": 2.0238, "step": 162960 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018239704319381473, "loss": 2.0517, "step": 162965 }, { "epoch": 0.38, "grad_norm": 1.6640625, "learning_rate": 0.00018239599587717398, "loss": 2.0965, "step": 162970 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018239494853238526, "loss": 2.0749, "step": 162975 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018239390115944897, "loss": 2.0332, "step": 162980 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018239285375836538, "loss": 2.1173, "step": 162985 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001823918063291349, "loss": 2.2865, "step": 162990 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.0001823907588717579, "loss": 1.904, "step": 162995 }, { "epoch": 0.38, "grad_norm": 1.84375, "learning_rate": 0.00018238971138623473, "loss": 2.1629, "step": 163000 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.00018238866387256574, "loss": 2.2633, "step": 163005 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018238761633075127, "loss": 2.1513, "step": 163010 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018238656876079173, "loss": 2.0504, "step": 163015 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.0001823855211626874, "loss": 2.1934, "step": 163020 }, { "epoch": 0.38, "grad_norm": 2.609375, "learning_rate": 0.0001823844735364387, "loss": 2.0191, "step": 163025 }, { "epoch": 0.38, "grad_norm": 2.65625, "learning_rate": 0.000182383425882046, "loss": 1.9834, "step": 163030 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018238237819950958, "loss": 1.9957, "step": 163035 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018238133048882987, "loss": 2.0416, "step": 163040 }, { "epoch": 0.38, "grad_norm": 1.96875, "learning_rate": 0.0001823802827500072, "loss": 2.1286, "step": 163045 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.00018237923498304196, "loss": 2.1348, "step": 163050 }, { "epoch": 0.38, "grad_norm": 2.953125, "learning_rate": 0.00018237818718793442, "loss": 2.089, "step": 163055 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018237713936468503, "loss": 2.0886, "step": 163060 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001823760915132941, "loss": 2.1669, "step": 163065 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018237504363376202, "loss": 2.0768, "step": 163070 }, { "epoch": 0.38, "grad_norm": 2.921875, "learning_rate": 0.00018237399572608914, "loss": 2.2326, "step": 163075 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001823729477902758, "loss": 2.0766, "step": 163080 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.00018237189982632236, "loss": 2.0069, "step": 163085 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.00018237085183422916, "loss": 2.0806, "step": 163090 }, { "epoch": 0.38, "grad_norm": 2.53125, "learning_rate": 0.00018236980381399663, "loss": 2.1636, "step": 163095 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018236875576562508, "loss": 2.1274, "step": 163100 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018236770768911484, "loss": 2.1477, "step": 163105 }, { "epoch": 0.38, "grad_norm": 1.9453125, "learning_rate": 0.00018236665958446633, "loss": 2.2403, "step": 163110 }, { "epoch": 0.38, "grad_norm": 2.375, "learning_rate": 0.00018236561145167985, "loss": 2.1257, "step": 163115 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018236456329075577, "loss": 2.2127, "step": 163120 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018236351510169448, "loss": 2.214, "step": 163125 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018236246688449633, "loss": 1.996, "step": 163130 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018236141863916166, "loss": 2.2443, "step": 163135 }, { "epoch": 0.38, "grad_norm": 2.015625, "learning_rate": 0.00018236037036569082, "loss": 2.034, "step": 163140 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.0001823593220640842, "loss": 2.2552, "step": 163145 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018235827373434216, "loss": 2.1216, "step": 163150 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.000182357225376465, "loss": 2.1769, "step": 163155 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018235617699045314, "loss": 2.2246, "step": 163160 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.0001823551285763069, "loss": 2.0738, "step": 163165 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018235408013402666, "loss": 2.0228, "step": 163170 }, { "epoch": 0.38, "grad_norm": 2.359375, "learning_rate": 0.0001823530316636128, "loss": 2.1296, "step": 163175 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.0001823519831650656, "loss": 2.0937, "step": 163180 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.0001823509346383855, "loss": 1.9747, "step": 163185 }, { "epoch": 0.38, "grad_norm": 2.171875, "learning_rate": 0.00018234988608357283, "loss": 2.1163, "step": 163190 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018234883750062797, "loss": 2.192, "step": 163195 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.0001823477888895512, "loss": 2.1734, "step": 163200 }, { "epoch": 0.38, "grad_norm": 2.34375, "learning_rate": 0.00018234674025034296, "loss": 2.2517, "step": 163205 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018234569158300358, "loss": 2.0732, "step": 163210 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.0001823446428875334, "loss": 2.0497, "step": 163215 }, { "epoch": 0.38, "grad_norm": 1.9765625, "learning_rate": 0.00018234359416393283, "loss": 2.0179, "step": 163220 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018234254541220216, "loss": 2.2181, "step": 163225 }, { "epoch": 0.38, "grad_norm": 1.9609375, "learning_rate": 0.00018234149663234184, "loss": 2.0085, "step": 163230 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.00018234044782435214, "loss": 1.9477, "step": 163235 }, { "epoch": 0.38, "grad_norm": 2.3125, "learning_rate": 0.00018233939898823343, "loss": 2.0697, "step": 163240 }, { "epoch": 0.38, "grad_norm": 2.296875, "learning_rate": 0.0001823383501239861, "loss": 2.1103, "step": 163245 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.00018233730123161048, "loss": 2.2052, "step": 163250 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018233625231110697, "loss": 2.0414, "step": 163255 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.0001823352033624759, "loss": 2.0884, "step": 163260 }, { "epoch": 0.38, "grad_norm": 1.75, "learning_rate": 0.00018233415438571762, "loss": 2.1153, "step": 163265 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.0001823331053808325, "loss": 2.1338, "step": 163270 }, { "epoch": 0.38, "grad_norm": 2.875, "learning_rate": 0.00018233205634782092, "loss": 2.134, "step": 163275 }, { "epoch": 0.38, "grad_norm": 2.15625, "learning_rate": 0.00018233100728668318, "loss": 2.0141, "step": 163280 }, { "epoch": 0.38, "grad_norm": 2.671875, "learning_rate": 0.0001823299581974197, "loss": 2.3643, "step": 163285 }, { "epoch": 0.38, "grad_norm": 2.453125, "learning_rate": 0.0001823289090800308, "loss": 2.0795, "step": 163290 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018232785993451686, "loss": 2.0679, "step": 163295 }, { "epoch": 0.38, "grad_norm": 1.9921875, "learning_rate": 0.00018232681076087824, "loss": 2.1532, "step": 163300 }, { "epoch": 0.38, "grad_norm": 2.71875, "learning_rate": 0.00018232576155911528, "loss": 2.1335, "step": 163305 }, { "epoch": 0.38, "grad_norm": 1.953125, "learning_rate": 0.00018232471232922833, "loss": 2.0982, "step": 163310 }, { "epoch": 0.38, "grad_norm": 1.953125, "learning_rate": 0.00018232366307121777, "loss": 2.0865, "step": 163315 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018232261378508396, "loss": 2.2359, "step": 163320 }, { "epoch": 0.38, "grad_norm": 2.40625, "learning_rate": 0.00018232156447082728, "loss": 2.2456, "step": 163325 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018232051512844804, "loss": 2.0845, "step": 163330 }, { "epoch": 0.38, "grad_norm": 2.078125, "learning_rate": 0.0001823194657579466, "loss": 2.1303, "step": 163335 }, { "epoch": 0.38, "grad_norm": 2.484375, "learning_rate": 0.00018231841635932335, "loss": 2.1263, "step": 163340 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018231736693257862, "loss": 2.2004, "step": 163345 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018231631747771281, "loss": 2.0732, "step": 163350 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.00018231526799472624, "loss": 2.1757, "step": 163355 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.0001823142184836193, "loss": 2.0814, "step": 163360 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018231316894439233, "loss": 2.2227, "step": 163365 }, { "epoch": 0.38, "grad_norm": 1.9140625, "learning_rate": 0.00018231211937704567, "loss": 1.9789, "step": 163370 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001823110697815797, "loss": 2.1166, "step": 163375 }, { "epoch": 0.38, "grad_norm": 2.09375, "learning_rate": 0.00018231002015799477, "loss": 2.2675, "step": 163380 }, { "epoch": 0.38, "grad_norm": 2.203125, "learning_rate": 0.00018230897050629125, "loss": 2.326, "step": 163385 }, { "epoch": 0.38, "grad_norm": 2.234375, "learning_rate": 0.0001823079208264695, "loss": 2.2845, "step": 163390 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018230687111852988, "loss": 1.9056, "step": 163395 }, { "epoch": 0.38, "grad_norm": 1.9921875, "learning_rate": 0.0001823058213824727, "loss": 2.1076, "step": 163400 }, { "epoch": 0.38, "grad_norm": 1.8671875, "learning_rate": 0.00018230477161829839, "loss": 2.1587, "step": 163405 }, { "epoch": 0.38, "grad_norm": 2.421875, "learning_rate": 0.0001823037218260073, "loss": 2.2024, "step": 163410 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.00018230267200559973, "loss": 2.245, "step": 163415 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.0001823016221570761, "loss": 2.0721, "step": 163420 }, { "epoch": 0.38, "grad_norm": 2.328125, "learning_rate": 0.0001823005722804367, "loss": 2.162, "step": 163425 }, { "epoch": 0.38, "grad_norm": 2.046875, "learning_rate": 0.00018229952237568196, "loss": 2.0881, "step": 163430 }, { "epoch": 0.38, "grad_norm": 2.96875, "learning_rate": 0.00018229847244281223, "loss": 2.1288, "step": 163435 }, { "epoch": 0.38, "grad_norm": 2.46875, "learning_rate": 0.0001822974224818278, "loss": 2.1828, "step": 163440 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.00018229637249272913, "loss": 2.0575, "step": 163445 }, { "epoch": 0.38, "grad_norm": 2.5625, "learning_rate": 0.0001822953224755165, "loss": 2.2279, "step": 163450 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.0001822942724301903, "loss": 2.1407, "step": 163455 }, { "epoch": 0.38, "grad_norm": 2.625, "learning_rate": 0.0001822932223567509, "loss": 2.2375, "step": 163460 }, { "epoch": 0.38, "grad_norm": 2.125, "learning_rate": 0.0001822921722551986, "loss": 2.2038, "step": 163465 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018229112212553384, "loss": 2.3272, "step": 163470 }, { "epoch": 0.38, "grad_norm": 1.9296875, "learning_rate": 0.00018229007196775694, "loss": 2.0741, "step": 163475 }, { "epoch": 0.38, "grad_norm": 2.140625, "learning_rate": 0.00018228902178186825, "loss": 2.1922, "step": 163480 }, { "epoch": 0.38, "grad_norm": 2.109375, "learning_rate": 0.00018228797156786812, "loss": 2.2474, "step": 163485 }, { "epoch": 0.38, "grad_norm": 6.375, "learning_rate": 0.00018228692132575693, "loss": 2.0803, "step": 163490 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.00018228587105553504, "loss": 2.0702, "step": 163495 }, { "epoch": 0.38, "grad_norm": 2.515625, "learning_rate": 0.00018228482075720282, "loss": 2.1828, "step": 163500 }, { "epoch": 0.38, "grad_norm": 2.25, "learning_rate": 0.0001822837704307606, "loss": 2.1273, "step": 163505 }, { "epoch": 0.38, "grad_norm": 2.1875, "learning_rate": 0.00018228272007620876, "loss": 2.2544, "step": 163510 }, { "epoch": 0.38, "grad_norm": 2.0625, "learning_rate": 0.00018228166969354764, "loss": 2.1714, "step": 163515 }, { "epoch": 0.38, "grad_norm": 2.546875, "learning_rate": 0.00018228061928277762, "loss": 2.109, "step": 163520 }, { "epoch": 0.38, "grad_norm": 1.9453125, "learning_rate": 0.00018227956884389905, "loss": 2.1383, "step": 163525 }, { "epoch": 0.38, "grad_norm": 2.265625, "learning_rate": 0.00018227851837691227, "loss": 2.1874, "step": 163530 }, { "epoch": 0.38, "grad_norm": 1.984375, "learning_rate": 0.00018227746788181768, "loss": 1.9539, "step": 163535 }, { "epoch": 0.38, "grad_norm": 2.578125, "learning_rate": 0.0001822764173586156, "loss": 2.1264, "step": 163540 }, { "epoch": 0.38, "grad_norm": 2.4375, "learning_rate": 0.00018227536680730642, "loss": 1.9918, "step": 163545 }, { "epoch": 0.38, "grad_norm": 2.03125, "learning_rate": 0.00018227431622789043, "loss": 2.0802, "step": 163550 }, { "epoch": 0.38, "grad_norm": 2.671875, "learning_rate": 0.00018227326562036812, "loss": 2.1353, "step": 163555 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.0001822722149847397, "loss": 2.094, "step": 163560 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018227116432100563, "loss": 2.1513, "step": 163565 }, { "epoch": 0.38, "grad_norm": 2.390625, "learning_rate": 0.00018227011362916624, "loss": 2.0407, "step": 163570 }, { "epoch": 0.38, "grad_norm": 1.9296875, "learning_rate": 0.00018226906290922187, "loss": 2.0055, "step": 163575 }, { "epoch": 0.38, "grad_norm": 1.921875, "learning_rate": 0.00018226801216117292, "loss": 2.1573, "step": 163580 }, { "epoch": 0.38, "grad_norm": 2.28125, "learning_rate": 0.0001822669613850197, "loss": 2.1416, "step": 163585 }, { "epoch": 0.38, "grad_norm": 2.59375, "learning_rate": 0.00018226591058076262, "loss": 2.1456, "step": 163590 }, { "epoch": 0.38, "grad_norm": 2.21875, "learning_rate": 0.00018226485974840202, "loss": 2.2352, "step": 163595 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001822638088879382, "loss": 2.1018, "step": 163600 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018226275799937161, "loss": 2.2054, "step": 163605 }, { "epoch": 0.39, "grad_norm": 1.890625, "learning_rate": 0.00018226170708270258, "loss": 2.099, "step": 163610 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.00018226065613793143, "loss": 2.0432, "step": 163615 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018225960516505857, "loss": 2.1109, "step": 163620 }, { "epoch": 0.39, "grad_norm": 2.734375, "learning_rate": 0.00018225855416408436, "loss": 2.1644, "step": 163625 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018225750313500907, "loss": 1.9242, "step": 163630 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018225645207783317, "loss": 2.0802, "step": 163635 }, { "epoch": 0.39, "grad_norm": 2.796875, "learning_rate": 0.00018225540099255696, "loss": 2.1764, "step": 163640 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.00018225434987918082, "loss": 2.1577, "step": 163645 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018225329873770512, "loss": 2.1077, "step": 163650 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018225224756813016, "loss": 1.996, "step": 163655 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018225119637045638, "loss": 1.9306, "step": 163660 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018225014514468407, "loss": 2.1285, "step": 163665 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018224909389081363, "loss": 2.1537, "step": 163670 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018224804260884541, "loss": 2.084, "step": 163675 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018224699129877975, "loss": 2.2176, "step": 163680 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.00018224593996061706, "loss": 2.1147, "step": 163685 }, { "epoch": 0.39, "grad_norm": 2.59375, "learning_rate": 0.00018224488859435765, "loss": 2.1438, "step": 163690 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018224383720000188, "loss": 2.1151, "step": 163695 }, { "epoch": 0.39, "grad_norm": 2.75, "learning_rate": 0.00018224278577755012, "loss": 2.1373, "step": 163700 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018224173432700275, "loss": 2.2194, "step": 163705 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018224068284836012, "loss": 2.145, "step": 163710 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018223963134162256, "loss": 2.1147, "step": 163715 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018223857980679046, "loss": 2.175, "step": 163720 }, { "epoch": 0.39, "grad_norm": 1.875, "learning_rate": 0.00018223752824386417, "loss": 2.2772, "step": 163725 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018223647665284407, "loss": 2.1991, "step": 163730 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018223542503373045, "loss": 2.0337, "step": 163735 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018223437338652377, "loss": 2.0725, "step": 163740 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0001822333217112243, "loss": 1.9625, "step": 163745 }, { "epoch": 0.39, "grad_norm": 2.84375, "learning_rate": 0.00018223227000783244, "loss": 2.2153, "step": 163750 }, { "epoch": 0.39, "grad_norm": 2.625, "learning_rate": 0.00018223121827634856, "loss": 2.1096, "step": 163755 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.000182230166516773, "loss": 2.1851, "step": 163760 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.0001822291147291061, "loss": 2.1649, "step": 163765 }, { "epoch": 0.39, "grad_norm": 2.71875, "learning_rate": 0.0001822280629133483, "loss": 2.2018, "step": 163770 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018222701106949984, "loss": 2.1431, "step": 163775 }, { "epoch": 0.39, "grad_norm": 1.921875, "learning_rate": 0.00018222595919756117, "loss": 2.227, "step": 163780 }, { "epoch": 0.39, "grad_norm": 1.953125, "learning_rate": 0.00018222490729753263, "loss": 2.0269, "step": 163785 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018222385536941456, "loss": 2.1527, "step": 163790 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018222280341320733, "loss": 2.0891, "step": 163795 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.0001822217514289113, "loss": 2.0542, "step": 163800 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001822206994165268, "loss": 2.2668, "step": 163805 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018221964737605428, "loss": 2.0794, "step": 163810 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018221859530749397, "loss": 2.0981, "step": 163815 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018221754321084635, "loss": 2.0892, "step": 163820 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.0001822164910861117, "loss": 2.1518, "step": 163825 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.0001822154389332904, "loss": 1.9162, "step": 163830 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018221438675238285, "loss": 2.1085, "step": 163835 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018221333454338933, "loss": 2.0541, "step": 163840 }, { "epoch": 0.39, "grad_norm": 1.9140625, "learning_rate": 0.00018221228230631026, "loss": 1.8858, "step": 163845 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018221123004114598, "loss": 2.0707, "step": 163850 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.0001822101777478969, "loss": 2.2093, "step": 163855 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018220912542656328, "loss": 2.1756, "step": 163860 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018220807307714555, "loss": 2.0092, "step": 163865 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.00018220702069964402, "loss": 2.0634, "step": 163870 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018220596829405912, "loss": 2.2329, "step": 163875 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018220491586039118, "loss": 2.3748, "step": 163880 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018220386339864048, "loss": 2.1018, "step": 163885 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.0001822028109088075, "loss": 2.1549, "step": 163890 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018220175839089254, "loss": 2.1521, "step": 163895 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018220070584489602, "loss": 2.0413, "step": 163900 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018219965327081819, "loss": 2.2488, "step": 163905 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018219860066865948, "loss": 2.1291, "step": 163910 }, { "epoch": 0.39, "grad_norm": 2.65625, "learning_rate": 0.00018219754803842025, "loss": 2.0861, "step": 163915 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.0001821964953801008, "loss": 2.1752, "step": 163920 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.0001821954426937016, "loss": 2.2393, "step": 163925 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001821943899792229, "loss": 1.9948, "step": 163930 }, { "epoch": 0.39, "grad_norm": 3.28125, "learning_rate": 0.00018219333723666512, "loss": 2.0012, "step": 163935 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001821922844660286, "loss": 2.2823, "step": 163940 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018219123166731373, "loss": 2.2037, "step": 163945 }, { "epoch": 0.39, "grad_norm": 1.921875, "learning_rate": 0.0001821901788405208, "loss": 2.1594, "step": 163950 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018218912598565024, "loss": 2.2426, "step": 163955 }, { "epoch": 0.39, "grad_norm": 2.703125, "learning_rate": 0.0001821880731027024, "loss": 2.2686, "step": 163960 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018218702019167762, "loss": 1.9586, "step": 163965 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018218596725257626, "loss": 2.0621, "step": 163970 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018218491428539867, "loss": 2.1644, "step": 163975 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.00018218386129014523, "loss": 2.0993, "step": 163980 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.0001821828082668163, "loss": 2.2131, "step": 163985 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.0001821817552154122, "loss": 1.9383, "step": 163990 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018218070213593335, "loss": 1.9303, "step": 163995 }, { "epoch": 0.39, "grad_norm": 3.0, "learning_rate": 0.00018217964902838006, "loss": 2.0709, "step": 164000 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018217859589275275, "loss": 2.1171, "step": 164005 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.0001821775427290517, "loss": 2.0337, "step": 164010 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018217648953727732, "loss": 2.2246, "step": 164015 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018217543631743, "loss": 2.0664, "step": 164020 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018217438306951, "loss": 2.0347, "step": 164025 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001821733297935178, "loss": 2.1199, "step": 164030 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018217227648945368, "loss": 1.9066, "step": 164035 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.000182171223157318, "loss": 2.2397, "step": 164040 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018217016979711116, "loss": 2.1222, "step": 164045 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018216911640883348, "loss": 2.1262, "step": 164050 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018216806299248535, "loss": 1.8702, "step": 164055 }, { "epoch": 0.39, "grad_norm": 1.7890625, "learning_rate": 0.00018216700954806712, "loss": 2.2535, "step": 164060 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018216595607557917, "loss": 2.1621, "step": 164065 }, { "epoch": 0.39, "grad_norm": 1.90625, "learning_rate": 0.00018216490257502181, "loss": 2.273, "step": 164070 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018216384904639544, "loss": 2.1143, "step": 164075 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001821627954897004, "loss": 2.0921, "step": 164080 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018216174190493705, "loss": 2.1471, "step": 164085 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001821606882921058, "loss": 2.0087, "step": 164090 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018215963465120693, "loss": 1.8791, "step": 164095 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018215858098224086, "loss": 2.0504, "step": 164100 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018215752728520794, "loss": 2.2536, "step": 164105 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.0001821564735601085, "loss": 2.1547, "step": 164110 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018215541980694293, "loss": 2.0073, "step": 164115 }, { "epoch": 0.39, "grad_norm": 1.84375, "learning_rate": 0.00018215436602571155, "loss": 2.0584, "step": 164120 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018215331221641475, "loss": 2.1521, "step": 164125 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018215225837905292, "loss": 2.0866, "step": 164130 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018215120451362638, "loss": 2.2171, "step": 164135 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001821501506201355, "loss": 2.0363, "step": 164140 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018214909669858061, "loss": 2.1107, "step": 164145 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018214804274896214, "loss": 2.1271, "step": 164150 }, { "epoch": 0.39, "grad_norm": 10.1875, "learning_rate": 0.00018214698877128037, "loss": 2.1454, "step": 164155 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018214593476553574, "loss": 2.1108, "step": 164160 }, { "epoch": 0.39, "grad_norm": 2.65625, "learning_rate": 0.00018214488073172854, "loss": 2.0911, "step": 164165 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018214382666985917, "loss": 2.0885, "step": 164170 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018214277257992795, "loss": 2.2003, "step": 164175 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001821417184619353, "loss": 1.937, "step": 164180 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.00018214066431588156, "loss": 2.1097, "step": 164185 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018213961014176702, "loss": 2.0711, "step": 164190 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018213855593959217, "loss": 2.0156, "step": 164195 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018213750170935727, "loss": 2.0637, "step": 164200 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018213644745106273, "loss": 2.1002, "step": 164205 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018213539316470887, "loss": 2.0974, "step": 164210 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018213433885029608, "loss": 2.2816, "step": 164215 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018213328450782467, "loss": 2.3149, "step": 164220 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.00018213223013729507, "loss": 2.0431, "step": 164225 }, { "epoch": 0.39, "grad_norm": 1.671875, "learning_rate": 0.00018213117573870764, "loss": 1.9454, "step": 164230 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018213012131206266, "loss": 2.0987, "step": 164235 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018212906685736058, "loss": 2.232, "step": 164240 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.00018212801237460168, "loss": 2.1812, "step": 164245 }, { "epoch": 0.39, "grad_norm": 1.8828125, "learning_rate": 0.0001821269578637864, "loss": 2.1272, "step": 164250 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018212590332491505, "loss": 2.2377, "step": 164255 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.000182124848757988, "loss": 2.1464, "step": 164260 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.0001821237941630056, "loss": 2.0455, "step": 164265 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.00018212273953996825, "loss": 2.4074, "step": 164270 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018212168488887626, "loss": 2.0102, "step": 164275 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018212063020973002, "loss": 2.1231, "step": 164280 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018211957550252992, "loss": 2.1791, "step": 164285 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018211852076727622, "loss": 2.0735, "step": 164290 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018211746600396939, "loss": 2.1902, "step": 164295 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018211641121260972, "loss": 2.1709, "step": 164300 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018211535639319761, "loss": 2.2545, "step": 164305 }, { "epoch": 0.39, "grad_norm": 2.703125, "learning_rate": 0.00018211430154573338, "loss": 2.2431, "step": 164310 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018211324667021742, "loss": 2.0541, "step": 164315 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018211219176665012, "loss": 2.348, "step": 164320 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018211113683503177, "loss": 2.1699, "step": 164325 }, { "epoch": 0.39, "grad_norm": 1.8046875, "learning_rate": 0.0001821100818753628, "loss": 1.9571, "step": 164330 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.00018210902688764352, "loss": 2.2721, "step": 164335 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.0001821079718718743, "loss": 2.1211, "step": 164340 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.0001821069168280555, "loss": 2.1964, "step": 164345 }, { "epoch": 0.39, "grad_norm": 3.359375, "learning_rate": 0.0001821058617561875, "loss": 2.1199, "step": 164350 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018210480665627066, "loss": 2.0609, "step": 164355 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018210375152830533, "loss": 2.2875, "step": 164360 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.00018210269637229182, "loss": 2.119, "step": 164365 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.0001821016411882306, "loss": 2.0781, "step": 164370 }, { "epoch": 0.39, "grad_norm": 1.8828125, "learning_rate": 0.00018210058597612196, "loss": 2.0646, "step": 164375 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018209953073596624, "loss": 2.0956, "step": 164380 }, { "epoch": 0.39, "grad_norm": 2.59375, "learning_rate": 0.00018209847546776385, "loss": 2.0359, "step": 164385 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018209742017151514, "loss": 2.2653, "step": 164390 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018209636484722044, "loss": 1.94, "step": 164395 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018209530949488017, "loss": 2.2123, "step": 164400 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001820942541144946, "loss": 2.237, "step": 164405 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.0001820931987060642, "loss": 2.0818, "step": 164410 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018209214326958924, "loss": 2.2705, "step": 164415 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018209108780507013, "loss": 2.1937, "step": 164420 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.0001820900323125072, "loss": 1.9621, "step": 164425 }, { "epoch": 0.39, "grad_norm": 2.765625, "learning_rate": 0.00018208897679190085, "loss": 2.2312, "step": 164430 }, { "epoch": 0.39, "grad_norm": 3.0, "learning_rate": 0.0001820879212432514, "loss": 2.17, "step": 164435 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018208686566655922, "loss": 2.2075, "step": 164440 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001820858100618247, "loss": 2.0829, "step": 164445 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018208475442904815, "loss": 2.1871, "step": 164450 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.00018208369876823, "loss": 2.0915, "step": 164455 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018208264307937054, "loss": 2.2598, "step": 164460 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018208158736247016, "loss": 2.2858, "step": 164465 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018208053161752923, "loss": 2.1303, "step": 164470 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001820794758445481, "loss": 2.1483, "step": 164475 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018207842004352713, "loss": 2.1623, "step": 164480 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001820773642144667, "loss": 2.0555, "step": 164485 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001820763083573671, "loss": 2.1049, "step": 164490 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.0001820752524722288, "loss": 2.2185, "step": 164495 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018207419655905208, "loss": 2.1869, "step": 164500 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018207314061783732, "loss": 2.0213, "step": 164505 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.0001820720846485849, "loss": 2.1361, "step": 164510 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018207102865129516, "loss": 2.0254, "step": 164515 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.0001820699726259685, "loss": 2.2674, "step": 164520 }, { "epoch": 0.39, "grad_norm": 1.984375, "learning_rate": 0.00018206891657260524, "loss": 2.0925, "step": 164525 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018206786049120572, "loss": 2.1423, "step": 164530 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018206680438177035, "loss": 2.0694, "step": 164535 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.00018206574824429947, "loss": 1.9319, "step": 164540 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018206469207879345, "loss": 2.0352, "step": 164545 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.00018206363588525262, "loss": 2.003, "step": 164550 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018206257966367736, "loss": 2.2927, "step": 164555 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018206152341406805, "loss": 2.099, "step": 164560 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018206046713642503, "loss": 2.1778, "step": 164565 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0001820594108307487, "loss": 2.0202, "step": 164570 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018205835449703933, "loss": 2.1249, "step": 164575 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018205729813529738, "loss": 2.0617, "step": 164580 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018205624174552317, "loss": 2.2365, "step": 164585 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018205518532771704, "loss": 2.1119, "step": 164590 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018205412888187936, "loss": 1.9282, "step": 164595 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.0001820530724080105, "loss": 2.1255, "step": 164600 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018205201590611086, "loss": 2.0948, "step": 164605 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.00018205095937618072, "loss": 2.0644, "step": 164610 }, { "epoch": 0.39, "grad_norm": 1.984375, "learning_rate": 0.00018204990281822054, "loss": 2.1994, "step": 164615 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018204884623223059, "loss": 2.0384, "step": 164620 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018204778961821124, "loss": 2.2111, "step": 164625 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.0001820467329761629, "loss": 2.0562, "step": 164630 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018204567630608596, "loss": 2.1833, "step": 164635 }, { "epoch": 0.39, "grad_norm": 1.9296875, "learning_rate": 0.00018204461960798067, "loss": 2.2227, "step": 164640 }, { "epoch": 0.39, "grad_norm": 1.8828125, "learning_rate": 0.00018204356288184745, "loss": 2.0619, "step": 164645 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018204250612768668, "loss": 1.9626, "step": 164650 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018204144934549866, "loss": 2.1838, "step": 164655 }, { "epoch": 0.39, "grad_norm": 1.8359375, "learning_rate": 0.00018204039253528386, "loss": 2.2465, "step": 164660 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018203933569704253, "loss": 2.163, "step": 164665 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.0001820382788307751, "loss": 2.0586, "step": 164670 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018203722193648188, "loss": 2.1824, "step": 164675 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018203616501416328, "loss": 2.0984, "step": 164680 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018203510806381963, "loss": 2.0827, "step": 164685 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.0001820340510854513, "loss": 2.1588, "step": 164690 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018203299407905865, "loss": 2.1047, "step": 164695 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018203193704464203, "loss": 2.0578, "step": 164700 }, { "epoch": 0.39, "grad_norm": 2.859375, "learning_rate": 0.00018203087998220184, "loss": 2.1748, "step": 164705 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018202982289173837, "loss": 2.3227, "step": 164710 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018202876577325206, "loss": 1.8766, "step": 164715 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018202770862674322, "loss": 2.2297, "step": 164720 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018202665145221225, "loss": 2.1909, "step": 164725 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018202559424965946, "loss": 2.2949, "step": 164730 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018202453701908526, "loss": 2.0467, "step": 164735 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018202347976048998, "loss": 2.0915, "step": 164740 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.00018202242247387397, "loss": 1.9805, "step": 164745 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018202136515923766, "loss": 2.2729, "step": 164750 }, { "epoch": 0.39, "grad_norm": 1.9921875, "learning_rate": 0.00018202030781658133, "loss": 2.0477, "step": 164755 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018201925044590537, "loss": 2.2923, "step": 164760 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018201819304721018, "loss": 2.1191, "step": 164765 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018201713562049606, "loss": 2.1845, "step": 164770 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018201607816576343, "loss": 2.1551, "step": 164775 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.0001820150206830126, "loss": 1.9676, "step": 164780 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018201396317224394, "loss": 2.0787, "step": 164785 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018201290563345783, "loss": 1.968, "step": 164790 }, { "epoch": 0.39, "grad_norm": 1.84375, "learning_rate": 0.00018201184806665462, "loss": 1.9841, "step": 164795 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001820107904718347, "loss": 2.245, "step": 164800 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018200973284899838, "loss": 2.0851, "step": 164805 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018200867519814606, "loss": 1.9876, "step": 164810 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.0001820076175192781, "loss": 2.1638, "step": 164815 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018200655981239484, "loss": 2.1401, "step": 164820 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018200550207749663, "loss": 2.1319, "step": 164825 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018200444431458386, "loss": 1.9675, "step": 164830 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.0001820033865236569, "loss": 2.117, "step": 164835 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018200232870471613, "loss": 2.0219, "step": 164840 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001820012708577618, "loss": 2.0464, "step": 164845 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001820002129827944, "loss": 2.2567, "step": 164850 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018199915507981424, "loss": 2.1455, "step": 164855 }, { "epoch": 0.39, "grad_norm": 2.625, "learning_rate": 0.00018199809714882168, "loss": 2.1972, "step": 164860 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018199703918981704, "loss": 2.2812, "step": 164865 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.0001819959812028008, "loss": 2.0773, "step": 164870 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018199492318777318, "loss": 1.9332, "step": 164875 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.00018199386514473464, "loss": 2.1012, "step": 164880 }, { "epoch": 0.39, "grad_norm": 2.953125, "learning_rate": 0.00018199280707368549, "loss": 2.1237, "step": 164885 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001819917489746261, "loss": 1.796, "step": 164890 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018199069084755688, "loss": 2.1278, "step": 164895 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018198963269247816, "loss": 2.0388, "step": 164900 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018198857450939024, "loss": 2.1768, "step": 164905 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018198751629829358, "loss": 2.3045, "step": 164910 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018198645805918848, "loss": 1.9221, "step": 164915 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018198539979207533, "loss": 2.0918, "step": 164920 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018198434149695446, "loss": 2.2977, "step": 164925 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018198328317382628, "loss": 2.0365, "step": 164930 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018198222482269108, "loss": 1.9135, "step": 164935 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.0001819811664435493, "loss": 2.1743, "step": 164940 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018198010803640127, "loss": 2.2964, "step": 164945 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.00018197904960124732, "loss": 1.9322, "step": 164950 }, { "epoch": 0.39, "grad_norm": 1.875, "learning_rate": 0.00018197799113808786, "loss": 2.041, "step": 164955 }, { "epoch": 0.39, "grad_norm": 2.609375, "learning_rate": 0.00018197693264692324, "loss": 2.1132, "step": 164960 }, { "epoch": 0.39, "grad_norm": 1.984375, "learning_rate": 0.0001819758741277538, "loss": 2.0931, "step": 164965 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.0001819748155805799, "loss": 1.9859, "step": 164970 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018197375700540195, "loss": 2.0144, "step": 164975 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018197269840222028, "loss": 2.0498, "step": 164980 }, { "epoch": 0.39, "grad_norm": 3.046875, "learning_rate": 0.0001819716397710352, "loss": 1.9026, "step": 164985 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018197058111184714, "loss": 2.0937, "step": 164990 }, { "epoch": 0.39, "grad_norm": 2.609375, "learning_rate": 0.0001819695224246565, "loss": 2.1311, "step": 164995 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018196846370946353, "loss": 2.0871, "step": 165000 }, { "epoch": 0.39, "grad_norm": 1.9453125, "learning_rate": 0.00018196740496626866, "loss": 2.301, "step": 165005 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.0001819663461950722, "loss": 2.1196, "step": 165010 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.0001819652873958746, "loss": 2.1912, "step": 165015 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018196422856867615, "loss": 1.8743, "step": 165020 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018196316971347723, "loss": 2.1298, "step": 165025 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001819621108302782, "loss": 2.1901, "step": 165030 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018196105191907946, "loss": 2.2163, "step": 165035 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.0001819599929798813, "loss": 2.2379, "step": 165040 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018195893401268417, "loss": 2.0575, "step": 165045 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018195787501748832, "loss": 2.2981, "step": 165050 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001819568159942942, "loss": 1.9729, "step": 165055 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018195575694310217, "loss": 1.9122, "step": 165060 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018195469786391253, "loss": 2.1117, "step": 165065 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018195363875672568, "loss": 2.2863, "step": 165070 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.000181952579621542, "loss": 2.1758, "step": 165075 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.00018195152045836184, "loss": 2.1902, "step": 165080 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018195046126718553, "loss": 2.0513, "step": 165085 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018194940204801347, "loss": 2.1138, "step": 165090 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018194834280084603, "loss": 2.198, "step": 165095 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018194728352568352, "loss": 2.1668, "step": 165100 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.0001819462242225263, "loss": 2.1184, "step": 165105 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.0001819451648913748, "loss": 2.264, "step": 165110 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.00018194410553222938, "loss": 2.1237, "step": 165115 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018194304614509033, "loss": 2.2089, "step": 165120 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018194198672995805, "loss": 2.2349, "step": 165125 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001819409272868329, "loss": 2.1959, "step": 165130 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018193986781571522, "loss": 1.9455, "step": 165135 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018193880831660547, "loss": 2.1564, "step": 165140 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018193774878950386, "loss": 2.2713, "step": 165145 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018193668923441084, "loss": 2.0295, "step": 165150 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001819356296513268, "loss": 1.9879, "step": 165155 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018193457004025206, "loss": 2.2674, "step": 165160 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018193351040118695, "loss": 2.1673, "step": 165165 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018193245073413187, "loss": 2.2039, "step": 165170 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.0001819313910390872, "loss": 2.2285, "step": 165175 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018193033131605327, "loss": 2.1867, "step": 165180 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018192927156503047, "loss": 2.0785, "step": 165185 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018192821178601913, "loss": 2.1542, "step": 165190 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018192715197901964, "loss": 2.0187, "step": 165195 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018192609214403232, "loss": 2.1974, "step": 165200 }, { "epoch": 0.39, "grad_norm": 1.9140625, "learning_rate": 0.0001819250322810576, "loss": 1.9795, "step": 165205 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018192397239009576, "loss": 1.9953, "step": 165210 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.00018192291247114722, "loss": 2.3088, "step": 165215 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018192185252421234, "loss": 2.1149, "step": 165220 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.0001819207925492915, "loss": 2.0136, "step": 165225 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018191973254638497, "loss": 2.2295, "step": 165230 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001819186725154932, "loss": 2.1863, "step": 165235 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001819176124566165, "loss": 2.0224, "step": 165240 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018191655236975528, "loss": 2.1041, "step": 165245 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018191549225490993, "loss": 2.044, "step": 165250 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001819144321120807, "loss": 2.1513, "step": 165255 }, { "epoch": 0.39, "grad_norm": 2.59375, "learning_rate": 0.000181913371941268, "loss": 2.0943, "step": 165260 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018191231174247225, "loss": 2.1226, "step": 165265 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018191125151569376, "loss": 2.1105, "step": 165270 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.0001819101912609329, "loss": 2.0419, "step": 165275 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018190913097819003, "loss": 2.0595, "step": 165280 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018190807066746553, "loss": 2.2567, "step": 165285 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018190701032875972, "loss": 2.2032, "step": 165290 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.000181905949962073, "loss": 2.1707, "step": 165295 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018190488956740573, "loss": 2.0065, "step": 165300 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018190382914475825, "loss": 2.1439, "step": 165305 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018190276869413095, "loss": 2.0425, "step": 165310 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018190170821552417, "loss": 2.1379, "step": 165315 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018190064770893827, "loss": 2.2749, "step": 165320 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018189958717437365, "loss": 2.1861, "step": 165325 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.0001818985266118306, "loss": 2.1999, "step": 165330 }, { "epoch": 0.39, "grad_norm": 2.78125, "learning_rate": 0.0001818974660213096, "loss": 2.3529, "step": 165335 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.0001818964054028109, "loss": 2.2693, "step": 165340 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001818953447563349, "loss": 1.9695, "step": 165345 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018189428408188198, "loss": 1.9545, "step": 165350 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018189322337945244, "loss": 2.1445, "step": 165355 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018189216264904675, "loss": 2.1301, "step": 165360 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018189110189066517, "loss": 2.2029, "step": 165365 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018189004110430814, "loss": 2.1744, "step": 165370 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018188898028997596, "loss": 1.9911, "step": 165375 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018188791944766903, "loss": 2.2316, "step": 165380 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001818868585773877, "loss": 2.1896, "step": 165385 }, { "epoch": 0.39, "grad_norm": 2.921875, "learning_rate": 0.00018188579767913234, "loss": 2.0681, "step": 165390 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018188473675290328, "loss": 2.0451, "step": 165395 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018188367579870097, "loss": 2.3272, "step": 165400 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018188261481652567, "loss": 2.1612, "step": 165405 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018188155380637777, "loss": 2.1708, "step": 165410 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018188049276825766, "loss": 2.0986, "step": 165415 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001818794317021657, "loss": 2.1067, "step": 165420 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001818783706081022, "loss": 1.9981, "step": 165425 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018187730948606763, "loss": 2.0545, "step": 165430 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018187624833606222, "loss": 2.1386, "step": 165435 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018187518715808644, "loss": 1.9466, "step": 165440 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018187412595214062, "loss": 2.007, "step": 165445 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.0001818730647182251, "loss": 2.1286, "step": 165450 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018187200345634024, "loss": 2.1757, "step": 165455 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018187094216648642, "loss": 2.1137, "step": 165460 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018186988084866403, "loss": 2.2115, "step": 165465 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018186881950287337, "loss": 2.3285, "step": 165470 }, { "epoch": 0.39, "grad_norm": 2.609375, "learning_rate": 0.00018186775812911485, "loss": 2.1115, "step": 165475 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018186669672738882, "loss": 2.0268, "step": 165480 }, { "epoch": 0.39, "grad_norm": 1.8984375, "learning_rate": 0.00018186563529769566, "loss": 2.1033, "step": 165485 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.0001818645738400357, "loss": 2.2988, "step": 165490 }, { "epoch": 0.39, "grad_norm": 2.71875, "learning_rate": 0.0001818635123544093, "loss": 2.1909, "step": 165495 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018186245084081686, "loss": 2.2608, "step": 165500 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018186138929925872, "loss": 2.2671, "step": 165505 }, { "epoch": 0.39, "grad_norm": 1.6953125, "learning_rate": 0.00018186032772973522, "loss": 2.0376, "step": 165510 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018185926613224678, "loss": 2.3313, "step": 165515 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.0001818582045067937, "loss": 2.2459, "step": 165520 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.0001818571428533764, "loss": 2.2304, "step": 165525 }, { "epoch": 0.39, "grad_norm": 2.796875, "learning_rate": 0.0001818560811719952, "loss": 2.1476, "step": 165530 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018185501946265048, "loss": 2.1271, "step": 165535 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0001818539577253426, "loss": 2.1308, "step": 165540 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018185289596007193, "loss": 2.0038, "step": 165545 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018185183416683884, "loss": 2.0014, "step": 165550 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018185077234564366, "loss": 2.2087, "step": 165555 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018184971049648677, "loss": 2.114, "step": 165560 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018184864861936855, "loss": 2.027, "step": 165565 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018184758671428933, "loss": 2.0304, "step": 165570 }, { "epoch": 0.39, "grad_norm": 2.84375, "learning_rate": 0.00018184652478124948, "loss": 2.1334, "step": 165575 }, { "epoch": 0.39, "grad_norm": 1.8828125, "learning_rate": 0.00018184546282024941, "loss": 1.9786, "step": 165580 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018184440083128943, "loss": 2.1286, "step": 165585 }, { "epoch": 0.39, "grad_norm": 1.78125, "learning_rate": 0.0001818433388143699, "loss": 2.2024, "step": 165590 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.0001818422767694912, "loss": 2.0279, "step": 165595 }, { "epoch": 0.39, "grad_norm": 2.71875, "learning_rate": 0.0001818412146966537, "loss": 2.3016, "step": 165600 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018184015259585778, "loss": 2.2034, "step": 165605 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018183909046710377, "loss": 2.1885, "step": 165610 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018183802831039204, "loss": 2.1123, "step": 165615 }, { "epoch": 0.39, "grad_norm": 2.71875, "learning_rate": 0.00018183696612572296, "loss": 2.0087, "step": 165620 }, { "epoch": 0.39, "grad_norm": 2.8125, "learning_rate": 0.0001818359039130969, "loss": 2.1363, "step": 165625 }, { "epoch": 0.39, "grad_norm": 2.703125, "learning_rate": 0.00018183484167251418, "loss": 2.2841, "step": 165630 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.0001818337794039752, "loss": 2.1462, "step": 165635 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018183271710748032, "loss": 2.1522, "step": 165640 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018183165478302988, "loss": 2.0844, "step": 165645 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.0001818305924306243, "loss": 1.9797, "step": 165650 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018182953005026388, "loss": 1.9849, "step": 165655 }, { "epoch": 0.39, "grad_norm": 2.609375, "learning_rate": 0.00018182846764194902, "loss": 2.0955, "step": 165660 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001818274052056801, "loss": 2.2679, "step": 165665 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.0001818263427414574, "loss": 2.1027, "step": 165670 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018182528024928137, "loss": 2.0229, "step": 165675 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018182421772915233, "loss": 2.4255, "step": 165680 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018182315518107064, "loss": 2.0935, "step": 165685 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.0001818220926050367, "loss": 1.9531, "step": 165690 }, { "epoch": 0.39, "grad_norm": 3.171875, "learning_rate": 0.00018182103000105083, "loss": 2.0949, "step": 165695 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.00018181996736911345, "loss": 2.1365, "step": 165700 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018181890470922484, "loss": 2.1263, "step": 165705 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018181784202138542, "loss": 2.0542, "step": 165710 }, { "epoch": 0.39, "grad_norm": 1.9921875, "learning_rate": 0.00018181677930559554, "loss": 2.1875, "step": 165715 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018181571656185558, "loss": 2.0414, "step": 165720 }, { "epoch": 0.39, "grad_norm": 2.625, "learning_rate": 0.0001818146537901659, "loss": 2.0334, "step": 165725 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018181359099052682, "loss": 2.0668, "step": 165730 }, { "epoch": 0.39, "grad_norm": 1.890625, "learning_rate": 0.00018181252816293873, "loss": 2.1223, "step": 165735 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018181146530740202, "loss": 2.2279, "step": 165740 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018181040242391702, "loss": 2.1317, "step": 165745 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.0001818093395124841, "loss": 2.0809, "step": 165750 }, { "epoch": 0.39, "grad_norm": 2.734375, "learning_rate": 0.00018180827657310363, "loss": 2.0656, "step": 165755 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018180721360577597, "loss": 2.2343, "step": 165760 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018180615061050151, "loss": 2.0929, "step": 165765 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018180508758728055, "loss": 2.092, "step": 165770 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001818040245361135, "loss": 2.2867, "step": 165775 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.0001818029614570007, "loss": 2.0861, "step": 165780 }, { "epoch": 0.39, "grad_norm": 2.625, "learning_rate": 0.0001818018983499425, "loss": 2.1551, "step": 165785 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018180083521493934, "loss": 2.1817, "step": 165790 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.0001817997720519915, "loss": 2.2843, "step": 165795 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018179870886109937, "loss": 2.074, "step": 165800 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018179764564226335, "loss": 2.0453, "step": 165805 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.00018179658239548375, "loss": 2.1804, "step": 165810 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.00018179551912076095, "loss": 1.9823, "step": 165815 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018179445581809533, "loss": 2.2764, "step": 165820 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018179339248748723, "loss": 2.063, "step": 165825 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018179232912893703, "loss": 2.0865, "step": 165830 }, { "epoch": 0.39, "grad_norm": 1.953125, "learning_rate": 0.00018179126574244508, "loss": 2.229, "step": 165835 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018179020232801177, "loss": 2.0705, "step": 165840 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.0001817891388856374, "loss": 2.1518, "step": 165845 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001817880754153224, "loss": 2.1429, "step": 165850 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018178701191706712, "loss": 2.1293, "step": 165855 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018178594839087192, "loss": 2.101, "step": 165860 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018178488483673716, "loss": 2.1095, "step": 165865 }, { "epoch": 0.39, "grad_norm": 2.734375, "learning_rate": 0.00018178382125466318, "loss": 2.3162, "step": 165870 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018178275764465038, "loss": 2.3412, "step": 165875 }, { "epoch": 0.39, "grad_norm": 1.90625, "learning_rate": 0.00018178169400669906, "loss": 2.1695, "step": 165880 }, { "epoch": 0.39, "grad_norm": 2.640625, "learning_rate": 0.0001817806303408097, "loss": 2.123, "step": 165885 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018177956664698256, "loss": 2.132, "step": 165890 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018177850292521803, "loss": 2.0728, "step": 165895 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018177743917551647, "loss": 2.0541, "step": 165900 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018177637539787829, "loss": 2.2593, "step": 165905 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018177531159230378, "loss": 2.2215, "step": 165910 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018177424775879338, "loss": 2.1594, "step": 165915 }, { "epoch": 0.39, "grad_norm": 1.8984375, "learning_rate": 0.00018177318389734736, "loss": 2.0518, "step": 165920 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.0001817721200079662, "loss": 2.2959, "step": 165925 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018177105609065017, "loss": 2.0971, "step": 165930 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018176999214539964, "loss": 2.2109, "step": 165935 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.00018176892817221502, "loss": 2.0143, "step": 165940 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018176786417109666, "loss": 1.9589, "step": 165945 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018176680014204491, "loss": 1.9524, "step": 165950 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018176573608506017, "loss": 2.1966, "step": 165955 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001817646720001427, "loss": 2.0647, "step": 165960 }, { "epoch": 0.39, "grad_norm": 1.875, "learning_rate": 0.00018176360788729298, "loss": 2.0421, "step": 165965 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.0001817625437465113, "loss": 2.1186, "step": 165970 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001817614795777981, "loss": 2.1546, "step": 165975 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018176041538115368, "loss": 2.2983, "step": 165980 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.0001817593511565784, "loss": 2.0399, "step": 165985 }, { "epoch": 0.39, "grad_norm": 2.734375, "learning_rate": 0.00018175828690407265, "loss": 2.2237, "step": 165990 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018175722262363676, "loss": 2.2665, "step": 165995 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018175615831527117, "loss": 2.0692, "step": 166000 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018175509397897618, "loss": 1.9483, "step": 166005 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018175402961475215, "loss": 2.2769, "step": 166010 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018175296522259947, "loss": 2.2507, "step": 166015 }, { "epoch": 0.39, "grad_norm": 2.703125, "learning_rate": 0.0001817519008025185, "loss": 2.3288, "step": 166020 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.0001817508363545096, "loss": 2.151, "step": 166025 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018174977187857309, "loss": 2.2071, "step": 166030 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018174870737470945, "loss": 2.1169, "step": 166035 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018174764284291893, "loss": 1.9607, "step": 166040 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018174657828320191, "loss": 2.1514, "step": 166045 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018174551369555878, "loss": 2.2476, "step": 166050 }, { "epoch": 0.39, "grad_norm": 1.875, "learning_rate": 0.00018174444907998992, "loss": 2.3113, "step": 166055 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.0001817433844364957, "loss": 2.1085, "step": 166060 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.0001817423197650764, "loss": 2.0709, "step": 166065 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018174125506573247, "loss": 2.2783, "step": 166070 }, { "epoch": 0.39, "grad_norm": 3.015625, "learning_rate": 0.00018174019033846426, "loss": 2.0756, "step": 166075 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001817391255832721, "loss": 2.1466, "step": 166080 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.0001817380608001564, "loss": 2.0212, "step": 166085 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.00018173699598911743, "loss": 2.2047, "step": 166090 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018173593115015569, "loss": 2.1288, "step": 166095 }, { "epoch": 0.39, "grad_norm": 2.9375, "learning_rate": 0.00018173486628327143, "loss": 2.1006, "step": 166100 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018173380138846509, "loss": 2.158, "step": 166105 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018173273646573696, "loss": 2.2588, "step": 166110 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018173167151508746, "loss": 2.2476, "step": 166115 }, { "epoch": 0.39, "grad_norm": 2.71875, "learning_rate": 0.00018173060653651696, "loss": 2.0131, "step": 166120 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018172954153002579, "loss": 2.1488, "step": 166125 }, { "epoch": 0.39, "grad_norm": 1.90625, "learning_rate": 0.00018172847649561432, "loss": 2.1318, "step": 166130 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018172741143328294, "loss": 2.1126, "step": 166135 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018172634634303197, "loss": 2.1432, "step": 166140 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001817252812248618, "loss": 2.2035, "step": 166145 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001817242160787728, "loss": 2.2873, "step": 166150 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.0001817231509047653, "loss": 2.0917, "step": 166155 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018172208570283976, "loss": 1.9571, "step": 166160 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.0001817210204729964, "loss": 2.1884, "step": 166165 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018171995521523569, "loss": 1.9775, "step": 166170 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018171888992955798, "loss": 2.0746, "step": 166175 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018171782461596356, "loss": 2.2962, "step": 166180 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001817167592744529, "loss": 2.0496, "step": 166185 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.00018171569390502628, "loss": 2.1086, "step": 166190 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0001817146285076841, "loss": 2.1047, "step": 166195 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018171356308242675, "loss": 2.1754, "step": 166200 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018171249762925453, "loss": 2.0966, "step": 166205 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018171143214816787, "loss": 2.2295, "step": 166210 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018171036663916707, "loss": 2.1611, "step": 166215 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018170930110225254, "loss": 2.1513, "step": 166220 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018170823553742468, "loss": 2.2183, "step": 166225 }, { "epoch": 0.39, "grad_norm": 1.9921875, "learning_rate": 0.00018170716994468373, "loss": 2.0007, "step": 166230 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018170610432403014, "loss": 1.9704, "step": 166235 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.0001817050386754643, "loss": 2.0604, "step": 166240 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018170397299898652, "loss": 2.0286, "step": 166245 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.00018170290729459718, "loss": 1.9622, "step": 166250 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018170184156229664, "loss": 2.2021, "step": 166255 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018170077580208528, "loss": 2.311, "step": 166260 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018169971001396343, "loss": 2.0924, "step": 166265 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.0001816986441979315, "loss": 2.0713, "step": 166270 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018169757835398984, "loss": 1.9144, "step": 166275 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001816965124821388, "loss": 2.157, "step": 166280 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018169544658237874, "loss": 2.0394, "step": 166285 }, { "epoch": 0.39, "grad_norm": 1.6796875, "learning_rate": 0.00018169438065471002, "loss": 2.1364, "step": 166290 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018169331469913304, "loss": 2.2545, "step": 166295 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018169224871564813, "loss": 2.0697, "step": 166300 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.0001816911827042557, "loss": 2.0257, "step": 166305 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018169011666495605, "loss": 2.1774, "step": 166310 }, { "epoch": 0.39, "grad_norm": 1.7265625, "learning_rate": 0.00018168905059774956, "loss": 2.1027, "step": 166315 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018168798450263665, "loss": 2.2332, "step": 166320 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001816869183796176, "loss": 2.0656, "step": 166325 }, { "epoch": 0.39, "grad_norm": 1.9296875, "learning_rate": 0.00018168585222869284, "loss": 1.9498, "step": 166330 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.0001816847860498627, "loss": 2.1351, "step": 166335 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018168371984312754, "loss": 2.0387, "step": 166340 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018168265360848777, "loss": 2.2469, "step": 166345 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018168158734594374, "loss": 2.0669, "step": 166350 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018168052105549577, "loss": 2.1984, "step": 166355 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018167945473714428, "loss": 2.1638, "step": 166360 }, { "epoch": 0.39, "grad_norm": 2.625, "learning_rate": 0.00018167838839088955, "loss": 2.1969, "step": 166365 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018167732201673203, "loss": 1.86, "step": 166370 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001816762556146721, "loss": 2.0058, "step": 166375 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018167518918471004, "loss": 2.1062, "step": 166380 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018167412272684623, "loss": 2.0963, "step": 166385 }, { "epoch": 0.39, "grad_norm": 2.75, "learning_rate": 0.0001816730562410811, "loss": 2.121, "step": 166390 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018167198972741496, "loss": 2.0885, "step": 166395 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018167092318584818, "loss": 2.0573, "step": 166400 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018166985661638115, "loss": 2.0888, "step": 166405 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001816687900190142, "loss": 2.0733, "step": 166410 }, { "epoch": 0.39, "grad_norm": 1.9453125, "learning_rate": 0.0001816677233937477, "loss": 2.0806, "step": 166415 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018166665674058203, "loss": 2.1703, "step": 166420 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018166559005951757, "loss": 2.0808, "step": 166425 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018166452335055463, "loss": 1.9065, "step": 166430 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018166345661369365, "loss": 2.061, "step": 166435 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018166238984893492, "loss": 2.2517, "step": 166440 }, { "epoch": 0.39, "grad_norm": 2.78125, "learning_rate": 0.00018166132305627884, "loss": 2.1326, "step": 166445 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018166025623572577, "loss": 2.0654, "step": 166450 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018165918938727607, "loss": 2.0483, "step": 166455 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018165812251093013, "loss": 2.1082, "step": 166460 }, { "epoch": 0.39, "grad_norm": 2.609375, "learning_rate": 0.00018165705560668828, "loss": 2.0743, "step": 166465 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018165598867455091, "loss": 2.0862, "step": 166470 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018165492171451838, "loss": 2.0828, "step": 166475 }, { "epoch": 0.39, "grad_norm": 2.640625, "learning_rate": 0.00018165385472659104, "loss": 2.055, "step": 166480 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018165278771076925, "loss": 2.3374, "step": 166485 }, { "epoch": 0.39, "grad_norm": 1.7890625, "learning_rate": 0.0001816517206670534, "loss": 2.0125, "step": 166490 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018165065359544382, "loss": 2.1396, "step": 166495 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018164958649594093, "loss": 2.0634, "step": 166500 }, { "epoch": 0.39, "grad_norm": 1.984375, "learning_rate": 0.00018164851936854504, "loss": 2.2833, "step": 166505 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001816474522132565, "loss": 2.0401, "step": 166510 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.0001816463850300758, "loss": 2.0552, "step": 166515 }, { "epoch": 0.39, "grad_norm": 1.921875, "learning_rate": 0.00018164531781900313, "loss": 2.2697, "step": 166520 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.000181644250580039, "loss": 2.2283, "step": 166525 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018164318331318367, "loss": 2.0669, "step": 166530 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018164211601843757, "loss": 2.1307, "step": 166535 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018164104869580102, "loss": 2.252, "step": 166540 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.00018163998134527442, "loss": 2.2197, "step": 166545 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.0001816389139668581, "loss": 2.1219, "step": 166550 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018163784656055252, "loss": 2.1223, "step": 166555 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018163677912635788, "loss": 2.1151, "step": 166560 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018163571166427472, "loss": 1.96, "step": 166565 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018163464417430327, "loss": 2.1359, "step": 166570 }, { "epoch": 0.39, "grad_norm": 2.59375, "learning_rate": 0.00018163357665644395, "loss": 2.1033, "step": 166575 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.00018163250911069708, "loss": 2.1026, "step": 166580 }, { "epoch": 0.39, "grad_norm": 2.75, "learning_rate": 0.00018163144153706312, "loss": 2.117, "step": 166585 }, { "epoch": 0.39, "grad_norm": 1.859375, "learning_rate": 0.00018163037393554237, "loss": 2.1196, "step": 166590 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001816293063061352, "loss": 2.0851, "step": 166595 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.00018162823864884198, "loss": 2.1008, "step": 166600 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018162717096366308, "loss": 1.9273, "step": 166605 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018162610325059885, "loss": 2.0739, "step": 166610 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018162503550964965, "loss": 2.0099, "step": 166615 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.00018162396774081587, "loss": 2.1874, "step": 166620 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.00018162289994409786, "loss": 2.195, "step": 166625 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018162183211949598, "loss": 1.9983, "step": 166630 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.0001816207642670106, "loss": 1.9482, "step": 166635 }, { "epoch": 0.39, "grad_norm": 1.9453125, "learning_rate": 0.00018161969638664211, "loss": 2.1103, "step": 166640 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.00018161862847839084, "loss": 2.0788, "step": 166645 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018161756054225716, "loss": 2.1595, "step": 166650 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018161649257824144, "loss": 2.2445, "step": 166655 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.00018161542458634407, "loss": 2.0056, "step": 166660 }, { "epoch": 0.39, "grad_norm": 2.53125, "learning_rate": 0.0001816143565665654, "loss": 2.1757, "step": 166665 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018161328851890574, "loss": 2.1629, "step": 166670 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001816122204433655, "loss": 1.984, "step": 166675 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018161115233994508, "loss": 2.1245, "step": 166680 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001816100842086448, "loss": 2.2018, "step": 166685 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.000181609016049465, "loss": 2.1037, "step": 166690 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.0001816079478624061, "loss": 2.2995, "step": 166695 }, { "epoch": 0.39, "grad_norm": 2.71875, "learning_rate": 0.00018160687964746848, "loss": 2.2993, "step": 166700 }, { "epoch": 0.39, "grad_norm": 1.828125, "learning_rate": 0.00018160581140465242, "loss": 2.1018, "step": 166705 }, { "epoch": 0.39, "grad_norm": 2.671875, "learning_rate": 0.00018160474313395837, "loss": 2.2614, "step": 166710 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.00018160367483538664, "loss": 2.1982, "step": 166715 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018160260650893764, "loss": 2.245, "step": 166720 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.0001816015381546117, "loss": 2.0431, "step": 166725 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018160046977240917, "loss": 2.019, "step": 166730 }, { "epoch": 0.39, "grad_norm": 1.953125, "learning_rate": 0.00018159940136233048, "loss": 2.0745, "step": 166735 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001815983329243759, "loss": 2.2085, "step": 166740 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.0001815972644585459, "loss": 1.9189, "step": 166745 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018159619596484078, "loss": 2.1704, "step": 166750 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.0001815951274432609, "loss": 2.124, "step": 166755 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018159405889380667, "loss": 2.2294, "step": 166760 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.0001815929903164784, "loss": 2.138, "step": 166765 }, { "epoch": 0.39, "grad_norm": 2.59375, "learning_rate": 0.00018159192171127652, "loss": 1.8743, "step": 166770 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018159085307820134, "loss": 2.1084, "step": 166775 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018158978441725325, "loss": 1.9806, "step": 166780 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.0001815887157284326, "loss": 2.058, "step": 166785 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.0001815876470117398, "loss": 1.9144, "step": 166790 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018158657826717516, "loss": 1.9121, "step": 166795 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018158550949473905, "loss": 2.0631, "step": 166800 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.00018158444069443186, "loss": 2.2822, "step": 166805 }, { "epoch": 0.39, "grad_norm": 1.9296875, "learning_rate": 0.00018158337186625394, "loss": 2.0287, "step": 166810 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018158230301020567, "loss": 2.2901, "step": 166815 }, { "epoch": 0.39, "grad_norm": 1.953125, "learning_rate": 0.0001815812341262874, "loss": 2.0135, "step": 166820 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.0001815801652144995, "loss": 2.1026, "step": 166825 }, { "epoch": 0.39, "grad_norm": 2.640625, "learning_rate": 0.00018157909627484236, "loss": 2.2548, "step": 166830 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018157802730731627, "loss": 2.0421, "step": 166835 }, { "epoch": 0.39, "grad_norm": 1.9296875, "learning_rate": 0.0001815769583119217, "loss": 2.2036, "step": 166840 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018157588928865894, "loss": 2.0127, "step": 166845 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018157482023752836, "loss": 2.0275, "step": 166850 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018157375115853035, "loss": 2.0695, "step": 166855 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.0001815726820516653, "loss": 2.1259, "step": 166860 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001815716129169335, "loss": 2.2139, "step": 166865 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018157054375433538, "loss": 1.9698, "step": 166870 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018156947456387127, "loss": 1.9973, "step": 166875 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018156840534554156, "loss": 2.1889, "step": 166880 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001815673360993466, "loss": 2.2158, "step": 166885 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018156626682528675, "loss": 2.0404, "step": 166890 }, { "epoch": 0.39, "grad_norm": 1.9296875, "learning_rate": 0.00018156519752336237, "loss": 2.116, "step": 166895 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018156412819357386, "loss": 2.1657, "step": 166900 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018156305883592158, "loss": 1.9479, "step": 166905 }, { "epoch": 0.39, "grad_norm": 1.953125, "learning_rate": 0.0001815619894504059, "loss": 2.026, "step": 166910 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001815609200370271, "loss": 1.93, "step": 166915 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.00018155985059578565, "loss": 2.0647, "step": 166920 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018155878112668185, "loss": 2.0916, "step": 166925 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018155771162971612, "loss": 2.3103, "step": 166930 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001815566421048888, "loss": 2.1642, "step": 166935 }, { "epoch": 0.39, "grad_norm": 2.796875, "learning_rate": 0.00018155557255220022, "loss": 2.2378, "step": 166940 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.00018155450297165077, "loss": 2.1158, "step": 166945 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018155343336324087, "loss": 2.1782, "step": 166950 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0001815523637269708, "loss": 1.8741, "step": 166955 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018155129406284098, "loss": 2.2572, "step": 166960 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018155022437085175, "loss": 1.9382, "step": 166965 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018154915465100352, "loss": 1.8757, "step": 166970 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018154808490329657, "loss": 2.218, "step": 166975 }, { "epoch": 0.39, "grad_norm": 1.71875, "learning_rate": 0.00018154701512773134, "loss": 1.9496, "step": 166980 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018154594532430814, "loss": 2.028, "step": 166985 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018154487549302742, "loss": 2.1277, "step": 166990 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018154380563388945, "loss": 2.2341, "step": 166995 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018154273574689467, "loss": 2.218, "step": 167000 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018154166583204338, "loss": 2.0389, "step": 167005 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.000181540595889336, "loss": 2.0128, "step": 167010 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018153952591877284, "loss": 2.178, "step": 167015 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018153845592035434, "loss": 2.2243, "step": 167020 }, { "epoch": 0.39, "grad_norm": 1.9140625, "learning_rate": 0.00018153738589408083, "loss": 2.1484, "step": 167025 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018153631583995265, "loss": 2.1742, "step": 167030 }, { "epoch": 0.39, "grad_norm": 1.9921875, "learning_rate": 0.00018153524575797016, "loss": 2.1593, "step": 167035 }, { "epoch": 0.39, "grad_norm": 2.359375, "learning_rate": 0.0001815341756481338, "loss": 2.0984, "step": 167040 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018153310551044386, "loss": 2.1787, "step": 167045 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018153203534490074, "loss": 2.2078, "step": 167050 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018153096515150478, "loss": 2.086, "step": 167055 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018152989493025638, "loss": 2.1142, "step": 167060 }, { "epoch": 0.39, "grad_norm": 2.0, "learning_rate": 0.0001815288246811559, "loss": 2.0728, "step": 167065 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001815277544042037, "loss": 2.1452, "step": 167070 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.0001815266840994001, "loss": 2.1461, "step": 167075 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018152561376674552, "loss": 2.1842, "step": 167080 }, { "epoch": 0.39, "grad_norm": 1.8359375, "learning_rate": 0.00018152454340624035, "loss": 1.933, "step": 167085 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018152347301788487, "loss": 2.1087, "step": 167090 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018152240260167952, "loss": 2.3464, "step": 167095 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018152133215762462, "loss": 2.2329, "step": 167100 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018152026168572055, "loss": 2.1241, "step": 167105 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018151919118596771, "loss": 2.1219, "step": 167110 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018151812065836642, "loss": 2.0164, "step": 167115 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018151705010291706, "loss": 2.294, "step": 167120 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018151597951962, "loss": 2.0852, "step": 167125 }, { "epoch": 0.39, "grad_norm": 2.5, "learning_rate": 0.0001815149089084756, "loss": 2.0861, "step": 167130 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018151383826948422, "loss": 2.1651, "step": 167135 }, { "epoch": 0.39, "grad_norm": 1.9609375, "learning_rate": 0.00018151276760264626, "loss": 2.2701, "step": 167140 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018151169690796203, "loss": 2.0569, "step": 167145 }, { "epoch": 0.39, "grad_norm": 1.8671875, "learning_rate": 0.00018151062618543195, "loss": 1.9641, "step": 167150 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018150955543505637, "loss": 2.2352, "step": 167155 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018150848465683564, "loss": 2.027, "step": 167160 }, { "epoch": 0.39, "grad_norm": 2.640625, "learning_rate": 0.0001815074138507701, "loss": 2.2706, "step": 167165 }, { "epoch": 0.39, "grad_norm": 1.984375, "learning_rate": 0.00018150634301686018, "loss": 2.0315, "step": 167170 }, { "epoch": 0.39, "grad_norm": 1.8515625, "learning_rate": 0.00018150527215510622, "loss": 2.2024, "step": 167175 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018150420126550855, "loss": 1.9174, "step": 167180 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001815031303480676, "loss": 1.9096, "step": 167185 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.0001815020594027837, "loss": 2.2401, "step": 167190 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018150098842965718, "loss": 2.1906, "step": 167195 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018149991742868847, "loss": 2.0889, "step": 167200 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018149884639987794, "loss": 2.2345, "step": 167205 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018149777534322588, "loss": 2.3012, "step": 167210 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018149670425873272, "loss": 2.204, "step": 167215 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.0001814956331463988, "loss": 2.1207, "step": 167220 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018149456200622452, "loss": 2.1312, "step": 167225 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.0001814934908382102, "loss": 1.9977, "step": 167230 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018149241964235624, "loss": 2.0345, "step": 167235 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018149134841866297, "loss": 2.2174, "step": 167240 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018149027716713077, "loss": 1.9308, "step": 167245 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018148920588776003, "loss": 2.0223, "step": 167250 }, { "epoch": 0.39, "grad_norm": 3.171875, "learning_rate": 0.00018148813458055112, "loss": 2.1558, "step": 167255 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018148706324550435, "loss": 2.1177, "step": 167260 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018148599188262012, "loss": 2.3342, "step": 167265 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018148492049189884, "loss": 2.0496, "step": 167270 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.0001814838490733408, "loss": 2.0191, "step": 167275 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.0001814827776269464, "loss": 2.2572, "step": 167280 }, { "epoch": 0.39, "grad_norm": 1.96875, "learning_rate": 0.000181481706152716, "loss": 2.0762, "step": 167285 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.00018148063465065, "loss": 1.9556, "step": 167290 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.0001814795631207487, "loss": 2.2015, "step": 167295 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.00018147849156301254, "loss": 2.0425, "step": 167300 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018147741997744183, "loss": 2.1393, "step": 167305 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018147634836403694, "loss": 2.1461, "step": 167310 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.0001814752767227983, "loss": 2.2874, "step": 167315 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.0001814742050537262, "loss": 2.0366, "step": 167320 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.000181473133356821, "loss": 2.2495, "step": 167325 }, { "epoch": 0.39, "grad_norm": 2.34375, "learning_rate": 0.00018147206163208316, "loss": 2.1557, "step": 167330 }, { "epoch": 0.39, "grad_norm": 2.40625, "learning_rate": 0.00018147098987951295, "loss": 2.1915, "step": 167335 }, { "epoch": 0.39, "grad_norm": 1.9375, "learning_rate": 0.00018146991809911078, "loss": 2.1094, "step": 167340 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018146884629087698, "loss": 2.2145, "step": 167345 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.000181467774454812, "loss": 2.1957, "step": 167350 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.0001814667025909161, "loss": 2.1498, "step": 167355 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018146563069918977, "loss": 2.1416, "step": 167360 }, { "epoch": 0.39, "grad_norm": 1.921875, "learning_rate": 0.0001814645587796332, "loss": 2.1535, "step": 167365 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018146348683224694, "loss": 2.1227, "step": 167370 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018146241485703126, "loss": 2.083, "step": 167375 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.0001814613428539865, "loss": 2.2899, "step": 167380 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.0001814602708231131, "loss": 2.1424, "step": 167385 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018145919876441142, "loss": 1.8659, "step": 167390 }, { "epoch": 0.39, "grad_norm": 2.578125, "learning_rate": 0.0001814581266778818, "loss": 2.0509, "step": 167395 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018145705456352454, "loss": 2.162, "step": 167400 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018145598242134012, "loss": 2.3204, "step": 167405 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018145491025132885, "loss": 2.1043, "step": 167410 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.0001814538380534911, "loss": 2.0192, "step": 167415 }, { "epoch": 0.39, "grad_norm": 2.828125, "learning_rate": 0.00018145276582782723, "loss": 2.0741, "step": 167420 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018145169357433765, "loss": 1.9015, "step": 167425 }, { "epoch": 0.39, "grad_norm": 2.75, "learning_rate": 0.00018145062129302272, "loss": 2.1494, "step": 167430 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018144954898388272, "loss": 2.0337, "step": 167435 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018144847664691808, "loss": 2.1023, "step": 167440 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.0001814474042821292, "loss": 2.1653, "step": 167445 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.0001814463318895164, "loss": 2.1634, "step": 167450 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018144525946908003, "loss": 2.0752, "step": 167455 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018144418702082052, "loss": 2.1486, "step": 167460 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018144311454473816, "loss": 2.2152, "step": 167465 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018144204204083338, "loss": 2.3794, "step": 167470 }, { "epoch": 0.39, "grad_norm": 2.65625, "learning_rate": 0.0001814409695091065, "loss": 2.0215, "step": 167475 }, { "epoch": 0.39, "grad_norm": 1.8828125, "learning_rate": 0.00018143989694955793, "loss": 2.2048, "step": 167480 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018143882436218802, "loss": 2.1829, "step": 167485 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.0001814377517469971, "loss": 2.315, "step": 167490 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.0001814366791039856, "loss": 2.2398, "step": 167495 }, { "epoch": 0.39, "grad_norm": 1.78125, "learning_rate": 0.00018143560643315383, "loss": 1.9984, "step": 167500 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018143453373450216, "loss": 2.199, "step": 167505 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018143346100803103, "loss": 2.1219, "step": 167510 }, { "epoch": 0.39, "grad_norm": 2.140625, "learning_rate": 0.00018143238825374072, "loss": 2.0497, "step": 167515 }, { "epoch": 0.39, "grad_norm": 2.09375, "learning_rate": 0.00018143131547163167, "loss": 2.191, "step": 167520 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018143024266170413, "loss": 2.0883, "step": 167525 }, { "epoch": 0.39, "grad_norm": 2.078125, "learning_rate": 0.00018142916982395862, "loss": 2.1289, "step": 167530 }, { "epoch": 0.39, "grad_norm": 2.546875, "learning_rate": 0.00018142809695839539, "loss": 2.3409, "step": 167535 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018142702406501483, "loss": 2.3975, "step": 167540 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018142595114381736, "loss": 2.0705, "step": 167545 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018142487819480327, "loss": 2.2059, "step": 167550 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018142380521797297, "loss": 2.4817, "step": 167555 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018142273221332685, "loss": 2.2203, "step": 167560 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.00018142165918086522, "loss": 2.1341, "step": 167565 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001814205861205885, "loss": 2.2782, "step": 167570 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.000181419513032497, "loss": 2.11, "step": 167575 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018141843991659114, "loss": 2.1571, "step": 167580 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018141736677287126, "loss": 2.0803, "step": 167585 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018141629360133774, "loss": 2.1595, "step": 167590 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.00018141522040199094, "loss": 2.2279, "step": 167595 }, { "epoch": 0.39, "grad_norm": 2.296875, "learning_rate": 0.0001814141471748312, "loss": 2.1855, "step": 167600 }, { "epoch": 0.39, "grad_norm": 2.5625, "learning_rate": 0.0001814130739198589, "loss": 2.2013, "step": 167605 }, { "epoch": 0.39, "grad_norm": 2.03125, "learning_rate": 0.00018141200063707448, "loss": 2.3458, "step": 167610 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.0001814109273264782, "loss": 2.2628, "step": 167615 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018140985398807047, "loss": 2.094, "step": 167620 }, { "epoch": 0.39, "grad_norm": 2.375, "learning_rate": 0.00018140878062185167, "loss": 2.0439, "step": 167625 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.00018140770722782214, "loss": 2.4035, "step": 167630 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018140663380598228, "loss": 2.0635, "step": 167635 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001814055603563324, "loss": 2.2385, "step": 167640 }, { "epoch": 0.39, "grad_norm": 2.1875, "learning_rate": 0.00018140448687887294, "loss": 1.9478, "step": 167645 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018140341337360424, "loss": 2.1533, "step": 167650 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018140233984052665, "loss": 2.287, "step": 167655 }, { "epoch": 0.39, "grad_norm": 2.796875, "learning_rate": 0.0001814012662796405, "loss": 2.073, "step": 167660 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018140019269094623, "loss": 2.1114, "step": 167665 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018139911907444418, "loss": 2.2237, "step": 167670 }, { "epoch": 0.39, "grad_norm": 2.015625, "learning_rate": 0.0001813980454301347, "loss": 1.9545, "step": 167675 }, { "epoch": 0.39, "grad_norm": 2.109375, "learning_rate": 0.00018139697175801822, "loss": 2.213, "step": 167680 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018139589805809503, "loss": 2.2605, "step": 167685 }, { "epoch": 0.39, "grad_norm": 2.21875, "learning_rate": 0.00018139482433036553, "loss": 2.2222, "step": 167690 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 0.0001813937505748301, "loss": 2.3416, "step": 167695 }, { "epoch": 0.39, "grad_norm": 2.171875, "learning_rate": 0.00018139267679148904, "loss": 1.9379, "step": 167700 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018139160298034278, "loss": 2.1118, "step": 167705 }, { "epoch": 0.39, "grad_norm": 1.9921875, "learning_rate": 0.00018139052914139167, "loss": 2.0702, "step": 167710 }, { "epoch": 0.39, "grad_norm": 2.25, "learning_rate": 0.0001813894552746361, "loss": 1.9831, "step": 167715 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.00018138838138007638, "loss": 2.2296, "step": 167720 }, { "epoch": 0.39, "grad_norm": 2.265625, "learning_rate": 0.00018138730745771294, "loss": 1.9826, "step": 167725 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.00018138623350754613, "loss": 2.1269, "step": 167730 }, { "epoch": 0.39, "grad_norm": 2.3125, "learning_rate": 0.00018138515952957632, "loss": 2.0843, "step": 167735 }, { "epoch": 0.39, "grad_norm": 2.0625, "learning_rate": 0.00018138408552380382, "loss": 2.0667, "step": 167740 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.00018138301149022906, "loss": 1.9047, "step": 167745 }, { "epoch": 0.39, "grad_norm": 2.15625, "learning_rate": 0.0001813819374288524, "loss": 2.156, "step": 167750 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018138086333967417, "loss": 2.2406, "step": 167755 }, { "epoch": 0.39, "grad_norm": 2.203125, "learning_rate": 0.00018137978922269475, "loss": 2.0567, "step": 167760 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018137871507791455, "loss": 2.1125, "step": 167765 }, { "epoch": 0.39, "grad_norm": 2.65625, "learning_rate": 0.0001813776409053339, "loss": 2.0694, "step": 167770 }, { "epoch": 0.39, "grad_norm": 2.046875, "learning_rate": 0.00018137656670495317, "loss": 2.2164, "step": 167775 }, { "epoch": 0.39, "grad_norm": 1.8515625, "learning_rate": 0.00018137549247677272, "loss": 2.3369, "step": 167780 }, { "epoch": 0.39, "grad_norm": 2.125, "learning_rate": 0.00018137441822079292, "loss": 2.258, "step": 167785 }, { "epoch": 0.39, "grad_norm": 1.9296875, "learning_rate": 0.00018137334393701418, "loss": 2.2012, "step": 167790 }, { "epoch": 0.39, "grad_norm": 2.421875, "learning_rate": 0.0001813722696254368, "loss": 2.0346, "step": 167795 }, { "epoch": 0.39, "grad_norm": 2.796875, "learning_rate": 0.00018137119528606116, "loss": 2.2104, "step": 167800 }, { "epoch": 0.39, "grad_norm": 2.234375, "learning_rate": 0.00018137012091888765, "loss": 2.2324, "step": 167805 }, { "epoch": 0.39, "grad_norm": 2.484375, "learning_rate": 0.00018136904652391665, "loss": 2.1332, "step": 167810 }, { "epoch": 0.39, "grad_norm": 2.515625, "learning_rate": 0.00018136797210114853, "loss": 2.2679, "step": 167815 }, { "epoch": 0.39, "grad_norm": 2.328125, "learning_rate": 0.0001813668976505836, "loss": 2.2513, "step": 167820 }, { "epoch": 0.39, "grad_norm": 2.390625, "learning_rate": 0.00018136582317222226, "loss": 2.1485, "step": 167825 }, { "epoch": 0.39, "grad_norm": 2.4375, "learning_rate": 0.0001813647486660649, "loss": 1.891, "step": 167830 }, { "epoch": 0.39, "grad_norm": 2.28125, "learning_rate": 0.00018136367413211187, "loss": 2.2163, "step": 167835 }, { "epoch": 0.39, "grad_norm": 2.46875, "learning_rate": 0.00018136259957036353, "loss": 2.0551, "step": 167840 }, { "epoch": 0.39, "grad_norm": 1.9765625, "learning_rate": 0.0001813615249808202, "loss": 1.9957, "step": 167845 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018136045036348236, "loss": 2.1205, "step": 167850 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018135937571835028, "loss": 2.0721, "step": 167855 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018135830104542438, "loss": 2.0842, "step": 167860 }, { "epoch": 0.4, "grad_norm": 1.6328125, "learning_rate": 0.000181357226344705, "loss": 2.1697, "step": 167865 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018135615161619253, "loss": 2.1113, "step": 167870 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001813550768598873, "loss": 2.3105, "step": 167875 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.0001813540020757897, "loss": 2.2224, "step": 167880 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018135292726390012, "loss": 2.0212, "step": 167885 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018135185242421892, "loss": 2.1182, "step": 167890 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001813507775567464, "loss": 2.2198, "step": 167895 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.000181349702661483, "loss": 2.1017, "step": 167900 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001813486277384291, "loss": 2.113, "step": 167905 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018134755278758496, "loss": 1.918, "step": 167910 }, { "epoch": 0.4, "grad_norm": 2.578125, "learning_rate": 0.00018134647780895108, "loss": 2.1917, "step": 167915 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018134540280252776, "loss": 2.0885, "step": 167920 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018134432776831537, "loss": 2.2731, "step": 167925 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018134325270631428, "loss": 2.0741, "step": 167930 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018134217761652487, "loss": 2.1199, "step": 167935 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.0001813411024989475, "loss": 2.1031, "step": 167940 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018134002735358247, "loss": 2.2121, "step": 167945 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018133895218043028, "loss": 2.1521, "step": 167950 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018133787697949123, "loss": 2.0594, "step": 167955 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018133680175076567, "loss": 2.1726, "step": 167960 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.000181335726494254, "loss": 2.1758, "step": 167965 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018133465120995652, "loss": 2.2158, "step": 167970 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.0001813335758978737, "loss": 2.0897, "step": 167975 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.0001813325005580058, "loss": 1.8856, "step": 167980 }, { "epoch": 0.4, "grad_norm": 2.640625, "learning_rate": 0.00018133142519035332, "loss": 2.3015, "step": 167985 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.0001813303497949165, "loss": 2.097, "step": 167990 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018132927437169576, "loss": 2.2042, "step": 167995 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018132819892069147, "loss": 2.0995, "step": 168000 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018132712344190398, "loss": 2.1496, "step": 168005 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001813260479353337, "loss": 2.0623, "step": 168010 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018132497240098093, "loss": 2.0913, "step": 168015 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001813238968388461, "loss": 2.1982, "step": 168020 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.00018132282124892958, "loss": 2.163, "step": 168025 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018132174563123167, "loss": 2.1211, "step": 168030 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018132066998575275, "loss": 2.0914, "step": 168035 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018131959431249325, "loss": 2.1514, "step": 168040 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.0001813185186114535, "loss": 2.1365, "step": 168045 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018131744288263387, "loss": 2.013, "step": 168050 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018131636712603473, "loss": 2.1355, "step": 168055 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018131529134165643, "loss": 1.9514, "step": 168060 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018131421552949938, "loss": 2.064, "step": 168065 }, { "epoch": 0.4, "grad_norm": 2.671875, "learning_rate": 0.0001813131396895639, "loss": 2.1322, "step": 168070 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018131206382185035, "loss": 2.1483, "step": 168075 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018131098792635917, "loss": 2.1388, "step": 168080 }, { "epoch": 0.4, "grad_norm": 1.84375, "learning_rate": 0.00018130991200309064, "loss": 2.1286, "step": 168085 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.0001813088360520452, "loss": 2.1335, "step": 168090 }, { "epoch": 0.4, "grad_norm": 1.78125, "learning_rate": 0.00018130776007322318, "loss": 2.2374, "step": 168095 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018130668406662496, "loss": 2.1823, "step": 168100 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001813056080322509, "loss": 2.2924, "step": 168105 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018130453197010135, "loss": 2.0549, "step": 168110 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018130345588017672, "loss": 2.305, "step": 168115 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018130237976247737, "loss": 2.0325, "step": 168120 }, { "epoch": 0.4, "grad_norm": 1.6484375, "learning_rate": 0.0001813013036170036, "loss": 1.9923, "step": 168125 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018130022744375588, "loss": 2.1301, "step": 168130 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.0001812991512427345, "loss": 2.0792, "step": 168135 }, { "epoch": 0.4, "grad_norm": 3.484375, "learning_rate": 0.00018129807501393987, "loss": 2.1718, "step": 168140 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018129699875737233, "loss": 2.2276, "step": 168145 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018129592247303225, "loss": 2.3258, "step": 168150 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018129484616092006, "loss": 2.0584, "step": 168155 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018129376982103603, "loss": 2.1041, "step": 168160 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001812926934533806, "loss": 2.0385, "step": 168165 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018129161705795409, "loss": 2.0299, "step": 168170 }, { "epoch": 0.4, "grad_norm": 1.7109375, "learning_rate": 0.0001812905406347569, "loss": 2.0387, "step": 168175 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001812894641837894, "loss": 1.9889, "step": 168180 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018128838770505192, "loss": 2.1619, "step": 168185 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018128731119854488, "loss": 2.0822, "step": 168190 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018128623466426858, "loss": 2.0819, "step": 168195 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018128515810222348, "loss": 2.2108, "step": 168200 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018128408151240984, "loss": 1.9801, "step": 168205 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001812830048948281, "loss": 2.1408, "step": 168210 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018128192824947863, "loss": 2.2307, "step": 168215 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018128085157636175, "loss": 2.1694, "step": 168220 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.0001812797748754779, "loss": 2.2092, "step": 168225 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001812786981468274, "loss": 2.1693, "step": 168230 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018127762139041057, "loss": 2.2413, "step": 168235 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018127654460622786, "loss": 2.0865, "step": 168240 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001812754677942796, "loss": 2.037, "step": 168245 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001812743909545662, "loss": 2.347, "step": 168250 }, { "epoch": 0.4, "grad_norm": 1.8984375, "learning_rate": 0.000181273314087088, "loss": 2.1302, "step": 168255 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.0001812722371918453, "loss": 2.1363, "step": 168260 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018127116026883856, "loss": 2.0896, "step": 168265 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018127008331806813, "loss": 2.0734, "step": 168270 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018126900633953435, "loss": 2.0204, "step": 168275 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018126792933323763, "loss": 2.139, "step": 168280 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.0001812668522991783, "loss": 1.9018, "step": 168285 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018126577523735674, "loss": 2.2678, "step": 168290 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018126469814777328, "loss": 2.3012, "step": 168295 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018126362103042837, "loss": 1.8946, "step": 168300 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018126254388532235, "loss": 2.0896, "step": 168305 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018126146671245552, "loss": 2.0553, "step": 168310 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018126038951182832, "loss": 2.031, "step": 168315 }, { "epoch": 0.4, "grad_norm": 1.875, "learning_rate": 0.0001812593122834411, "loss": 2.2774, "step": 168320 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.0001812582350272942, "loss": 2.2523, "step": 168325 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018125715774338806, "loss": 2.202, "step": 168330 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018125608043172297, "loss": 2.0792, "step": 168335 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018125500309229932, "loss": 1.9702, "step": 168340 }, { "epoch": 0.4, "grad_norm": 2.484375, "learning_rate": 0.00018125392572511753, "loss": 2.1721, "step": 168345 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018125284833017788, "loss": 2.2465, "step": 168350 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.0001812517709074808, "loss": 2.1589, "step": 168355 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018125069345702665, "loss": 2.202, "step": 168360 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018124961597881578, "loss": 2.0307, "step": 168365 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018124853847284857, "loss": 2.2295, "step": 168370 }, { "epoch": 0.4, "grad_norm": 2.78125, "learning_rate": 0.00018124746093912538, "loss": 2.109, "step": 168375 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018124638337764658, "loss": 2.0563, "step": 168380 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018124530578841257, "loss": 2.0283, "step": 168385 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018124422817142368, "loss": 2.1858, "step": 168390 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018124315052668027, "loss": 2.2407, "step": 168395 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018124207285418273, "loss": 2.1038, "step": 168400 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018124099515393145, "loss": 2.1833, "step": 168405 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018123991742592674, "loss": 2.151, "step": 168410 }, { "epoch": 0.4, "grad_norm": 2.703125, "learning_rate": 0.00018123883967016902, "loss": 2.3073, "step": 168415 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.0001812377618866586, "loss": 2.0188, "step": 168420 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018123668407539595, "loss": 1.8507, "step": 168425 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018123560623638135, "loss": 2.1763, "step": 168430 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018123452836961518, "loss": 2.0464, "step": 168435 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018123345047509783, "loss": 2.093, "step": 168440 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018123237255282968, "loss": 1.9601, "step": 168445 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018123129460281105, "loss": 2.2889, "step": 168450 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018123021662504236, "loss": 2.3583, "step": 168455 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018122913861952396, "loss": 2.065, "step": 168460 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018122806058625617, "loss": 2.0538, "step": 168465 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001812269825252394, "loss": 2.1257, "step": 168470 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018122590443647407, "loss": 2.1159, "step": 168475 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018122482631996047, "loss": 2.2417, "step": 168480 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018122374817569902, "loss": 2.1386, "step": 168485 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.00018122267000369001, "loss": 2.1665, "step": 168490 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.0001812215918039339, "loss": 2.2106, "step": 168495 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.000181220513576431, "loss": 2.3346, "step": 168500 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018121943532118176, "loss": 2.1642, "step": 168505 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018121835703818645, "loss": 2.0894, "step": 168510 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.00018121727872744546, "loss": 2.1081, "step": 168515 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018121620038895916, "loss": 2.0607, "step": 168520 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018121512202272797, "loss": 2.1778, "step": 168525 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018121404362875224, "loss": 2.1086, "step": 168530 }, { "epoch": 0.4, "grad_norm": 2.484375, "learning_rate": 0.00018121296520703229, "loss": 2.2138, "step": 168535 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001812118867575685, "loss": 2.0975, "step": 168540 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018121080828036127, "loss": 2.0348, "step": 168545 }, { "epoch": 0.4, "grad_norm": 1.859375, "learning_rate": 0.00018120972977541097, "loss": 2.1802, "step": 168550 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018120865124271793, "loss": 2.0572, "step": 168555 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018120757268228258, "loss": 2.1693, "step": 168560 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018120649409410523, "loss": 2.0486, "step": 168565 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018120541547818624, "loss": 2.1196, "step": 168570 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018120433683452603, "loss": 2.2319, "step": 168575 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018120325816312497, "loss": 2.2585, "step": 168580 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018120217946398338, "loss": 2.0306, "step": 168585 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018120110073710166, "loss": 2.1886, "step": 168590 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018120002198248017, "loss": 2.2594, "step": 168595 }, { "epoch": 0.4, "grad_norm": 1.984375, "learning_rate": 0.00018119894320011928, "loss": 2.1587, "step": 168600 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018119786439001935, "loss": 2.208, "step": 168605 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018119678555218076, "loss": 2.1192, "step": 168610 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.00018119570668660386, "loss": 2.2957, "step": 168615 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001811946277932891, "loss": 2.0458, "step": 168620 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.0001811935488722367, "loss": 2.2069, "step": 168625 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018119246992344716, "loss": 2.1743, "step": 168630 }, { "epoch": 0.4, "grad_norm": 1.78125, "learning_rate": 0.00018119139094692076, "loss": 2.2447, "step": 168635 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018119031194265795, "loss": 2.1927, "step": 168640 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018118923291065902, "loss": 2.0798, "step": 168645 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.0001811881538509244, "loss": 2.1941, "step": 168650 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018118707476345443, "loss": 2.0932, "step": 168655 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001811859956482495, "loss": 2.1363, "step": 168660 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018118491650530994, "loss": 2.1932, "step": 168665 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.00018118383733463613, "loss": 2.3514, "step": 168670 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018118275813622847, "loss": 1.9585, "step": 168675 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.0001811816789100873, "loss": 2.0788, "step": 168680 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.000181180599656213, "loss": 2.0859, "step": 168685 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018117952037460594, "loss": 2.0672, "step": 168690 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018117844106526647, "loss": 2.2444, "step": 168695 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018117736172819498, "loss": 2.2443, "step": 168700 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018117628236339182, "loss": 2.2259, "step": 168705 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.0001811752029708574, "loss": 2.1381, "step": 168710 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018117412355059202, "loss": 2.1434, "step": 168715 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001811730441025961, "loss": 2.2283, "step": 168720 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018117196462687002, "loss": 2.1023, "step": 168725 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.0001811708851234141, "loss": 2.0563, "step": 168730 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018116980559222873, "loss": 2.0764, "step": 168735 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018116872603331433, "loss": 1.915, "step": 168740 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018116764644667116, "loss": 2.0168, "step": 168745 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018116656683229968, "loss": 2.1235, "step": 168750 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018116548719020021, "loss": 2.1106, "step": 168755 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018116440752037315, "loss": 2.2947, "step": 168760 }, { "epoch": 0.4, "grad_norm": 2.71875, "learning_rate": 0.00018116332782281886, "loss": 2.3323, "step": 168765 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018116224809753767, "loss": 2.18, "step": 168770 }, { "epoch": 0.4, "grad_norm": 1.859375, "learning_rate": 0.00018116116834453004, "loss": 1.9787, "step": 168775 }, { "epoch": 0.4, "grad_norm": 1.890625, "learning_rate": 0.00018116008856379625, "loss": 2.0963, "step": 168780 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018115900875533672, "loss": 1.9626, "step": 168785 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.0001811579289191518, "loss": 2.0812, "step": 168790 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018115684905524183, "loss": 2.1087, "step": 168795 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018115576916360725, "loss": 2.2272, "step": 168800 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018115468924424834, "loss": 1.9566, "step": 168805 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018115360929716557, "loss": 2.1277, "step": 168810 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.0001811525293223592, "loss": 2.1457, "step": 168815 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001811514493198297, "loss": 2.2599, "step": 168820 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018115036928957736, "loss": 2.0847, "step": 168825 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018114928923160259, "loss": 2.1552, "step": 168830 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018114820914590576, "loss": 2.0085, "step": 168835 }, { "epoch": 0.4, "grad_norm": 1.765625, "learning_rate": 0.00018114712903248722, "loss": 2.1703, "step": 168840 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018114604889134735, "loss": 2.2347, "step": 168845 }, { "epoch": 0.4, "grad_norm": 1.7109375, "learning_rate": 0.00018114496872248653, "loss": 2.2058, "step": 168850 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018114388852590512, "loss": 1.9393, "step": 168855 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.0001811428083016035, "loss": 2.0688, "step": 168860 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018114172804958197, "loss": 2.0961, "step": 168865 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.000181140647769841, "loss": 2.1854, "step": 168870 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001811395674623809, "loss": 2.1395, "step": 168875 }, { "epoch": 0.4, "grad_norm": 1.828125, "learning_rate": 0.00018113848712720206, "loss": 2.0412, "step": 168880 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018113740676430485, "loss": 2.2099, "step": 168885 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001811363263736896, "loss": 2.0, "step": 168890 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018113524595535671, "loss": 2.1886, "step": 168895 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018113416550930657, "loss": 2.1032, "step": 168900 }, { "epoch": 0.4, "grad_norm": 1.984375, "learning_rate": 0.00018113308503553952, "loss": 2.2179, "step": 168905 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.0001811320045340559, "loss": 2.1206, "step": 168910 }, { "epoch": 0.4, "grad_norm": 1.90625, "learning_rate": 0.00018113092400485619, "loss": 2.1617, "step": 168915 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018112984344794065, "loss": 2.1025, "step": 168920 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018112876286330968, "loss": 2.0482, "step": 168925 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018112768225096363, "loss": 2.2488, "step": 168930 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018112660161090293, "loss": 2.1736, "step": 168935 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001811255209431279, "loss": 2.3244, "step": 168940 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018112444024763892, "loss": 2.0736, "step": 168945 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018112335952443637, "loss": 2.0993, "step": 168950 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.0001811222787735206, "loss": 2.1149, "step": 168955 }, { "epoch": 0.4, "grad_norm": 1.9921875, "learning_rate": 0.00018112119799489198, "loss": 2.2452, "step": 168960 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018112011718855088, "loss": 1.9993, "step": 168965 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018111903635449768, "loss": 2.0538, "step": 168970 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.0001811179554927328, "loss": 2.0374, "step": 168975 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018111687460325648, "loss": 2.0184, "step": 168980 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018111579368606918, "loss": 2.2678, "step": 168985 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.0001811147127411713, "loss": 2.2735, "step": 168990 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018111363176856314, "loss": 1.9621, "step": 168995 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018111255076824507, "loss": 1.9371, "step": 169000 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.0001811114697402175, "loss": 1.968, "step": 169005 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001811103886844808, "loss": 2.0977, "step": 169010 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001811093076010353, "loss": 2.0058, "step": 169015 }, { "epoch": 0.4, "grad_norm": 3.03125, "learning_rate": 0.00018110822648988138, "loss": 2.2429, "step": 169020 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001811071453510194, "loss": 2.0903, "step": 169025 }, { "epoch": 0.4, "grad_norm": 2.59375, "learning_rate": 0.0001811060641844498, "loss": 2.189, "step": 169030 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018110498299017284, "loss": 2.1526, "step": 169035 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.000181103901768189, "loss": 2.1302, "step": 169040 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001811028205184986, "loss": 2.1933, "step": 169045 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018110173924110195, "loss": 2.2274, "step": 169050 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.0001811006579359995, "loss": 2.1519, "step": 169055 }, { "epoch": 0.4, "grad_norm": 1.953125, "learning_rate": 0.00018109957660319162, "loss": 2.2199, "step": 169060 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018109849524267863, "loss": 2.031, "step": 169065 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018109741385446092, "loss": 2.2002, "step": 169070 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018109633243853888, "loss": 2.0977, "step": 169075 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018109525099491286, "loss": 2.0476, "step": 169080 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.0001810941695235832, "loss": 2.1815, "step": 169085 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018109308802455033, "loss": 2.1892, "step": 169090 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.0001810920064978146, "loss": 2.1173, "step": 169095 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018109092494337636, "loss": 2.1097, "step": 169100 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.000181089843361236, "loss": 2.1281, "step": 169105 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018108876175139385, "loss": 2.2211, "step": 169110 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001810876801138503, "loss": 2.2892, "step": 169115 }, { "epoch": 0.4, "grad_norm": 1.7421875, "learning_rate": 0.00018108659844860576, "loss": 2.0154, "step": 169120 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018108551675566058, "loss": 1.9909, "step": 169125 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018108443503501507, "loss": 2.2411, "step": 169130 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018108335328666968, "loss": 2.0132, "step": 169135 }, { "epoch": 0.4, "grad_norm": 1.7734375, "learning_rate": 0.00018108227151062475, "loss": 2.0843, "step": 169140 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.0001810811897068806, "loss": 1.9929, "step": 169145 }, { "epoch": 0.4, "grad_norm": 1.859375, "learning_rate": 0.0001810801078754377, "loss": 2.1636, "step": 169150 }, { "epoch": 0.4, "grad_norm": 1.90625, "learning_rate": 0.00018107902601629633, "loss": 1.8966, "step": 169155 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001810779441294569, "loss": 2.1924, "step": 169160 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001810768622149198, "loss": 2.0934, "step": 169165 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018107578027268533, "loss": 2.127, "step": 169170 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018107469830275395, "loss": 2.2811, "step": 169175 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018107361630512596, "loss": 2.1471, "step": 169180 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018107253427980172, "loss": 2.169, "step": 169185 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018107145222678167, "loss": 2.2489, "step": 169190 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018107037014606614, "loss": 2.2022, "step": 169195 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.0001810692880376555, "loss": 2.1391, "step": 169200 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018106820590155012, "loss": 2.2357, "step": 169205 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018106712373775036, "loss": 2.1254, "step": 169210 }, { "epoch": 0.4, "grad_norm": 2.625, "learning_rate": 0.0001810660415462566, "loss": 2.0229, "step": 169215 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018106495932706924, "loss": 2.1678, "step": 169220 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018106387708018859, "loss": 2.2636, "step": 169225 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018106279480561508, "loss": 1.9816, "step": 169230 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.000181061712503349, "loss": 2.234, "step": 169235 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018106063017339082, "loss": 2.1589, "step": 169240 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.0001810595478157408, "loss": 2.1293, "step": 169245 }, { "epoch": 0.4, "grad_norm": 2.609375, "learning_rate": 0.0001810584654303994, "loss": 1.9648, "step": 169250 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018105738301736697, "loss": 2.0656, "step": 169255 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018105630057664387, "loss": 2.245, "step": 169260 }, { "epoch": 0.4, "grad_norm": 1.7734375, "learning_rate": 0.00018105521810823046, "loss": 2.1668, "step": 169265 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.0001810541356121271, "loss": 2.0491, "step": 169270 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018105305308833421, "loss": 2.0316, "step": 169275 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018105197053685208, "loss": 2.3545, "step": 169280 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018105088795768116, "loss": 1.9995, "step": 169285 }, { "epoch": 0.4, "grad_norm": 2.71875, "learning_rate": 0.00018104980535082178, "loss": 2.0669, "step": 169290 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018104872271627434, "loss": 2.27, "step": 169295 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018104764005403916, "loss": 2.0653, "step": 169300 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018104655736411664, "loss": 2.2101, "step": 169305 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.00018104547464650714, "loss": 2.2475, "step": 169310 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018104439190121105, "loss": 2.0447, "step": 169315 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018104330912822872, "loss": 2.2078, "step": 169320 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018104222632756053, "loss": 2.1995, "step": 169325 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018104114349920685, "loss": 2.2373, "step": 169330 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018104006064316802, "loss": 2.2323, "step": 169335 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018103897775944444, "loss": 1.9352, "step": 169340 }, { "epoch": 0.4, "grad_norm": 2.65625, "learning_rate": 0.0001810378948480365, "loss": 2.1448, "step": 169345 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018103681190894451, "loss": 2.1825, "step": 169350 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.0001810357289421689, "loss": 2.1472, "step": 169355 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018103464594771004, "loss": 2.1113, "step": 169360 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018103356292556823, "loss": 2.1472, "step": 169365 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.0001810324798757439, "loss": 2.2273, "step": 169370 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018103139679823742, "loss": 2.2028, "step": 169375 }, { "epoch": 0.4, "grad_norm": 2.703125, "learning_rate": 0.00018103031369304912, "loss": 2.0026, "step": 169380 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018102923056017943, "loss": 2.034, "step": 169385 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018102814739962865, "loss": 2.1618, "step": 169390 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001810270642113972, "loss": 1.9467, "step": 169395 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018102598099548544, "loss": 2.2447, "step": 169400 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018102489775189375, "loss": 2.1683, "step": 169405 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018102381448062245, "loss": 2.1435, "step": 169410 }, { "epoch": 0.4, "grad_norm": 3.671875, "learning_rate": 0.00018102273118167196, "loss": 2.251, "step": 169415 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018102164785504266, "loss": 2.1397, "step": 169420 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018102056450073486, "loss": 2.1262, "step": 169425 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.000181019481118749, "loss": 2.0213, "step": 169430 }, { "epoch": 0.4, "grad_norm": 1.5078125, "learning_rate": 0.00018101839770908538, "loss": 2.0588, "step": 169435 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.0001810173142717444, "loss": 1.884, "step": 169440 }, { "epoch": 0.4, "grad_norm": 2.8125, "learning_rate": 0.00018101623080672648, "loss": 2.2481, "step": 169445 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018101514731403193, "loss": 2.2731, "step": 169450 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001810140637936611, "loss": 2.2, "step": 169455 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018101298024561445, "loss": 2.1509, "step": 169460 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018101189666989225, "loss": 2.1208, "step": 169465 }, { "epoch": 0.4, "grad_norm": 2.671875, "learning_rate": 0.00018101081306649497, "loss": 2.0854, "step": 169470 }, { "epoch": 0.4, "grad_norm": 1.8046875, "learning_rate": 0.00018100972943542289, "loss": 2.2989, "step": 169475 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001810086457766764, "loss": 1.9577, "step": 169480 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018100756209025592, "loss": 2.1001, "step": 169485 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018100647837616178, "loss": 2.1128, "step": 169490 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018100539463439437, "loss": 2.0682, "step": 169495 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018100431086495402, "loss": 2.2653, "step": 169500 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018100322706784115, "loss": 2.1446, "step": 169505 }, { "epoch": 0.4, "grad_norm": 1.84375, "learning_rate": 0.0001810021432430561, "loss": 1.9469, "step": 169510 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018100105939059926, "loss": 2.1973, "step": 169515 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018099997551047095, "loss": 2.185, "step": 169520 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018099889160267166, "loss": 2.2418, "step": 169525 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001809978076672016, "loss": 2.1835, "step": 169530 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018099672370406125, "loss": 1.9171, "step": 169535 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018099563971325094, "loss": 2.0294, "step": 169540 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018099455569477106, "loss": 2.1053, "step": 169545 }, { "epoch": 0.4, "grad_norm": 1.7421875, "learning_rate": 0.000180993471648622, "loss": 2.0799, "step": 169550 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018099238757480403, "loss": 2.282, "step": 169555 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018099130347331766, "loss": 2.0593, "step": 169560 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018099021934416315, "loss": 2.1476, "step": 169565 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018098913518734092, "loss": 2.0583, "step": 169570 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018098805100285134, "loss": 2.0189, "step": 169575 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001809869667906948, "loss": 2.2084, "step": 169580 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018098588255087162, "loss": 2.2515, "step": 169585 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018098479828338216, "loss": 2.2366, "step": 169590 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.0001809837139882269, "loss": 2.0794, "step": 169595 }, { "epoch": 0.4, "grad_norm": 2.796875, "learning_rate": 0.00018098262966540607, "loss": 2.1815, "step": 169600 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.0001809815453149201, "loss": 2.1296, "step": 169605 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001809804609367694, "loss": 2.1316, "step": 169610 }, { "epoch": 0.4, "grad_norm": 2.921875, "learning_rate": 0.0001809793765309543, "loss": 2.1832, "step": 169615 }, { "epoch": 0.4, "grad_norm": 1.9375, "learning_rate": 0.00018097829209747516, "loss": 2.0421, "step": 169620 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.0001809772076363324, "loss": 2.0981, "step": 169625 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.0001809761231475263, "loss": 2.0368, "step": 169630 }, { "epoch": 0.4, "grad_norm": 1.9375, "learning_rate": 0.00018097503863105736, "loss": 2.1048, "step": 169635 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018097395408692583, "loss": 1.955, "step": 169640 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018097286951513212, "loss": 2.1635, "step": 169645 }, { "epoch": 0.4, "grad_norm": 2.578125, "learning_rate": 0.00018097178491567663, "loss": 2.1127, "step": 169650 }, { "epoch": 0.4, "grad_norm": 2.859375, "learning_rate": 0.00018097070028855972, "loss": 2.2837, "step": 169655 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018096961563378175, "loss": 2.2275, "step": 169660 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018096853095134308, "loss": 1.9662, "step": 169665 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018096744624124407, "loss": 2.2101, "step": 169670 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018096636150348515, "loss": 2.1856, "step": 169675 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018096527673806665, "loss": 2.2747, "step": 169680 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001809641919449889, "loss": 2.353, "step": 169685 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018096310712425236, "loss": 2.0353, "step": 169690 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018096202227585733, "loss": 2.0969, "step": 169695 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001809609373998042, "loss": 2.2191, "step": 169700 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.00018095985249609337, "loss": 2.3191, "step": 169705 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018095876756472518, "loss": 2.1426, "step": 169710 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001809576826057, "loss": 2.2281, "step": 169715 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018095659761901823, "loss": 2.2184, "step": 169720 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018095551260468017, "loss": 2.2801, "step": 169725 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018095442756268625, "loss": 2.1503, "step": 169730 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018095334249303686, "loss": 2.1603, "step": 169735 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018095225739573234, "loss": 2.0376, "step": 169740 }, { "epoch": 0.4, "grad_norm": 2.640625, "learning_rate": 0.00018095117227077303, "loss": 2.033, "step": 169745 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018095008711815935, "loss": 2.1543, "step": 169750 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018094900193789165, "loss": 2.1232, "step": 169755 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.0001809479167299703, "loss": 2.2043, "step": 169760 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018094683149439566, "loss": 1.9159, "step": 169765 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018094574623116815, "loss": 2.0115, "step": 169770 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.0001809446609402881, "loss": 2.1775, "step": 169775 }, { "epoch": 0.4, "grad_norm": 3.140625, "learning_rate": 0.00018094357562175586, "loss": 2.1843, "step": 169780 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.0001809424902755718, "loss": 2.1812, "step": 169785 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.00018094140490173638, "loss": 1.9486, "step": 169790 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018094031950024986, "loss": 2.1463, "step": 169795 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.0001809392340711127, "loss": 2.3406, "step": 169800 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.0001809381486143252, "loss": 2.1737, "step": 169805 }, { "epoch": 0.4, "grad_norm": 2.75, "learning_rate": 0.00018093706312988778, "loss": 2.0344, "step": 169810 }, { "epoch": 0.4, "grad_norm": 1.890625, "learning_rate": 0.0001809359776178008, "loss": 2.0904, "step": 169815 }, { "epoch": 0.4, "grad_norm": 1.8515625, "learning_rate": 0.0001809348920780646, "loss": 2.3036, "step": 169820 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018093380651067957, "loss": 2.2881, "step": 169825 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018093272091564608, "loss": 2.1893, "step": 169830 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.0001809316352929645, "loss": 2.0731, "step": 169835 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018093054964263522, "loss": 2.1344, "step": 169840 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001809294639646586, "loss": 2.2269, "step": 169845 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.000180928378259035, "loss": 2.1785, "step": 169850 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018092729252576484, "loss": 2.0316, "step": 169855 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018092620676484842, "loss": 2.1312, "step": 169860 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.0001809251209762861, "loss": 2.1002, "step": 169865 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018092403516007833, "loss": 2.0929, "step": 169870 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018092294931622546, "loss": 2.1495, "step": 169875 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018092186344472778, "loss": 1.9515, "step": 169880 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018092077754558577, "loss": 2.0738, "step": 169885 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018091969161879975, "loss": 2.0381, "step": 169890 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.0001809186056643701, "loss": 1.9496, "step": 169895 }, { "epoch": 0.4, "grad_norm": 1.890625, "learning_rate": 0.0001809175196822972, "loss": 2.0889, "step": 169900 }, { "epoch": 0.4, "grad_norm": 2.59375, "learning_rate": 0.0001809164336725814, "loss": 2.1749, "step": 169905 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018091534763522304, "loss": 2.1767, "step": 169910 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018091426157022258, "loss": 2.2506, "step": 169915 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018091317547758032, "loss": 1.9869, "step": 169920 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018091208935729663, "loss": 2.1307, "step": 169925 }, { "epoch": 0.4, "grad_norm": 1.875, "learning_rate": 0.00018091100320937194, "loss": 1.946, "step": 169930 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.00018090991703380654, "loss": 2.1648, "step": 169935 }, { "epoch": 0.4, "grad_norm": 2.65625, "learning_rate": 0.00018090883083060087, "loss": 1.9663, "step": 169940 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018090774459975532, "loss": 2.1508, "step": 169945 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018090665834127014, "loss": 2.166, "step": 169950 }, { "epoch": 0.4, "grad_norm": 2.921875, "learning_rate": 0.0001809055720551458, "loss": 2.0857, "step": 169955 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018090448574138268, "loss": 1.9655, "step": 169960 }, { "epoch": 0.4, "grad_norm": 3.03125, "learning_rate": 0.00018090339939998108, "loss": 2.1213, "step": 169965 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018090231303094146, "loss": 2.2314, "step": 169970 }, { "epoch": 0.4, "grad_norm": 2.75, "learning_rate": 0.0001809012266342641, "loss": 2.0865, "step": 169975 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018090014020994945, "loss": 2.1695, "step": 169980 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.0001808990537579978, "loss": 2.2495, "step": 169985 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001808979672784096, "loss": 2.2061, "step": 169990 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018089688077118518, "loss": 2.055, "step": 169995 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001808957942363249, "loss": 2.0458, "step": 170000 }, { "epoch": 0.4, "grad_norm": 2.734375, "learning_rate": 0.0001808947076738292, "loss": 2.1651, "step": 170005 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018089362108369836, "loss": 2.0818, "step": 170010 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001808925344659328, "loss": 2.0878, "step": 170015 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018089144782053285, "loss": 2.2985, "step": 170020 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.000180890361147499, "loss": 2.0141, "step": 170025 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.00018088927444683144, "loss": 2.1489, "step": 170030 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.0001808881877185307, "loss": 2.0222, "step": 170035 }, { "epoch": 0.4, "grad_norm": 1.8515625, "learning_rate": 0.00018088710096259708, "loss": 2.1398, "step": 170040 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018088601417903092, "loss": 2.2535, "step": 170045 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018088492736783267, "loss": 2.1244, "step": 170050 }, { "epoch": 0.4, "grad_norm": 1.9921875, "learning_rate": 0.00018088384052900264, "loss": 2.0826, "step": 170055 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018088275366254122, "loss": 2.0877, "step": 170060 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001808816667684488, "loss": 2.0627, "step": 170065 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018088057984672572, "loss": 2.424, "step": 170070 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001808794928973724, "loss": 2.0557, "step": 170075 }, { "epoch": 0.4, "grad_norm": 2.59375, "learning_rate": 0.00018087840592038916, "loss": 2.1638, "step": 170080 }, { "epoch": 0.4, "grad_norm": 1.953125, "learning_rate": 0.00018087731891577638, "loss": 2.0168, "step": 170085 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018087623188353446, "loss": 2.1061, "step": 170090 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018087514482366371, "loss": 2.2721, "step": 170095 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.0001808740577361646, "loss": 2.0526, "step": 170100 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018087297062103743, "loss": 2.0884, "step": 170105 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.00018087188347828254, "loss": 2.3146, "step": 170110 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018087079630790042, "loss": 2.2836, "step": 170115 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018086970910989134, "loss": 2.1421, "step": 170120 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.0001808686218842557, "loss": 2.1543, "step": 170125 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018086753463099383, "loss": 1.9605, "step": 170130 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001808664473501062, "loss": 2.1374, "step": 170135 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018086536004159308, "loss": 2.1252, "step": 170140 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018086427270545493, "loss": 2.3277, "step": 170145 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018086318534169205, "loss": 2.1537, "step": 170150 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018086209795030485, "loss": 2.0235, "step": 170155 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001808610105312937, "loss": 2.1517, "step": 170160 }, { "epoch": 0.4, "grad_norm": 1.9375, "learning_rate": 0.00018085992308465896, "loss": 1.8742, "step": 170165 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.000180858835610401, "loss": 2.1342, "step": 170170 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018085774810852015, "loss": 2.0226, "step": 170175 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018085666057901688, "loss": 2.1246, "step": 170180 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018085557302189152, "loss": 2.2252, "step": 170185 }, { "epoch": 0.4, "grad_norm": 2.484375, "learning_rate": 0.00018085448543714438, "loss": 2.1554, "step": 170190 }, { "epoch": 0.4, "grad_norm": 2.640625, "learning_rate": 0.00018085339782477591, "loss": 2.2826, "step": 170195 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018085231018478645, "loss": 2.1198, "step": 170200 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001808512225171764, "loss": 1.9575, "step": 170205 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018085013482194607, "loss": 1.9808, "step": 170210 }, { "epoch": 0.4, "grad_norm": 1.953125, "learning_rate": 0.00018084904709909588, "loss": 2.1949, "step": 170215 }, { "epoch": 0.4, "grad_norm": 1.4296875, "learning_rate": 0.00018084795934862619, "loss": 2.0736, "step": 170220 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018084687157053736, "loss": 2.2899, "step": 170225 }, { "epoch": 0.4, "grad_norm": 1.7265625, "learning_rate": 0.00018084578376482976, "loss": 2.0756, "step": 170230 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018084469593150382, "loss": 2.1186, "step": 170235 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.0001808436080705598, "loss": 2.2213, "step": 170240 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.0001808425201819982, "loss": 1.9476, "step": 170245 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.0001808414322658193, "loss": 2.2077, "step": 170250 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.0001808403443220235, "loss": 2.02, "step": 170255 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.0001808392563506112, "loss": 2.3037, "step": 170260 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018083816835158273, "loss": 2.3336, "step": 170265 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018083708032493848, "loss": 2.0324, "step": 170270 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001808359922706788, "loss": 2.2381, "step": 170275 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001808349041888041, "loss": 2.1558, "step": 170280 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001808338160793147, "loss": 2.1763, "step": 170285 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018083272794221103, "loss": 2.1839, "step": 170290 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.0001808316397774934, "loss": 2.0827, "step": 170295 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018083055158516224, "loss": 2.2347, "step": 170300 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001808294633652179, "loss": 2.0209, "step": 170305 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018082837511766074, "loss": 2.2832, "step": 170310 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018082728684249116, "loss": 2.2372, "step": 170315 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.0001808261985397095, "loss": 2.1669, "step": 170320 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018082511020931614, "loss": 2.0128, "step": 170325 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018082402185131145, "loss": 2.125, "step": 170330 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018082293346569582, "loss": 2.0723, "step": 170335 }, { "epoch": 0.4, "grad_norm": 3.6875, "learning_rate": 0.0001808218450524696, "loss": 1.9429, "step": 170340 }, { "epoch": 0.4, "grad_norm": 3.296875, "learning_rate": 0.0001808207566116332, "loss": 2.1472, "step": 170345 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018081966814318694, "loss": 2.0257, "step": 170350 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018081857964713125, "loss": 2.0329, "step": 170355 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.0001808174911234664, "loss": 2.2052, "step": 170360 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018081640257219288, "loss": 2.2306, "step": 170365 }, { "epoch": 0.4, "grad_norm": 2.578125, "learning_rate": 0.000180815313993311, "loss": 2.227, "step": 170370 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018081422538682117, "loss": 2.0967, "step": 170375 }, { "epoch": 0.4, "grad_norm": 1.984375, "learning_rate": 0.0001808131367527237, "loss": 2.0367, "step": 170380 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.000180812048091019, "loss": 2.1626, "step": 170385 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018081095940170744, "loss": 2.1434, "step": 170390 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018080987068478939, "loss": 2.0436, "step": 170395 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018080878194026522, "loss": 2.2068, "step": 170400 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001808076931681353, "loss": 2.0745, "step": 170405 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018080660436840001, "loss": 2.0474, "step": 170410 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018080551554105974, "loss": 2.2377, "step": 170415 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.0001808044266861148, "loss": 2.1889, "step": 170420 }, { "epoch": 0.4, "grad_norm": 1.9140625, "learning_rate": 0.00018080333780356566, "loss": 2.2174, "step": 170425 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.0001808022488934126, "loss": 2.2674, "step": 170430 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018080115995565603, "loss": 2.1524, "step": 170435 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018080007099029633, "loss": 1.9567, "step": 170440 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018079898199733382, "loss": 2.1199, "step": 170445 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018079789297676896, "loss": 2.0182, "step": 170450 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018079680392860204, "loss": 2.1473, "step": 170455 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018079571485283348, "loss": 2.0226, "step": 170460 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018079462574946363, "loss": 2.2174, "step": 170465 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.0001807935366184929, "loss": 2.1506, "step": 170470 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.0001807924474599216, "loss": 2.1589, "step": 170475 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018079135827375016, "loss": 2.3054, "step": 170480 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018079026905997892, "loss": 2.2343, "step": 170485 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018078917981860824, "loss": 2.1059, "step": 170490 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018078809054963854, "loss": 2.0409, "step": 170495 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018078700125307012, "loss": 2.2768, "step": 170500 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018078591192890344, "loss": 2.1023, "step": 170505 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001807848225771388, "loss": 2.0942, "step": 170510 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018078373319777662, "loss": 2.1841, "step": 170515 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018078264379081725, "loss": 2.1341, "step": 170520 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018078155435626104, "loss": 1.9906, "step": 170525 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.0001807804648941084, "loss": 2.1276, "step": 170530 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018077937540435967, "loss": 2.1849, "step": 170535 }, { "epoch": 0.4, "grad_norm": 1.8359375, "learning_rate": 0.0001807782858870153, "loss": 2.1489, "step": 170540 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018077719634207554, "loss": 2.2285, "step": 170545 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018077610676954083, "loss": 1.8931, "step": 170550 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.00018077501716941155, "loss": 2.1181, "step": 170555 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018077392754168808, "loss": 2.0642, "step": 170560 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018077283788637077, "loss": 2.3413, "step": 170565 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018077174820345997, "loss": 1.9234, "step": 170570 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001807706584929561, "loss": 2.114, "step": 170575 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018076956875485948, "loss": 2.0409, "step": 170580 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001807684789891705, "loss": 2.5285, "step": 170585 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.00018076738919588955, "loss": 2.0586, "step": 170590 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018076629937501703, "loss": 2.1599, "step": 170595 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018076520952655325, "loss": 2.1328, "step": 170600 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018076411965049864, "loss": 1.9859, "step": 170605 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018076302974685352, "loss": 2.1891, "step": 170610 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018076193981561827, "loss": 2.0401, "step": 170615 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018076084985679328, "loss": 2.0857, "step": 170620 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018075975987037895, "loss": 2.103, "step": 170625 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.0001807586698563756, "loss": 2.2214, "step": 170630 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018075757981478362, "loss": 1.9578, "step": 170635 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018075648974560338, "loss": 2.1929, "step": 170640 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018075539964883527, "loss": 2.1165, "step": 170645 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018075430952447965, "loss": 2.1266, "step": 170650 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.0001807532193725369, "loss": 2.2245, "step": 170655 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.0001807521291930074, "loss": 2.1111, "step": 170660 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018075103898589146, "loss": 2.2634, "step": 170665 }, { "epoch": 0.4, "grad_norm": 1.8359375, "learning_rate": 0.00018074994875118952, "loss": 2.1542, "step": 170670 }, { "epoch": 0.4, "grad_norm": 2.75, "learning_rate": 0.00018074885848890195, "loss": 1.9805, "step": 170675 }, { "epoch": 0.4, "grad_norm": 1.8203125, "learning_rate": 0.0001807477681990291, "loss": 2.0635, "step": 170680 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018074667788157134, "loss": 1.987, "step": 170685 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018074558753652906, "loss": 2.1352, "step": 170690 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001807444971639026, "loss": 2.1697, "step": 170695 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.0001807434067636924, "loss": 2.1832, "step": 170700 }, { "epoch": 0.4, "grad_norm": 2.59375, "learning_rate": 0.00018074231633589873, "loss": 2.0995, "step": 170705 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018074122588052207, "loss": 2.273, "step": 170710 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018074013539756272, "loss": 1.858, "step": 170715 }, { "epoch": 0.4, "grad_norm": 2.6875, "learning_rate": 0.00018073904488702107, "loss": 2.1658, "step": 170720 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.0001807379543488975, "loss": 2.2563, "step": 170725 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.0001807368637831924, "loss": 2.119, "step": 170730 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001807357731899061, "loss": 2.0621, "step": 170735 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018073468256903902, "loss": 2.0805, "step": 170740 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018073359192059149, "loss": 2.0832, "step": 170745 }, { "epoch": 0.4, "grad_norm": 2.8125, "learning_rate": 0.0001807325012445639, "loss": 2.0227, "step": 170750 }, { "epoch": 0.4, "grad_norm": 1.859375, "learning_rate": 0.00018073141054095665, "loss": 2.2844, "step": 170755 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018073031980977007, "loss": 2.3399, "step": 170760 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018072922905100453, "loss": 1.9316, "step": 170765 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018072813826466044, "loss": 2.2441, "step": 170770 }, { "epoch": 0.4, "grad_norm": 1.953125, "learning_rate": 0.00018072704745073813, "loss": 2.1162, "step": 170775 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018072595660923805, "loss": 2.0951, "step": 170780 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018072486574016047, "loss": 2.0622, "step": 170785 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018072377484350583, "loss": 2.2349, "step": 170790 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018072268391927448, "loss": 2.1589, "step": 170795 }, { "epoch": 0.4, "grad_norm": 2.859375, "learning_rate": 0.00018072159296746677, "loss": 2.2118, "step": 170800 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018072050198808315, "loss": 2.3127, "step": 170805 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018071941098112392, "loss": 2.1884, "step": 170810 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018071831994658947, "loss": 2.0782, "step": 170815 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.0001807172288844802, "loss": 2.4002, "step": 170820 }, { "epoch": 0.4, "grad_norm": 2.703125, "learning_rate": 0.00018071613779479644, "loss": 2.1934, "step": 170825 }, { "epoch": 0.4, "grad_norm": 2.609375, "learning_rate": 0.0001807150466775386, "loss": 2.3093, "step": 170830 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.000180713955532707, "loss": 1.9199, "step": 170835 }, { "epoch": 0.4, "grad_norm": 1.8046875, "learning_rate": 0.00018071286436030208, "loss": 2.1372, "step": 170840 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.00018071177316032416, "loss": 2.1863, "step": 170845 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018071068193277365, "loss": 2.105, "step": 170850 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018070959067765092, "loss": 2.276, "step": 170855 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018070849939495632, "loss": 2.2599, "step": 170860 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018070740808469023, "loss": 2.1453, "step": 170865 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018070631674685302, "loss": 2.2087, "step": 170870 }, { "epoch": 0.4, "grad_norm": 1.9921875, "learning_rate": 0.0001807052253814451, "loss": 2.085, "step": 170875 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018070413398846676, "loss": 2.3121, "step": 170880 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018070304256791845, "loss": 2.2492, "step": 170885 }, { "epoch": 0.4, "grad_norm": 2.609375, "learning_rate": 0.00018070195111980053, "loss": 2.0936, "step": 170890 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.00018070085964411333, "loss": 2.2524, "step": 170895 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018069976814085728, "loss": 2.2082, "step": 170900 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.0001806986766100327, "loss": 2.2429, "step": 170905 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018069758505164, "loss": 2.1921, "step": 170910 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018069649346567955, "loss": 2.1211, "step": 170915 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.0001806954018521517, "loss": 2.1867, "step": 170920 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018069431021105686, "loss": 2.0393, "step": 170925 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018069321854239536, "loss": 2.1325, "step": 170930 }, { "epoch": 0.4, "grad_norm": 2.578125, "learning_rate": 0.00018069212684616761, "loss": 2.1743, "step": 170935 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018069103512237394, "loss": 2.0112, "step": 170940 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018068994337101476, "loss": 1.9376, "step": 170945 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018068885159209047, "loss": 2.0899, "step": 170950 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018068775978560136, "loss": 2.2326, "step": 170955 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018068666795154786, "loss": 1.9551, "step": 170960 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018068557608993033, "loss": 2.1731, "step": 170965 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018068448420074912, "loss": 2.0201, "step": 170970 }, { "epoch": 0.4, "grad_norm": 1.7265625, "learning_rate": 0.00018068339228400466, "loss": 2.067, "step": 170975 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018068230033969727, "loss": 2.2416, "step": 170980 }, { "epoch": 0.4, "grad_norm": 2.421875, "learning_rate": 0.00018068120836782736, "loss": 2.1311, "step": 170985 }, { "epoch": 0.4, "grad_norm": 3.5, "learning_rate": 0.0001806801163683953, "loss": 2.1748, "step": 170990 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018067902434140142, "loss": 1.8927, "step": 170995 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.0001806779322868461, "loss": 2.1534, "step": 171000 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.0001806768402047298, "loss": 2.1056, "step": 171005 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001806757480950528, "loss": 2.2336, "step": 171010 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.0001806746559578155, "loss": 2.0888, "step": 171015 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018067356379301825, "loss": 2.1317, "step": 171020 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018067247160066148, "loss": 2.0983, "step": 171025 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018067137938074552, "loss": 2.3557, "step": 171030 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018067028713327074, "loss": 2.2287, "step": 171035 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018066919485823754, "loss": 1.986, "step": 171040 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018066810255564628, "loss": 2.115, "step": 171045 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018066701022549734, "loss": 2.2048, "step": 171050 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018066591786779107, "loss": 2.1582, "step": 171055 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018066482548252788, "loss": 2.1577, "step": 171060 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001806637330697081, "loss": 2.2711, "step": 171065 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018066264062933213, "loss": 2.0762, "step": 171070 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018066154816140034, "loss": 2.0861, "step": 171075 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018066045566591312, "loss": 2.1028, "step": 171080 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.0001806593631428708, "loss": 2.0266, "step": 171085 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018065827059227378, "loss": 2.1588, "step": 171090 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018065717801412246, "loss": 2.1046, "step": 171095 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018065608540841717, "loss": 1.9558, "step": 171100 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018065499277515831, "loss": 2.263, "step": 171105 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.00018065390011434622, "loss": 2.0899, "step": 171110 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018065280742598133, "loss": 2.1549, "step": 171115 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018065171471006393, "loss": 2.1486, "step": 171120 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018065062196659447, "loss": 2.1978, "step": 171125 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018064952919557328, "loss": 2.1234, "step": 171130 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018064843639700075, "loss": 2.3267, "step": 171135 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018064734357087727, "loss": 2.1609, "step": 171140 }, { "epoch": 0.4, "grad_norm": 2.640625, "learning_rate": 0.00018064625071720317, "loss": 1.9889, "step": 171145 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018064515783597885, "loss": 2.1867, "step": 171150 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.0001806440649272047, "loss": 2.2189, "step": 171155 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.0001806429719908811, "loss": 2.0752, "step": 171160 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018064187902700835, "loss": 1.9379, "step": 171165 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018064078603558689, "loss": 2.0145, "step": 171170 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018063969301661706, "loss": 2.2044, "step": 171175 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018063859997009926, "loss": 2.1993, "step": 171180 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.00018063750689603384, "loss": 2.0049, "step": 171185 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.00018063641379442122, "loss": 2.1054, "step": 171190 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.0001806353206652617, "loss": 2.0144, "step": 171195 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018063422750855574, "loss": 1.9918, "step": 171200 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.0001806331343243036, "loss": 1.9528, "step": 171205 }, { "epoch": 0.4, "grad_norm": 1.71875, "learning_rate": 0.00018063204111250578, "loss": 2.1135, "step": 171210 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.00018063094787316257, "loss": 2.1935, "step": 171215 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018062985460627436, "loss": 2.1274, "step": 171220 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001806287613118415, "loss": 2.1586, "step": 171225 }, { "epoch": 0.4, "grad_norm": 2.625, "learning_rate": 0.00018062766798986445, "loss": 2.0059, "step": 171230 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.0001806265746403435, "loss": 2.3022, "step": 171235 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018062548126327905, "loss": 2.3093, "step": 171240 }, { "epoch": 0.4, "grad_norm": 1.90625, "learning_rate": 0.00018062438785867147, "loss": 2.3788, "step": 171245 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018062329442652115, "loss": 1.996, "step": 171250 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018062220096682846, "loss": 2.1882, "step": 171255 }, { "epoch": 0.4, "grad_norm": 1.875, "learning_rate": 0.00018062110747959372, "loss": 2.0946, "step": 171260 }, { "epoch": 0.4, "grad_norm": 1.9375, "learning_rate": 0.00018062001396481736, "loss": 2.2582, "step": 171265 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018061892042249978, "loss": 1.9883, "step": 171270 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018061782685264128, "loss": 2.1117, "step": 171275 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018061673325524226, "loss": 2.3334, "step": 171280 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018061563963030311, "loss": 2.0413, "step": 171285 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.0001806145459778242, "loss": 1.9473, "step": 171290 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018061345229780594, "loss": 2.3221, "step": 171295 }, { "epoch": 0.4, "grad_norm": 2.640625, "learning_rate": 0.0001806123585902486, "loss": 2.0626, "step": 171300 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018061126485515266, "loss": 2.1549, "step": 171305 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.0001806101710925184, "loss": 2.2679, "step": 171310 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018060907730234628, "loss": 2.1788, "step": 171315 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.00018060798348463664, "loss": 2.0367, "step": 171320 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018060688963938986, "loss": 2.175, "step": 171325 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018060579576660628, "loss": 2.0923, "step": 171330 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018060470186628632, "loss": 1.7706, "step": 171335 }, { "epoch": 0.4, "grad_norm": 2.828125, "learning_rate": 0.0001806036079384303, "loss": 1.9401, "step": 171340 }, { "epoch": 0.4, "grad_norm": 1.9921875, "learning_rate": 0.00018060251398303864, "loss": 1.9558, "step": 171345 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018060142000011172, "loss": 2.1203, "step": 171350 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018060032598964988, "loss": 2.1384, "step": 171355 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018059923195165348, "loss": 2.0188, "step": 171360 }, { "epoch": 0.4, "grad_norm": 2.0, "learning_rate": 0.00018059813788612295, "loss": 2.1034, "step": 171365 }, { "epoch": 0.4, "grad_norm": 2.65625, "learning_rate": 0.00018059704379305865, "loss": 2.2212, "step": 171370 }, { "epoch": 0.4, "grad_norm": 2.5625, "learning_rate": 0.00018059594967246092, "loss": 1.9309, "step": 171375 }, { "epoch": 0.4, "grad_norm": 3.109375, "learning_rate": 0.00018059485552433013, "loss": 2.0926, "step": 171380 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001805937613486667, "loss": 2.1389, "step": 171385 }, { "epoch": 0.4, "grad_norm": 1.890625, "learning_rate": 0.00018059266714547096, "loss": 2.0607, "step": 171390 }, { "epoch": 0.4, "grad_norm": 2.71875, "learning_rate": 0.00018059157291474334, "loss": 2.0988, "step": 171395 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018059047865648414, "loss": 2.1602, "step": 171400 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.0001805893843706938, "loss": 2.034, "step": 171405 }, { "epoch": 0.4, "grad_norm": 2.484375, "learning_rate": 0.00018058829005737262, "loss": 2.065, "step": 171410 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018058719571652104, "loss": 2.1441, "step": 171415 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018058610134813946, "loss": 1.9823, "step": 171420 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018058500695222816, "loss": 2.1487, "step": 171425 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018058391252878753, "loss": 2.1583, "step": 171430 }, { "epoch": 0.4, "grad_norm": 1.9140625, "learning_rate": 0.00018058281807781804, "loss": 2.1908, "step": 171435 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018058172359931995, "loss": 2.1747, "step": 171440 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018058062909329372, "loss": 2.0778, "step": 171445 }, { "epoch": 0.4, "grad_norm": 2.765625, "learning_rate": 0.00018057953455973965, "loss": 2.1273, "step": 171450 }, { "epoch": 0.4, "grad_norm": 2.1875, "learning_rate": 0.00018057843999865817, "loss": 2.2144, "step": 171455 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.00018057734541004965, "loss": 2.1297, "step": 171460 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018057625079391442, "loss": 1.9944, "step": 171465 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001805751561502529, "loss": 2.0445, "step": 171470 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.0001805740614790654, "loss": 2.1179, "step": 171475 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001805729667803524, "loss": 1.921, "step": 171480 }, { "epoch": 0.4, "grad_norm": 1.8203125, "learning_rate": 0.00018057187205411417, "loss": 2.0957, "step": 171485 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018057077730035113, "loss": 1.9215, "step": 171490 }, { "epoch": 0.4, "grad_norm": 1.984375, "learning_rate": 0.0001805696825190637, "loss": 2.2418, "step": 171495 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.00018056858771025217, "loss": 2.0499, "step": 171500 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.00018056749287391694, "loss": 2.181, "step": 171505 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001805663980100584, "loss": 2.1272, "step": 171510 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.00018056530311867693, "loss": 2.0379, "step": 171515 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018056420819977285, "loss": 2.2546, "step": 171520 }, { "epoch": 0.4, "grad_norm": 1.96875, "learning_rate": 0.00018056311325334665, "loss": 1.964, "step": 171525 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018056201827939855, "loss": 2.0225, "step": 171530 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018056092327792906, "loss": 2.1087, "step": 171535 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.00018055982824893846, "loss": 1.9854, "step": 171540 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.0001805587331924272, "loss": 2.1488, "step": 171545 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018055763810839558, "loss": 2.0304, "step": 171550 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018055654299684403, "loss": 2.1539, "step": 171555 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001805554478577729, "loss": 2.2031, "step": 171560 }, { "epoch": 0.4, "grad_norm": 1.8828125, "learning_rate": 0.00018055435269118258, "loss": 2.0899, "step": 171565 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.0001805532574970734, "loss": 2.1795, "step": 171570 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.0001805521622754458, "loss": 2.0284, "step": 171575 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001805510670263001, "loss": 2.1746, "step": 171580 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001805499717496367, "loss": 2.2017, "step": 171585 }, { "epoch": 0.4, "grad_norm": 1.7890625, "learning_rate": 0.00018054887644545597, "loss": 2.0287, "step": 171590 }, { "epoch": 0.4, "grad_norm": 2.578125, "learning_rate": 0.0001805477811137583, "loss": 2.3658, "step": 171595 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.000180546685754544, "loss": 2.2289, "step": 171600 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018054559036781353, "loss": 1.8917, "step": 171605 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.0001805444949535672, "loss": 2.1805, "step": 171610 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018054339951180544, "loss": 2.1098, "step": 171615 }, { "epoch": 0.4, "grad_norm": 2.375, "learning_rate": 0.00018054230404252857, "loss": 2.026, "step": 171620 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.000180541208545737, "loss": 2.1232, "step": 171625 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018054011302143108, "loss": 2.1474, "step": 171630 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018053901746961125, "loss": 2.1437, "step": 171635 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018053792189027775, "loss": 2.1827, "step": 171640 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.0001805368262834311, "loss": 2.1654, "step": 171645 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.00018053573064907158, "loss": 2.2076, "step": 171650 }, { "epoch": 0.4, "grad_norm": 1.921875, "learning_rate": 0.00018053463498719958, "loss": 2.2369, "step": 171655 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018053353929781553, "loss": 2.1244, "step": 171660 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.0001805324435809197, "loss": 2.0913, "step": 171665 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018053134783651257, "loss": 2.1258, "step": 171670 }, { "epoch": 0.4, "grad_norm": 2.4375, "learning_rate": 0.0001805302520645945, "loss": 2.0339, "step": 171675 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.00018052915626516578, "loss": 2.0139, "step": 171680 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018052806043822686, "loss": 2.15, "step": 171685 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018052696458377806, "loss": 2.1236, "step": 171690 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018052586870181988, "loss": 2.2575, "step": 171695 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.0001805247727923525, "loss": 2.1413, "step": 171700 }, { "epoch": 0.4, "grad_norm": 2.46875, "learning_rate": 0.00018052367685537647, "loss": 2.113, "step": 171705 }, { "epoch": 0.4, "grad_norm": 2.546875, "learning_rate": 0.00018052258089089205, "loss": 2.0029, "step": 171710 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018052148489889968, "loss": 2.005, "step": 171715 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.00018052038887939967, "loss": 1.8973, "step": 171720 }, { "epoch": 0.4, "grad_norm": 2.65625, "learning_rate": 0.00018051929283239247, "loss": 2.2863, "step": 171725 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018051819675787845, "loss": 2.0955, "step": 171730 }, { "epoch": 0.4, "grad_norm": 2.140625, "learning_rate": 0.0001805171006558579, "loss": 2.279, "step": 171735 }, { "epoch": 0.4, "grad_norm": 1.8515625, "learning_rate": 0.00018051600452633127, "loss": 2.0693, "step": 171740 }, { "epoch": 0.4, "grad_norm": 1.859375, "learning_rate": 0.00018051490836929891, "loss": 2.3261, "step": 171745 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.0001805138121847612, "loss": 2.0683, "step": 171750 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.0001805127159727185, "loss": 2.0158, "step": 171755 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001805116197331712, "loss": 1.9753, "step": 171760 }, { "epoch": 0.4, "grad_norm": 2.390625, "learning_rate": 0.0001805105234661197, "loss": 2.2333, "step": 171765 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.0001805094271715643, "loss": 2.3129, "step": 171770 }, { "epoch": 0.4, "grad_norm": 2.234375, "learning_rate": 0.00018050833084950544, "loss": 1.9739, "step": 171775 }, { "epoch": 0.4, "grad_norm": 2.3125, "learning_rate": 0.0001805072344999435, "loss": 2.1442, "step": 171780 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018050613812287879, "loss": 2.2206, "step": 171785 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018050504171831171, "loss": 2.1961, "step": 171790 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.0001805039452862427, "loss": 1.8329, "step": 171795 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.00018050284882667206, "loss": 2.0288, "step": 171800 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.00018050175233960016, "loss": 2.1018, "step": 171805 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018050065582502742, "loss": 2.0793, "step": 171810 }, { "epoch": 0.4, "grad_norm": 2.109375, "learning_rate": 0.00018049955928295419, "loss": 2.1941, "step": 171815 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018049846271338087, "loss": 2.2536, "step": 171820 }, { "epoch": 0.4, "grad_norm": 2.09375, "learning_rate": 0.00018049736611630782, "loss": 2.2632, "step": 171825 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001804962694917354, "loss": 2.2633, "step": 171830 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018049517283966397, "loss": 2.3446, "step": 171835 }, { "epoch": 0.4, "grad_norm": 2.609375, "learning_rate": 0.00018049407616009395, "loss": 2.1618, "step": 171840 }, { "epoch": 0.4, "grad_norm": 2.5, "learning_rate": 0.0001804929794530257, "loss": 2.2198, "step": 171845 }, { "epoch": 0.4, "grad_norm": 1.8359375, "learning_rate": 0.00018049188271845957, "loss": 2.0463, "step": 171850 }, { "epoch": 0.4, "grad_norm": 1.9609375, "learning_rate": 0.00018049078595639597, "loss": 2.1234, "step": 171855 }, { "epoch": 0.4, "grad_norm": 1.8671875, "learning_rate": 0.00018048968916683524, "loss": 2.1587, "step": 171860 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.0001804885923497778, "loss": 2.0258, "step": 171865 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.000180487495505224, "loss": 1.9773, "step": 171870 }, { "epoch": 0.4, "grad_norm": 2.453125, "learning_rate": 0.00018048639863317418, "loss": 2.1282, "step": 171875 }, { "epoch": 0.4, "grad_norm": 1.953125, "learning_rate": 0.00018048530173362876, "loss": 2.2334, "step": 171880 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.0001804842048065881, "loss": 2.2735, "step": 171885 }, { "epoch": 0.4, "grad_norm": 2.078125, "learning_rate": 0.0001804831078520526, "loss": 2.1523, "step": 171890 }, { "epoch": 0.4, "grad_norm": 2.8125, "learning_rate": 0.00018048201087002256, "loss": 2.0276, "step": 171895 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018048091386049843, "loss": 2.0031, "step": 171900 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.00018047981682348056, "loss": 2.0402, "step": 171905 }, { "epoch": 0.4, "grad_norm": 2.046875, "learning_rate": 0.00018047871975896933, "loss": 2.0072, "step": 171910 }, { "epoch": 0.4, "grad_norm": 2.65625, "learning_rate": 0.0001804776226669651, "loss": 2.3037, "step": 171915 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018047652554746825, "loss": 2.2197, "step": 171920 }, { "epoch": 0.4, "grad_norm": 1.984375, "learning_rate": 0.0001804754284004792, "loss": 2.0352, "step": 171925 }, { "epoch": 0.4, "grad_norm": 2.125, "learning_rate": 0.00018047433122599826, "loss": 2.4321, "step": 171930 }, { "epoch": 0.4, "grad_norm": 2.328125, "learning_rate": 0.00018047323402402583, "loss": 2.0059, "step": 171935 }, { "epoch": 0.4, "grad_norm": 2.0625, "learning_rate": 0.00018047213679456227, "loss": 2.1215, "step": 171940 }, { "epoch": 0.4, "grad_norm": 2.203125, "learning_rate": 0.00018047103953760794, "loss": 2.0472, "step": 171945 }, { "epoch": 0.4, "grad_norm": 2.015625, "learning_rate": 0.00018046994225316332, "loss": 2.0683, "step": 171950 }, { "epoch": 0.4, "grad_norm": 2.515625, "learning_rate": 0.00018046884494122867, "loss": 2.2124, "step": 171955 }, { "epoch": 0.4, "grad_norm": 2.15625, "learning_rate": 0.0001804677476018044, "loss": 2.2073, "step": 171960 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001804666502348909, "loss": 2.0334, "step": 171965 }, { "epoch": 0.4, "grad_norm": 1.890625, "learning_rate": 0.00018046555284048852, "loss": 2.116, "step": 171970 }, { "epoch": 0.4, "grad_norm": 2.8125, "learning_rate": 0.00018046445541859763, "loss": 2.0653, "step": 171975 }, { "epoch": 0.4, "grad_norm": 1.953125, "learning_rate": 0.00018046335796921865, "loss": 1.9913, "step": 171980 }, { "epoch": 0.4, "grad_norm": 2.171875, "learning_rate": 0.0001804622604923519, "loss": 2.3053, "step": 171985 }, { "epoch": 0.4, "grad_norm": 2.671875, "learning_rate": 0.00018046116298799783, "loss": 2.1343, "step": 171990 }, { "epoch": 0.4, "grad_norm": 2.265625, "learning_rate": 0.00018046006545615673, "loss": 2.1527, "step": 171995 }, { "epoch": 0.4, "grad_norm": 2.359375, "learning_rate": 0.00018045896789682902, "loss": 2.0515, "step": 172000 }, { "epoch": 0.4, "grad_norm": 1.84375, "learning_rate": 0.00018045787031001508, "loss": 2.2721, "step": 172005 }, { "epoch": 0.4, "grad_norm": 1.828125, "learning_rate": 0.00018045677269571528, "loss": 1.9993, "step": 172010 }, { "epoch": 0.4, "grad_norm": 1.9453125, "learning_rate": 0.00018045567505392996, "loss": 2.2274, "step": 172015 }, { "epoch": 0.4, "grad_norm": 1.90625, "learning_rate": 0.00018045457738465954, "loss": 2.1649, "step": 172020 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018045347968790435, "loss": 2.146, "step": 172025 }, { "epoch": 0.4, "grad_norm": 2.34375, "learning_rate": 0.0001804523819636648, "loss": 2.0404, "step": 172030 }, { "epoch": 0.4, "grad_norm": 2.296875, "learning_rate": 0.0001804512842119413, "loss": 2.2097, "step": 172035 }, { "epoch": 0.4, "grad_norm": 2.40625, "learning_rate": 0.00018045018643273415, "loss": 2.2081, "step": 172040 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.00018044908862604374, "loss": 2.1563, "step": 172045 }, { "epoch": 0.4, "grad_norm": 1.9765625, "learning_rate": 0.00018044799079187048, "loss": 1.8728, "step": 172050 }, { "epoch": 0.4, "grad_norm": 1.875, "learning_rate": 0.00018044689293021475, "loss": 1.9574, "step": 172055 }, { "epoch": 0.4, "grad_norm": 2.53125, "learning_rate": 0.0001804457950410769, "loss": 2.137, "step": 172060 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.00018044469712445726, "loss": 2.0864, "step": 172065 }, { "epoch": 0.4, "grad_norm": 1.7109375, "learning_rate": 0.00018044359918035628, "loss": 2.1782, "step": 172070 }, { "epoch": 0.4, "grad_norm": 2.25, "learning_rate": 0.0001804425012087743, "loss": 1.9239, "step": 172075 }, { "epoch": 0.4, "grad_norm": 2.03125, "learning_rate": 0.00018044140320971173, "loss": 1.963, "step": 172080 }, { "epoch": 0.4, "grad_norm": 1.7734375, "learning_rate": 0.0001804403051831689, "loss": 2.1972, "step": 172085 }, { "epoch": 0.4, "grad_norm": 2.28125, "learning_rate": 0.0001804392071291462, "loss": 2.0795, "step": 172090 }, { "epoch": 0.4, "grad_norm": 2.21875, "learning_rate": 0.000180438109047644, "loss": 2.2292, "step": 172095 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.0001804370109386627, "loss": 1.8738, "step": 172100 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00018043591280220267, "loss": 2.2133, "step": 172105 }, { "epoch": 0.41, "grad_norm": 1.8984375, "learning_rate": 0.00018043481463826425, "loss": 1.9961, "step": 172110 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018043371644684784, "loss": 2.0838, "step": 172115 }, { "epoch": 0.41, "grad_norm": 2.9375, "learning_rate": 0.0001804326182279538, "loss": 2.2702, "step": 172120 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018043151998158255, "loss": 2.1432, "step": 172125 }, { "epoch": 0.41, "grad_norm": 2.703125, "learning_rate": 0.00018043042170773444, "loss": 2.0971, "step": 172130 }, { "epoch": 0.41, "grad_norm": 2.765625, "learning_rate": 0.0001804293234064098, "loss": 2.108, "step": 172135 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018042822507760909, "loss": 2.1417, "step": 172140 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018042712672133258, "loss": 2.0817, "step": 172145 }, { "epoch": 0.41, "grad_norm": 3.390625, "learning_rate": 0.00018042602833758076, "loss": 2.1017, "step": 172150 }, { "epoch": 0.41, "grad_norm": 2.53125, "learning_rate": 0.0001804249299263539, "loss": 2.0844, "step": 172155 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018042383148765247, "loss": 2.3244, "step": 172160 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018042273302147678, "loss": 2.2698, "step": 172165 }, { "epoch": 0.41, "grad_norm": 3.71875, "learning_rate": 0.00018042163452782725, "loss": 2.1902, "step": 172170 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.0001804205360067042, "loss": 2.1564, "step": 172175 }, { "epoch": 0.41, "grad_norm": 2.640625, "learning_rate": 0.00018041943745810804, "loss": 2.0332, "step": 172180 }, { "epoch": 0.41, "grad_norm": 1.9140625, "learning_rate": 0.00018041833888203916, "loss": 2.2481, "step": 172185 }, { "epoch": 0.41, "grad_norm": 2.53125, "learning_rate": 0.0001804172402784979, "loss": 2.2052, "step": 172190 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018041614164748465, "loss": 2.2006, "step": 172195 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001804150429889998, "loss": 2.2513, "step": 172200 }, { "epoch": 0.41, "grad_norm": 1.9765625, "learning_rate": 0.00018041394430304373, "loss": 2.1967, "step": 172205 }, { "epoch": 0.41, "grad_norm": 1.984375, "learning_rate": 0.00018041284558961674, "loss": 2.0907, "step": 172210 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018041174684871932, "loss": 2.1462, "step": 172215 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00018041064808035177, "loss": 2.1182, "step": 172220 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001804095492845145, "loss": 1.9808, "step": 172225 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.00018040845046120784, "loss": 1.9486, "step": 172230 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018040735161043223, "loss": 2.0907, "step": 172235 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018040625273218797, "loss": 1.9656, "step": 172240 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001804051538264755, "loss": 2.0948, "step": 172245 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00018040405489329518, "loss": 2.3093, "step": 172250 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018040295593264736, "loss": 2.0013, "step": 172255 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.00018040185694453243, "loss": 1.93, "step": 172260 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018040075792895077, "loss": 2.1325, "step": 172265 }, { "epoch": 0.41, "grad_norm": 1.625, "learning_rate": 0.00018039965888590277, "loss": 1.9111, "step": 172270 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.00018039855981538878, "loss": 2.266, "step": 172275 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00018039746071740916, "loss": 2.0795, "step": 172280 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018039636159196434, "loss": 2.0104, "step": 172285 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00018039526243905462, "loss": 2.3201, "step": 172290 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00018039416325868048, "loss": 2.2955, "step": 172295 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.0001803930640508422, "loss": 2.2316, "step": 172300 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.0001803919648155402, "loss": 2.0528, "step": 172305 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00018039086555277485, "loss": 2.2171, "step": 172310 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.0001803897662625465, "loss": 2.1894, "step": 172315 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018038866694485557, "loss": 2.1399, "step": 172320 }, { "epoch": 0.41, "grad_norm": 1.8828125, "learning_rate": 0.00018038756759970243, "loss": 2.0337, "step": 172325 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.0001803864682270874, "loss": 2.2909, "step": 172330 }, { "epoch": 0.41, "grad_norm": 1.9921875, "learning_rate": 0.00018038536882701093, "loss": 2.0131, "step": 172335 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018038426939947334, "loss": 2.0469, "step": 172340 }, { "epoch": 0.41, "grad_norm": 1.828125, "learning_rate": 0.00018038316994447505, "loss": 2.1879, "step": 172345 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00018038207046201636, "loss": 2.1712, "step": 172350 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018038097095209773, "loss": 2.1259, "step": 172355 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.0001803798714147195, "loss": 2.1842, "step": 172360 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00018037877184988205, "loss": 2.0989, "step": 172365 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018037767225758574, "loss": 2.1663, "step": 172370 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018037657263783097, "loss": 1.9933, "step": 172375 }, { "epoch": 0.41, "grad_norm": 1.953125, "learning_rate": 0.00018037547299061814, "loss": 2.0119, "step": 172380 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00018037437331594754, "loss": 2.2088, "step": 172385 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00018037327361381962, "loss": 2.1221, "step": 172390 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00018037217388423474, "loss": 2.0816, "step": 172395 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018037107412719326, "loss": 2.2191, "step": 172400 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018036997434269555, "loss": 2.2083, "step": 172405 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018036887453074202, "loss": 2.0907, "step": 172410 }, { "epoch": 0.41, "grad_norm": 2.53125, "learning_rate": 0.000180367774691333, "loss": 1.8894, "step": 172415 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.0001803666748244689, "loss": 2.1109, "step": 172420 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00018036557493015008, "loss": 2.0774, "step": 172425 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00018036447500837692, "loss": 2.1902, "step": 172430 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018036337505914982, "loss": 2.1725, "step": 172435 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001803622750824691, "loss": 2.1398, "step": 172440 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018036117507833521, "loss": 2.2505, "step": 172445 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00018036007504674845, "loss": 2.1671, "step": 172450 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018035897498770926, "loss": 2.1366, "step": 172455 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.00018035787490121798, "loss": 2.1272, "step": 172460 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018035677478727498, "loss": 2.1778, "step": 172465 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018035567464588065, "loss": 2.1228, "step": 172470 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018035457447703536, "loss": 2.0799, "step": 172475 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018035347428073953, "loss": 2.1487, "step": 172480 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018035237405699347, "loss": 2.0937, "step": 172485 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018035127380579757, "loss": 2.0158, "step": 172490 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018035017352715222, "loss": 2.1041, "step": 172495 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001803490732210578, "loss": 2.0545, "step": 172500 }, { "epoch": 0.41, "grad_norm": 4.625, "learning_rate": 0.0001803479728875147, "loss": 2.3357, "step": 172505 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018034687252652324, "loss": 2.3388, "step": 172510 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018034577213808383, "loss": 2.102, "step": 172515 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018034467172219686, "loss": 2.0823, "step": 172520 }, { "epoch": 0.41, "grad_norm": 1.703125, "learning_rate": 0.0001803435712788627, "loss": 1.9322, "step": 172525 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001803424708080817, "loss": 2.1103, "step": 172530 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018034137030985427, "loss": 2.1672, "step": 172535 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.00018034026978418078, "loss": 1.9792, "step": 172540 }, { "epoch": 0.41, "grad_norm": 1.7890625, "learning_rate": 0.0001803391692310616, "loss": 1.927, "step": 172545 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018033806865049706, "loss": 2.0888, "step": 172550 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.0001803369680424876, "loss": 2.0465, "step": 172555 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001803358674070336, "loss": 2.1527, "step": 172560 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.0001803347667441354, "loss": 2.1134, "step": 172565 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.0001803336660537934, "loss": 2.287, "step": 172570 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00018033256533600791, "loss": 1.8945, "step": 172575 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.0001803314645907794, "loss": 2.1296, "step": 172580 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018033036381810818, "loss": 2.105, "step": 172585 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.0001803292630179947, "loss": 2.0832, "step": 172590 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.00018032816219043922, "loss": 2.3604, "step": 172595 }, { "epoch": 0.41, "grad_norm": 1.828125, "learning_rate": 0.00018032706133544224, "loss": 2.1796, "step": 172600 }, { "epoch": 0.41, "grad_norm": 2.671875, "learning_rate": 0.00018032596045300405, "loss": 2.1507, "step": 172605 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00018032485954312506, "loss": 2.0915, "step": 172610 }, { "epoch": 0.41, "grad_norm": 1.921875, "learning_rate": 0.00018032375860580564, "loss": 2.1232, "step": 172615 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018032265764104618, "loss": 2.3079, "step": 172620 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00018032155664884702, "loss": 2.054, "step": 172625 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018032045562920858, "loss": 2.0984, "step": 172630 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.0001803193545821312, "loss": 1.9965, "step": 172635 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.0001803182535076153, "loss": 2.1266, "step": 172640 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018031715240566119, "loss": 2.2105, "step": 172645 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00018031605127626928, "loss": 2.1407, "step": 172650 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00018031495011943996, "loss": 2.1557, "step": 172655 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018031384893517363, "loss": 2.0188, "step": 172660 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018031274772347061, "loss": 1.9549, "step": 172665 }, { "epoch": 0.41, "grad_norm": 2.625, "learning_rate": 0.0001803116464843313, "loss": 2.4116, "step": 172670 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018031054521775607, "loss": 2.1405, "step": 172675 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018030944392374534, "loss": 2.0964, "step": 172680 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00018030834260229938, "loss": 2.0333, "step": 172685 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.00018030724125341866, "loss": 2.195, "step": 172690 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018030613987710353, "loss": 2.0279, "step": 172695 }, { "epoch": 0.41, "grad_norm": 2.6875, "learning_rate": 0.0001803050384733544, "loss": 1.9927, "step": 172700 }, { "epoch": 0.41, "grad_norm": 2.828125, "learning_rate": 0.0001803039370421716, "loss": 2.1343, "step": 172705 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00018030283558355547, "loss": 2.097, "step": 172710 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018030173409750648, "loss": 2.2016, "step": 172715 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.00018030063258402498, "loss": 2.122, "step": 172720 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.0001802995310431113, "loss": 2.3261, "step": 172725 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00018029842947476582, "loss": 2.1211, "step": 172730 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018029732787898896, "loss": 2.1241, "step": 172735 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018029622625578107, "loss": 2.003, "step": 172740 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018029512460514258, "loss": 1.9887, "step": 172745 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018029402292707376, "loss": 2.0844, "step": 172750 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00018029292122157507, "loss": 2.118, "step": 172755 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018029181948864685, "loss": 2.1498, "step": 172760 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.0001802907177282895, "loss": 2.0159, "step": 172765 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.0001802896159405034, "loss": 2.0975, "step": 172770 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018028851412528888, "loss": 2.1502, "step": 172775 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018028741228264634, "loss": 2.2117, "step": 172780 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001802863104125762, "loss": 2.176, "step": 172785 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001802852085150788, "loss": 1.9889, "step": 172790 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001802841065901545, "loss": 1.9599, "step": 172795 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018028300463780367, "loss": 2.3566, "step": 172800 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018028190265802674, "loss": 2.2434, "step": 172805 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018028080065082404, "loss": 1.9795, "step": 172810 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.00018027969861619597, "loss": 2.2676, "step": 172815 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018027859655414291, "loss": 2.0824, "step": 172820 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001802774944646652, "loss": 2.2628, "step": 172825 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.00018027639234776327, "loss": 2.1912, "step": 172830 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018027529020343748, "loss": 2.1568, "step": 172835 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00018027418803168816, "loss": 1.9259, "step": 172840 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.0001802730858325157, "loss": 2.1511, "step": 172845 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018027198360592056, "loss": 1.9216, "step": 172850 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018027088135190303, "loss": 2.0337, "step": 172855 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001802697790704635, "loss": 2.0148, "step": 172860 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00018026867676160238, "loss": 2.0165, "step": 172865 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018026757442532, "loss": 2.113, "step": 172870 }, { "epoch": 0.41, "grad_norm": 1.8515625, "learning_rate": 0.00018026647206161676, "loss": 2.3735, "step": 172875 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.00018026536967049307, "loss": 2.1065, "step": 172880 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018026426725194924, "loss": 2.1076, "step": 172885 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00018026316480598568, "loss": 2.1682, "step": 172890 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.0001802620623326028, "loss": 2.2037, "step": 172895 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00018026095983180094, "loss": 2.0678, "step": 172900 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00018025985730358044, "loss": 2.2002, "step": 172905 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018025875474794175, "loss": 2.1091, "step": 172910 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018025765216488518, "loss": 2.2855, "step": 172915 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.0001802565495544112, "loss": 2.0022, "step": 172920 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00018025544691652008, "loss": 2.3098, "step": 172925 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018025434425121225, "loss": 1.9985, "step": 172930 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.0001802532415584881, "loss": 2.0157, "step": 172935 }, { "epoch": 0.41, "grad_norm": 1.9375, "learning_rate": 0.00018025213883834799, "loss": 2.0426, "step": 172940 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00018025103609079225, "loss": 2.2253, "step": 172945 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018024993331582133, "loss": 2.1309, "step": 172950 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.0001802488305134356, "loss": 2.28, "step": 172955 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00018024772768363535, "loss": 2.2435, "step": 172960 }, { "epoch": 0.41, "grad_norm": 1.9296875, "learning_rate": 0.0001802466248264211, "loss": 1.9687, "step": 172965 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018024552194179308, "loss": 2.0689, "step": 172970 }, { "epoch": 0.41, "grad_norm": 1.8203125, "learning_rate": 0.00018024441902975176, "loss": 1.9486, "step": 172975 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00018024331609029753, "loss": 2.0146, "step": 172980 }, { "epoch": 0.41, "grad_norm": 1.84375, "learning_rate": 0.00018024221312343066, "loss": 2.156, "step": 172985 }, { "epoch": 0.41, "grad_norm": 1.8984375, "learning_rate": 0.00018024111012915163, "loss": 2.2789, "step": 172990 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00018024000710746077, "loss": 2.1848, "step": 172995 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001802389040583585, "loss": 1.9993, "step": 173000 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018023780098184512, "loss": 2.1683, "step": 173005 }, { "epoch": 0.41, "grad_norm": 1.921875, "learning_rate": 0.00018023669787792107, "loss": 2.1534, "step": 173010 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.00018023559474658672, "loss": 2.115, "step": 173015 }, { "epoch": 0.41, "grad_norm": 1.84375, "learning_rate": 0.00018023449158784244, "loss": 2.0582, "step": 173020 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.0001802333884016886, "loss": 2.3727, "step": 173025 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018023228518812556, "loss": 2.1586, "step": 173030 }, { "epoch": 0.41, "grad_norm": 1.7109375, "learning_rate": 0.00018023118194715373, "loss": 1.8672, "step": 173035 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018023007867877349, "loss": 2.1804, "step": 173040 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.00018022897538298515, "loss": 2.0493, "step": 173045 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001802278720597892, "loss": 2.2455, "step": 173050 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018022676870918592, "loss": 2.0952, "step": 173055 }, { "epoch": 0.41, "grad_norm": 1.9140625, "learning_rate": 0.0001802256653311757, "loss": 2.0635, "step": 173060 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018022456192575895, "loss": 2.2455, "step": 173065 }, { "epoch": 0.41, "grad_norm": 2.828125, "learning_rate": 0.00018022345849293605, "loss": 2.1782, "step": 173070 }, { "epoch": 0.41, "grad_norm": 2.90625, "learning_rate": 0.00018022235503270734, "loss": 2.2421, "step": 173075 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.00018022125154507326, "loss": 1.9787, "step": 173080 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018022014803003412, "loss": 2.1262, "step": 173085 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00018021904448759033, "loss": 2.1588, "step": 173090 }, { "epoch": 0.41, "grad_norm": 1.921875, "learning_rate": 0.00018021794091774225, "loss": 2.1395, "step": 173095 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018021683732049026, "loss": 2.1946, "step": 173100 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018021573369583475, "loss": 2.2531, "step": 173105 }, { "epoch": 0.41, "grad_norm": 2.84375, "learning_rate": 0.0001802146300437761, "loss": 2.0791, "step": 173110 }, { "epoch": 0.41, "grad_norm": 2.546875, "learning_rate": 0.0001802135263643147, "loss": 2.1781, "step": 173115 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018021242265745083, "loss": 2.1227, "step": 173120 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018021131892318498, "loss": 1.9765, "step": 173125 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.0001802102151615175, "loss": 2.0751, "step": 173130 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018020911137244875, "loss": 2.135, "step": 173135 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.0001802080075559791, "loss": 2.1959, "step": 173140 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018020690371210897, "loss": 2.1658, "step": 173145 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.0001802057998408387, "loss": 2.1478, "step": 173150 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00018020469594216866, "loss": 2.0527, "step": 173155 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018020359201609924, "loss": 2.282, "step": 173160 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018020248806263083, "loss": 2.2694, "step": 173165 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.0001802013840817638, "loss": 2.1325, "step": 173170 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001802002800734985, "loss": 2.0847, "step": 173175 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.00018019917603783535, "loss": 2.1546, "step": 173180 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018019807197477473, "loss": 2.0862, "step": 173185 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.00018019696788431694, "loss": 2.1183, "step": 173190 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00018019586376646243, "loss": 2.1133, "step": 173195 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018019475962121158, "loss": 2.1258, "step": 173200 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00018019365544856471, "loss": 2.1147, "step": 173205 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018019255124852225, "loss": 2.2447, "step": 173210 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018019144702108457, "loss": 2.1675, "step": 173215 }, { "epoch": 0.41, "grad_norm": 2.96875, "learning_rate": 0.00018019034276625204, "loss": 2.1142, "step": 173220 }, { "epoch": 0.41, "grad_norm": 1.984375, "learning_rate": 0.00018018923848402501, "loss": 2.1101, "step": 173225 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.0001801881341744039, "loss": 2.1064, "step": 173230 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018018702983738907, "loss": 2.2183, "step": 173235 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.0001801859254729809, "loss": 2.1304, "step": 173240 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00018018482108117975, "loss": 2.0947, "step": 173245 }, { "epoch": 0.41, "grad_norm": 2.84375, "learning_rate": 0.00018018371666198601, "loss": 2.1788, "step": 173250 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00018018261221540007, "loss": 2.1394, "step": 173255 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.0001801815077414223, "loss": 2.067, "step": 173260 }, { "epoch": 0.41, "grad_norm": 1.8125, "learning_rate": 0.00018018040324005306, "loss": 2.0756, "step": 173265 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018017929871129275, "loss": 2.2184, "step": 173270 }, { "epoch": 0.41, "grad_norm": 3.03125, "learning_rate": 0.0001801781941551417, "loss": 2.1402, "step": 173275 }, { "epoch": 0.41, "grad_norm": 2.703125, "learning_rate": 0.00018017708957160036, "loss": 2.2158, "step": 173280 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018017598496066907, "loss": 2.0114, "step": 173285 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.0001801748803223482, "loss": 2.106, "step": 173290 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018017377565663817, "loss": 2.0565, "step": 173295 }, { "epoch": 0.41, "grad_norm": 1.828125, "learning_rate": 0.00018017267096353927, "loss": 2.1613, "step": 173300 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018017156624305198, "loss": 2.0004, "step": 173305 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001801704614951766, "loss": 2.1643, "step": 173310 }, { "epoch": 0.41, "grad_norm": 2.75, "learning_rate": 0.00018016935671991355, "loss": 2.1208, "step": 173315 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00018016825191726317, "loss": 2.1942, "step": 173320 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00018016714708722587, "loss": 1.993, "step": 173325 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00018016604222980206, "loss": 2.3976, "step": 173330 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018016493734499203, "loss": 1.9973, "step": 173335 }, { "epoch": 0.41, "grad_norm": 2.546875, "learning_rate": 0.00018016383243279625, "loss": 2.0965, "step": 173340 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.000180162727493215, "loss": 2.1247, "step": 173345 }, { "epoch": 0.41, "grad_norm": 1.8984375, "learning_rate": 0.00018016162252624873, "loss": 2.0432, "step": 173350 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.0001801605175318978, "loss": 2.346, "step": 173355 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00018015941251016258, "loss": 2.0293, "step": 173360 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00018015830746104342, "loss": 2.1361, "step": 173365 }, { "epoch": 0.41, "grad_norm": 5.0, "learning_rate": 0.00018015720238454076, "loss": 2.1729, "step": 173370 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00018015609728065494, "loss": 2.234, "step": 173375 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018015499214938635, "loss": 2.1286, "step": 173380 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018015388699073537, "loss": 2.205, "step": 173385 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00018015278180470235, "loss": 2.0762, "step": 173390 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001801516765912877, "loss": 2.0967, "step": 173395 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00018015057135049175, "loss": 2.0955, "step": 173400 }, { "epoch": 0.41, "grad_norm": 1.8984375, "learning_rate": 0.00018014946608231497, "loss": 2.1852, "step": 173405 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.00018014836078675763, "loss": 2.2502, "step": 173410 }, { "epoch": 0.41, "grad_norm": 1.796875, "learning_rate": 0.00018014725546382015, "loss": 2.1589, "step": 173415 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018014615011350296, "loss": 2.1325, "step": 173420 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.00018014504473580636, "loss": 2.2524, "step": 173425 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018014393933073075, "loss": 1.9522, "step": 173430 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00018014283389827655, "loss": 2.0074, "step": 173435 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00018014172843844407, "loss": 2.2667, "step": 173440 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00018014062295123375, "loss": 2.0347, "step": 173445 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018013951743664594, "loss": 2.2026, "step": 173450 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.000180138411894681, "loss": 2.1356, "step": 173455 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.0001801373063253393, "loss": 2.0914, "step": 173460 }, { "epoch": 0.41, "grad_norm": 2.703125, "learning_rate": 0.00018013620072862132, "loss": 2.1484, "step": 173465 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001801350951045273, "loss": 2.1026, "step": 173470 }, { "epoch": 0.41, "grad_norm": 1.984375, "learning_rate": 0.00018013398945305767, "loss": 2.2104, "step": 173475 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00018013288377421286, "loss": 2.232, "step": 173480 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.0001801317780679932, "loss": 2.0911, "step": 173485 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00018013067233439904, "loss": 2.0685, "step": 173490 }, { "epoch": 0.41, "grad_norm": 2.671875, "learning_rate": 0.0001801295665734308, "loss": 1.9393, "step": 173495 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00018012846078508887, "loss": 2.2315, "step": 173500 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00018012735496937356, "loss": 2.0233, "step": 173505 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018012624912628532, "loss": 2.144, "step": 173510 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.0001801251432558245, "loss": 2.1576, "step": 173515 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.0001801240373579915, "loss": 2.3786, "step": 173520 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018012293143278662, "loss": 2.0302, "step": 173525 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018012182548021033, "loss": 2.1055, "step": 173530 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00018012071950026297, "loss": 2.0987, "step": 173535 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001801196134929449, "loss": 2.1849, "step": 173540 }, { "epoch": 0.41, "grad_norm": 1.7890625, "learning_rate": 0.00018011850745825654, "loss": 2.1345, "step": 173545 }, { "epoch": 0.41, "grad_norm": 1.7421875, "learning_rate": 0.00018011740139619825, "loss": 2.0658, "step": 173550 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00018011629530677038, "loss": 2.0668, "step": 173555 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.00018011518918997335, "loss": 2.1814, "step": 173560 }, { "epoch": 0.41, "grad_norm": 2.59375, "learning_rate": 0.0001801140830458075, "loss": 2.2187, "step": 173565 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018011297687427322, "loss": 2.0352, "step": 173570 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001801118706753709, "loss": 2.1551, "step": 173575 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00018011076444910095, "loss": 2.1472, "step": 173580 }, { "epoch": 0.41, "grad_norm": 2.53125, "learning_rate": 0.00018010965819546367, "loss": 1.994, "step": 173585 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.0001801085519144595, "loss": 2.2095, "step": 173590 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00018010744560608877, "loss": 2.016, "step": 173595 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.0001801063392703519, "loss": 2.2516, "step": 173600 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00018010523290724924, "loss": 2.1088, "step": 173605 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.0001801041265167812, "loss": 2.1279, "step": 173610 }, { "epoch": 0.41, "grad_norm": 1.7890625, "learning_rate": 0.00018010302009894815, "loss": 2.0478, "step": 173615 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018010191365375042, "loss": 2.0642, "step": 173620 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00018010080718118844, "loss": 2.1548, "step": 173625 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018009970068126257, "loss": 2.0962, "step": 173630 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00018009859415397318, "loss": 2.2117, "step": 173635 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00018009748759932067, "loss": 2.188, "step": 173640 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018009638101730539, "loss": 1.9878, "step": 173645 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00018009527440792774, "loss": 1.9963, "step": 173650 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.0001800941677711881, "loss": 2.3043, "step": 173655 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018009306110708682, "loss": 2.065, "step": 173660 }, { "epoch": 0.41, "grad_norm": 2.734375, "learning_rate": 0.0001800919544156243, "loss": 2.0108, "step": 173665 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00018009084769680098, "loss": 2.1942, "step": 173670 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.0001800897409506171, "loss": 1.8723, "step": 173675 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00018008863417707315, "loss": 2.136, "step": 173680 }, { "epoch": 0.41, "grad_norm": 1.859375, "learning_rate": 0.00018008752737616943, "loss": 2.0233, "step": 173685 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018008642054790638, "loss": 2.1467, "step": 173690 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00018008531369228438, "loss": 2.1894, "step": 173695 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00018008420680930376, "loss": 2.0003, "step": 173700 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018008309989896493, "loss": 2.2604, "step": 173705 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.00018008199296126826, "loss": 2.358, "step": 173710 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.0001800808859962141, "loss": 2.2134, "step": 173715 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.0001800797790038029, "loss": 2.1154, "step": 173720 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00018007867198403498, "loss": 2.1663, "step": 173725 }, { "epoch": 0.41, "grad_norm": 2.625, "learning_rate": 0.00018007756493691073, "loss": 2.1539, "step": 173730 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00018007645786243055, "loss": 2.1186, "step": 173735 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00018007535076059475, "loss": 2.2085, "step": 173740 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001800742436314038, "loss": 2.0215, "step": 173745 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00018007313647485805, "loss": 2.3171, "step": 173750 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00018007202929095782, "loss": 2.1209, "step": 173755 }, { "epoch": 0.41, "grad_norm": 1.671875, "learning_rate": 0.00018007092207970358, "loss": 2.0761, "step": 173760 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018006981484109563, "loss": 2.1288, "step": 173765 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.0001800687075751344, "loss": 2.0312, "step": 173770 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.0001800676002818202, "loss": 2.0245, "step": 173775 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00018006649296115347, "loss": 2.2501, "step": 173780 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001800653856131346, "loss": 2.1884, "step": 173785 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00018006427823776395, "loss": 2.145, "step": 173790 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00018006317083504186, "loss": 2.1612, "step": 173795 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00018006206340496876, "loss": 2.1542, "step": 173800 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.000180060955947545, "loss": 2.2388, "step": 173805 }, { "epoch": 0.41, "grad_norm": 1.921875, "learning_rate": 0.00018005984846277095, "loss": 2.2624, "step": 173810 }, { "epoch": 0.41, "grad_norm": 2.75, "learning_rate": 0.00018005874095064703, "loss": 2.0852, "step": 173815 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00018005763341117357, "loss": 2.1062, "step": 173820 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.000180056525844351, "loss": 2.0966, "step": 173825 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00018005541825017964, "loss": 2.378, "step": 173830 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001800543106286599, "loss": 2.1803, "step": 173835 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.0001800532029797922, "loss": 2.2335, "step": 173840 }, { "epoch": 0.41, "grad_norm": 1.9296875, "learning_rate": 0.00018005209530357681, "loss": 2.3084, "step": 173845 }, { "epoch": 0.41, "grad_norm": 1.8828125, "learning_rate": 0.0001800509876000142, "loss": 1.9248, "step": 173850 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00018004987986910474, "loss": 2.0368, "step": 173855 }, { "epoch": 0.41, "grad_norm": 3.328125, "learning_rate": 0.00018004877211084876, "loss": 2.1824, "step": 173860 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018004766432524666, "loss": 2.0751, "step": 173865 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018004655651229884, "loss": 2.1101, "step": 173870 }, { "epoch": 0.41, "grad_norm": 1.8359375, "learning_rate": 0.00018004544867200568, "loss": 2.044, "step": 173875 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00018004434080436753, "loss": 2.1657, "step": 173880 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.0001800432329093848, "loss": 1.9753, "step": 173885 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.0001800421249870578, "loss": 2.2441, "step": 173890 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.000180041017037387, "loss": 2.2048, "step": 173895 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00018003990906037274, "loss": 1.8623, "step": 173900 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.00018003880105601537, "loss": 2.282, "step": 173905 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018003769302431531, "loss": 2.1102, "step": 173910 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018003658496527293, "loss": 1.953, "step": 173915 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001800354768788886, "loss": 2.1043, "step": 173920 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00018003436876516267, "loss": 2.235, "step": 173925 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.0001800332606240956, "loss": 2.2518, "step": 173930 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00018003215245568764, "loss": 2.1211, "step": 173935 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018003104425993934, "loss": 2.1043, "step": 173940 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00018002993603685093, "loss": 2.0082, "step": 173945 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00018002882778642281, "loss": 2.1447, "step": 173950 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00018002771950865546, "loss": 2.0714, "step": 173955 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00018002661120354913, "loss": 2.104, "step": 173960 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001800255028711043, "loss": 2.1908, "step": 173965 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00018002439451132127, "loss": 2.1869, "step": 173970 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.0001800232861242005, "loss": 1.9837, "step": 173975 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.0001800221777097423, "loss": 1.9551, "step": 173980 }, { "epoch": 0.41, "grad_norm": 2.546875, "learning_rate": 0.00018002106926794705, "loss": 2.2911, "step": 173985 }, { "epoch": 0.41, "grad_norm": 2.8125, "learning_rate": 0.0001800199607988152, "loss": 2.245, "step": 173990 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00018001885230234708, "loss": 1.754, "step": 173995 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00018001774377854302, "loss": 2.2459, "step": 174000 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001800166352274035, "loss": 2.0513, "step": 174005 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001800155266489288, "loss": 2.2093, "step": 174010 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.00018001441804311937, "loss": 2.1628, "step": 174015 }, { "epoch": 0.41, "grad_norm": 1.796875, "learning_rate": 0.00018001330940997553, "loss": 2.0398, "step": 174020 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00018001220074949774, "loss": 2.2003, "step": 174025 }, { "epoch": 0.41, "grad_norm": 2.859375, "learning_rate": 0.0001800110920616863, "loss": 2.328, "step": 174030 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00018000998334654164, "loss": 2.0672, "step": 174035 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.00018000887460406411, "loss": 2.066, "step": 174040 }, { "epoch": 0.41, "grad_norm": 1.9765625, "learning_rate": 0.0001800077658342541, "loss": 2.1497, "step": 174045 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.000180006657037112, "loss": 2.0669, "step": 174050 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.00018000554821263816, "loss": 2.0354, "step": 174055 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.00018000443936083295, "loss": 1.8515, "step": 174060 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.0001800033304816968, "loss": 2.1984, "step": 174065 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00018000222157523005, "loss": 1.9278, "step": 174070 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.0001800011126414331, "loss": 2.3504, "step": 174075 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00018000000368030632, "loss": 2.0839, "step": 174080 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001799988946918501, "loss": 2.2647, "step": 174085 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00017999778567606475, "loss": 2.1824, "step": 174090 }, { "epoch": 0.41, "grad_norm": 2.6875, "learning_rate": 0.00017999667663295076, "loss": 2.1324, "step": 174095 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00017999556756250845, "loss": 2.2642, "step": 174100 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.0001799944584647382, "loss": 2.152, "step": 174105 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.0001799933493396404, "loss": 1.9544, "step": 174110 }, { "epoch": 0.41, "grad_norm": 2.640625, "learning_rate": 0.00017999224018721538, "loss": 2.2705, "step": 174115 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001799911310074636, "loss": 2.3072, "step": 174120 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017999002180038538, "loss": 2.0926, "step": 174125 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017998891256598113, "loss": 2.1227, "step": 174130 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.0001799878033042512, "loss": 2.1614, "step": 174135 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.000179986694015196, "loss": 2.1664, "step": 174140 }, { "epoch": 0.41, "grad_norm": 2.59375, "learning_rate": 0.00017998558469881589, "loss": 2.1813, "step": 174145 }, { "epoch": 0.41, "grad_norm": 2.6875, "learning_rate": 0.00017998447535511126, "loss": 2.0548, "step": 174150 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00017998336598408246, "loss": 2.1708, "step": 174155 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001799822565857299, "loss": 1.9533, "step": 174160 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017998114716005396, "loss": 2.2271, "step": 174165 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00017998003770705503, "loss": 2.1356, "step": 174170 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017997892822673345, "loss": 2.0149, "step": 174175 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001799778187190896, "loss": 2.1972, "step": 174180 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017997670918412387, "loss": 2.2109, "step": 174185 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001799755996218367, "loss": 2.1855, "step": 174190 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00017997449003222837, "loss": 2.125, "step": 174195 }, { "epoch": 0.41, "grad_norm": 2.65625, "learning_rate": 0.00017997338041529935, "loss": 2.0446, "step": 174200 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017997227077104993, "loss": 2.0365, "step": 174205 }, { "epoch": 0.41, "grad_norm": 1.75, "learning_rate": 0.00017997116109948053, "loss": 2.1732, "step": 174210 }, { "epoch": 0.41, "grad_norm": 1.75, "learning_rate": 0.00017997005140059152, "loss": 1.9903, "step": 174215 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017996894167438332, "loss": 2.183, "step": 174220 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.0001799678319208563, "loss": 2.024, "step": 174225 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00017996672214001076, "loss": 2.3221, "step": 174230 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00017996561233184718, "loss": 2.1054, "step": 174235 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017996450249636587, "loss": 2.1475, "step": 174240 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017996339263356726, "loss": 2.1529, "step": 174245 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00017996228274345168, "loss": 2.099, "step": 174250 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017996117282601955, "loss": 2.1514, "step": 174255 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017996006288127124, "loss": 2.2025, "step": 174260 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00017995895290920714, "loss": 2.1599, "step": 174265 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017995784290982756, "loss": 1.8895, "step": 174270 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017995673288313295, "loss": 2.206, "step": 174275 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00017995562282912368, "loss": 2.1888, "step": 174280 }, { "epoch": 0.41, "grad_norm": 1.796875, "learning_rate": 0.00017995451274780013, "loss": 2.147, "step": 174285 }, { "epoch": 0.41, "grad_norm": 2.53125, "learning_rate": 0.00017995340263916263, "loss": 2.067, "step": 174290 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00017995229250321163, "loss": 2.1698, "step": 174295 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.00017995118233994745, "loss": 2.1007, "step": 174300 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017995007214937055, "loss": 2.1437, "step": 174305 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.0001799489619314812, "loss": 2.0789, "step": 174310 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017994785168627987, "loss": 2.2379, "step": 174315 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00017994674141376686, "loss": 2.1199, "step": 174320 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017994563111394264, "loss": 1.9876, "step": 174325 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00017994452078680753, "loss": 2.2728, "step": 174330 }, { "epoch": 0.41, "grad_norm": 2.953125, "learning_rate": 0.00017994341043236188, "loss": 2.2586, "step": 174335 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017994230005060618, "loss": 2.324, "step": 174340 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001799411896415407, "loss": 2.1093, "step": 174345 }, { "epoch": 0.41, "grad_norm": 1.8984375, "learning_rate": 0.00017994007920516588, "loss": 2.245, "step": 174350 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017993896874148204, "loss": 2.2715, "step": 174355 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017993785825048963, "loss": 2.1208, "step": 174360 }, { "epoch": 0.41, "grad_norm": 1.9765625, "learning_rate": 0.00017993674773218897, "loss": 2.1203, "step": 174365 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017993563718658053, "loss": 2.0193, "step": 174370 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017993452661366457, "loss": 2.0581, "step": 174375 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017993341601344156, "loss": 2.1392, "step": 174380 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017993230538591181, "loss": 2.2269, "step": 174385 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017993119473107576, "loss": 2.0224, "step": 174390 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017993008404893374, "loss": 2.1088, "step": 174395 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00017992897333948617, "loss": 2.1609, "step": 174400 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.0001799278626027334, "loss": 2.0631, "step": 174405 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017992675183867586, "loss": 2.0696, "step": 174410 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.00017992564104731386, "loss": 2.1207, "step": 174415 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017992453022864783, "loss": 2.0374, "step": 174420 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00017992341938267813, "loss": 2.0838, "step": 174425 }, { "epoch": 0.41, "grad_norm": 1.5234375, "learning_rate": 0.00017992230850940512, "loss": 2.1109, "step": 174430 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00017992119760882922, "loss": 2.2512, "step": 174435 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00017992008668095077, "loss": 2.1271, "step": 174440 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017991897572577017, "loss": 2.3757, "step": 174445 }, { "epoch": 0.41, "grad_norm": 3.015625, "learning_rate": 0.00017991786474328783, "loss": 2.1707, "step": 174450 }, { "epoch": 0.41, "grad_norm": 4.1875, "learning_rate": 0.00017991675373350407, "loss": 2.1273, "step": 174455 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.0001799156426964193, "loss": 2.0883, "step": 174460 }, { "epoch": 0.41, "grad_norm": 1.96875, "learning_rate": 0.0001799145316320339, "loss": 1.9683, "step": 174465 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00017991342054034823, "loss": 2.2445, "step": 174470 }, { "epoch": 0.41, "grad_norm": 1.953125, "learning_rate": 0.00017991230942136273, "loss": 2.2149, "step": 174475 }, { "epoch": 0.41, "grad_norm": 2.734375, "learning_rate": 0.0001799111982750777, "loss": 2.1588, "step": 174480 }, { "epoch": 0.41, "grad_norm": 2.75, "learning_rate": 0.00017991008710149358, "loss": 2.1297, "step": 174485 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017990897590061067, "loss": 2.082, "step": 174490 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00017990786467242948, "loss": 2.0972, "step": 174495 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017990675341695027, "loss": 2.3016, "step": 174500 }, { "epoch": 0.41, "grad_norm": 1.953125, "learning_rate": 0.00017990564213417348, "loss": 2.056, "step": 174505 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00017990453082409946, "loss": 2.2573, "step": 174510 }, { "epoch": 0.41, "grad_norm": 1.8828125, "learning_rate": 0.00017990341948672859, "loss": 2.1408, "step": 174515 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.0001799023081220613, "loss": 2.141, "step": 174520 }, { "epoch": 0.41, "grad_norm": 1.8359375, "learning_rate": 0.00017990119673009792, "loss": 2.1829, "step": 174525 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017990008531083883, "loss": 2.1109, "step": 174530 }, { "epoch": 0.41, "grad_norm": 2.8125, "learning_rate": 0.0001798989738642844, "loss": 2.0226, "step": 174535 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001798978623904351, "loss": 2.0271, "step": 174540 }, { "epoch": 0.41, "grad_norm": 1.984375, "learning_rate": 0.00017989675088929118, "loss": 2.0134, "step": 174545 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017989563936085312, "loss": 2.0971, "step": 174550 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017989452780512122, "loss": 2.1278, "step": 174555 }, { "epoch": 0.41, "grad_norm": 1.9765625, "learning_rate": 0.00017989341622209594, "loss": 1.9632, "step": 174560 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001798923046117776, "loss": 2.1172, "step": 174565 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001798911929741666, "loss": 2.0572, "step": 174570 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.00017989008130926332, "loss": 2.1163, "step": 174575 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017988896961706815, "loss": 1.8799, "step": 174580 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017988785789758145, "loss": 2.0392, "step": 174585 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.0001798867461508036, "loss": 2.0595, "step": 174590 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.000179885634376735, "loss": 2.2419, "step": 174595 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.000179884522575376, "loss": 2.2061, "step": 174600 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017988341074672703, "loss": 2.0713, "step": 174605 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017988229889078842, "loss": 2.1793, "step": 174610 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017988118700756057, "loss": 2.1555, "step": 174615 }, { "epoch": 0.41, "grad_norm": 2.75, "learning_rate": 0.00017988007509704386, "loss": 2.1202, "step": 174620 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00017987896315923866, "loss": 1.9898, "step": 174625 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001798778511941454, "loss": 2.0326, "step": 174630 }, { "epoch": 0.41, "grad_norm": 2.5, "learning_rate": 0.00017987673920176434, "loss": 2.1807, "step": 174635 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017987562718209602, "loss": 2.0828, "step": 174640 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.00017987451513514069, "loss": 2.1005, "step": 174645 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00017987340306089875, "loss": 2.19, "step": 174650 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017987229095937067, "loss": 2.1376, "step": 174655 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.0001798711788305567, "loss": 2.0787, "step": 174660 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00017987006667445735, "loss": 2.1569, "step": 174665 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001798689544910729, "loss": 2.0798, "step": 174670 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.0001798678422804038, "loss": 2.2567, "step": 174675 }, { "epoch": 0.41, "grad_norm": 2.734375, "learning_rate": 0.00017986673004245036, "loss": 2.0333, "step": 174680 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.000179865617777213, "loss": 2.1661, "step": 174685 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00017986450548469212, "loss": 2.0884, "step": 174690 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017986339316488804, "loss": 2.0883, "step": 174695 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.0001798622808178012, "loss": 2.233, "step": 174700 }, { "epoch": 0.41, "grad_norm": 1.75, "learning_rate": 0.00017986116844343196, "loss": 2.155, "step": 174705 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001798600560417807, "loss": 2.0849, "step": 174710 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.0001798589436128478, "loss": 2.0399, "step": 174715 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.00017985783115663363, "loss": 2.0765, "step": 174720 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017985671867313854, "loss": 2.1323, "step": 174725 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017985560616236298, "loss": 2.1612, "step": 174730 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.0001798544936243073, "loss": 2.0018, "step": 174735 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.0001798533810589719, "loss": 2.1281, "step": 174740 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001798522684663571, "loss": 2.0662, "step": 174745 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017985115584646333, "loss": 2.0411, "step": 174750 }, { "epoch": 0.41, "grad_norm": 1.8125, "learning_rate": 0.00017985004319929094, "loss": 2.0556, "step": 174755 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00017984893052484034, "loss": 2.0243, "step": 174760 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001798478178231119, "loss": 2.3061, "step": 174765 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.00017984670509410598, "loss": 2.0022, "step": 174770 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.000179845592337823, "loss": 2.09, "step": 174775 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017984447955426332, "loss": 1.9532, "step": 174780 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001798433667434273, "loss": 2.1169, "step": 174785 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017984225390531536, "loss": 2.0517, "step": 174790 }, { "epoch": 0.41, "grad_norm": 1.8203125, "learning_rate": 0.00017984114103992784, "loss": 1.9755, "step": 174795 }, { "epoch": 0.41, "grad_norm": 1.7421875, "learning_rate": 0.00017984002814726513, "loss": 1.9816, "step": 174800 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017983891522732764, "loss": 2.0834, "step": 174805 }, { "epoch": 0.41, "grad_norm": 2.734375, "learning_rate": 0.0001798378022801157, "loss": 2.0518, "step": 174810 }, { "epoch": 0.41, "grad_norm": 1.953125, "learning_rate": 0.00017983668930562975, "loss": 2.2045, "step": 174815 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017983557630387012, "loss": 2.0641, "step": 174820 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00017983446327483722, "loss": 2.1635, "step": 174825 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017983335021853143, "loss": 2.1177, "step": 174830 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.0001798322371349531, "loss": 2.1911, "step": 174835 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00017983112402410263, "loss": 2.2647, "step": 174840 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.0001798300108859804, "loss": 2.169, "step": 174845 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00017982889772058676, "loss": 2.1846, "step": 174850 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001798277845279222, "loss": 2.3046, "step": 174855 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.000179826671307987, "loss": 2.0015, "step": 174860 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.0001798255580607815, "loss": 2.2106, "step": 174865 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017982444478630618, "loss": 1.9981, "step": 174870 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001798233314845614, "loss": 2.0893, "step": 174875 }, { "epoch": 0.41, "grad_norm": 2.78125, "learning_rate": 0.00017982221815554747, "loss": 2.0499, "step": 174880 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017982110479926487, "loss": 2.2397, "step": 174885 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017981999141571394, "loss": 2.1615, "step": 174890 }, { "epoch": 0.41, "grad_norm": 2.625, "learning_rate": 0.00017981887800489502, "loss": 2.058, "step": 174895 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.00017981776456680852, "loss": 2.0737, "step": 174900 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00017981665110145486, "loss": 2.1208, "step": 174905 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017981553760883437, "loss": 1.9817, "step": 174910 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00017981442408894745, "loss": 2.0165, "step": 174915 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017981331054179445, "loss": 2.1707, "step": 174920 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001798121969673758, "loss": 2.1101, "step": 174925 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.00017981108336569184, "loss": 2.2792, "step": 174930 }, { "epoch": 0.41, "grad_norm": 1.90625, "learning_rate": 0.00017980996973674299, "loss": 2.2922, "step": 174935 }, { "epoch": 0.41, "grad_norm": 1.8046875, "learning_rate": 0.00017980885608052957, "loss": 2.164, "step": 174940 }, { "epoch": 0.41, "grad_norm": 2.625, "learning_rate": 0.00017980774239705202, "loss": 2.1216, "step": 174945 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.0001798066286863107, "loss": 2.246, "step": 174950 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017980551494830598, "loss": 2.0325, "step": 174955 }, { "epoch": 0.41, "grad_norm": 1.828125, "learning_rate": 0.00017980440118303825, "loss": 2.1146, "step": 174960 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001798032873905079, "loss": 2.2109, "step": 174965 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.0001798021735707153, "loss": 2.0446, "step": 174970 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.0001798010597236608, "loss": 2.2447, "step": 174975 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00017979994584934483, "loss": 2.1763, "step": 174980 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017979883194776773, "loss": 2.1926, "step": 174985 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017979771801892994, "loss": 2.0142, "step": 174990 }, { "epoch": 0.41, "grad_norm": 1.8671875, "learning_rate": 0.00017979660406283182, "loss": 2.2886, "step": 174995 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.0001797954900794737, "loss": 1.9485, "step": 175000 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017979437606885598, "loss": 2.1183, "step": 175005 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00017979326203097906, "loss": 1.9801, "step": 175010 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017979214796584332, "loss": 1.9156, "step": 175015 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017979103387344916, "loss": 2.1332, "step": 175020 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017978991975379688, "loss": 2.2488, "step": 175025 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017978880560688695, "loss": 2.1021, "step": 175030 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017978769143271975, "loss": 2.0357, "step": 175035 }, { "epoch": 0.41, "grad_norm": 1.8203125, "learning_rate": 0.00017978657723129556, "loss": 2.0662, "step": 175040 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017978546300261489, "loss": 2.0174, "step": 175045 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017978434874667802, "loss": 2.1294, "step": 175050 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00017978323446348537, "loss": 2.1271, "step": 175055 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00017978212015303733, "loss": 2.1425, "step": 175060 }, { "epoch": 0.41, "grad_norm": 1.828125, "learning_rate": 0.00017978100581533428, "loss": 2.1072, "step": 175065 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001797798914503766, "loss": 2.1152, "step": 175070 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00017977877705816463, "loss": 2.1488, "step": 175075 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001797776626386988, "loss": 2.2128, "step": 175080 }, { "epoch": 0.41, "grad_norm": 1.6015625, "learning_rate": 0.00017977654819197948, "loss": 1.9889, "step": 175085 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00017977543371800706, "loss": 2.0729, "step": 175090 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017977431921678186, "loss": 2.1005, "step": 175095 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017977320468830433, "loss": 2.1231, "step": 175100 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017977209013257482, "loss": 2.2085, "step": 175105 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.0001797709755495937, "loss": 1.9766, "step": 175110 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017976986093936142, "loss": 2.1491, "step": 175115 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00017976874630187828, "loss": 2.1589, "step": 175120 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00017976763163714466, "loss": 2.2465, "step": 175125 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.000179766516945161, "loss": 1.9381, "step": 175130 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017976540222592767, "loss": 2.2174, "step": 175135 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017976428747944502, "loss": 1.9459, "step": 175140 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.0001797631727057134, "loss": 1.8979, "step": 175145 }, { "epoch": 0.41, "grad_norm": 2.59375, "learning_rate": 0.00017976205790473329, "loss": 2.3811, "step": 175150 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.000179760943076505, "loss": 2.2727, "step": 175155 }, { "epoch": 0.41, "grad_norm": 2.90625, "learning_rate": 0.00017975982822102888, "loss": 1.9651, "step": 175160 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.0001797587133383054, "loss": 2.0846, "step": 175165 }, { "epoch": 0.41, "grad_norm": 1.6640625, "learning_rate": 0.00017975759842833491, "loss": 2.028, "step": 175170 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00017975648349111777, "loss": 2.0196, "step": 175175 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.00017975536852665435, "loss": 2.0308, "step": 175180 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017975425353494503, "loss": 2.1063, "step": 175185 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017975313851599026, "loss": 2.0602, "step": 175190 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017975202346979033, "loss": 2.2071, "step": 175195 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.0001797509083963457, "loss": 2.0022, "step": 175200 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001797497932956567, "loss": 2.1141, "step": 175205 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.0001797486781677237, "loss": 1.9869, "step": 175210 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017974756301254713, "loss": 2.0343, "step": 175215 }, { "epoch": 0.41, "grad_norm": 1.7265625, "learning_rate": 0.00017974644783012736, "loss": 2.0602, "step": 175220 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001797453326204647, "loss": 2.2672, "step": 175225 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017974421738355963, "loss": 2.3372, "step": 175230 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001797431021194125, "loss": 2.0748, "step": 175235 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.00017974198682802365, "loss": 2.2181, "step": 175240 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017974087150939353, "loss": 2.1449, "step": 175245 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017973975616352244, "loss": 2.2364, "step": 175250 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017973864079041084, "loss": 1.9663, "step": 175255 }, { "epoch": 0.41, "grad_norm": 2.75, "learning_rate": 0.00017973752539005906, "loss": 2.1032, "step": 175260 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017973640996246749, "loss": 2.094, "step": 175265 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001797352945076365, "loss": 2.2308, "step": 175270 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.0001797341790255665, "loss": 2.2703, "step": 175275 }, { "epoch": 0.41, "grad_norm": 1.765625, "learning_rate": 0.00017973306351625788, "loss": 2.1513, "step": 175280 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017973194797971096, "loss": 2.1278, "step": 175285 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001797308324159262, "loss": 2.1163, "step": 175290 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.0001797297168249039, "loss": 1.9147, "step": 175295 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.0001797286012066445, "loss": 2.0683, "step": 175300 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017972748556114839, "loss": 2.135, "step": 175305 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.0001797263698884159, "loss": 2.3177, "step": 175310 }, { "epoch": 0.41, "grad_norm": 2.453125, "learning_rate": 0.00017972525418844746, "loss": 2.103, "step": 175315 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001797241384612434, "loss": 1.7824, "step": 175320 }, { "epoch": 0.41, "grad_norm": 1.875, "learning_rate": 0.00017972302270680412, "loss": 2.2195, "step": 175325 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00017972190692513006, "loss": 2.0628, "step": 175330 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.0001797207911162215, "loss": 2.1052, "step": 175335 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.0001797196752800789, "loss": 2.1486, "step": 175340 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.0001797185594167026, "loss": 2.1377, "step": 175345 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.000179717443526093, "loss": 2.3838, "step": 175350 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017971632760825046, "loss": 2.1259, "step": 175355 }, { "epoch": 0.41, "grad_norm": 1.75, "learning_rate": 0.00017971521166317537, "loss": 2.0181, "step": 175360 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00017971409569086813, "loss": 2.203, "step": 175365 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00017971297969132915, "loss": 1.7326, "step": 175370 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017971186366455872, "loss": 2.0837, "step": 175375 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.0001797107476105573, "loss": 2.0716, "step": 175380 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00017970963152932524, "loss": 2.1027, "step": 175385 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001797085154208629, "loss": 2.1085, "step": 175390 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00017970739928517074, "loss": 2.1726, "step": 175395 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017970628312224902, "loss": 2.122, "step": 175400 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017970516693209822, "loss": 2.1354, "step": 175405 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.0001797040507147187, "loss": 2.0129, "step": 175410 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00017970293447011078, "loss": 2.2053, "step": 175415 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017970181819827494, "loss": 2.0869, "step": 175420 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.0001797007018992115, "loss": 2.1994, "step": 175425 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017969958557292087, "loss": 2.1578, "step": 175430 }, { "epoch": 0.41, "grad_norm": 2.71875, "learning_rate": 0.00017969846921940338, "loss": 2.0594, "step": 175435 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001796973528386595, "loss": 2.2561, "step": 175440 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.0001796962364306895, "loss": 1.9402, "step": 175445 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017969511999549388, "loss": 2.1985, "step": 175450 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017969400353307294, "loss": 2.1934, "step": 175455 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017969288704342706, "loss": 2.0178, "step": 175460 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00017969177052655665, "loss": 2.1333, "step": 175465 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001796906539824621, "loss": 1.9402, "step": 175470 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001796895374111438, "loss": 2.2156, "step": 175475 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017968842081260205, "loss": 2.0731, "step": 175480 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017968730418683733, "loss": 2.1644, "step": 175485 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017968618753385, "loss": 2.2027, "step": 175490 }, { "epoch": 0.41, "grad_norm": 2.90625, "learning_rate": 0.00017968507085364038, "loss": 2.2292, "step": 175495 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017968395414620888, "loss": 2.2115, "step": 175500 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017968283741155597, "loss": 2.1441, "step": 175505 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.00017968172064968189, "loss": 2.0444, "step": 175510 }, { "epoch": 0.41, "grad_norm": 1.703125, "learning_rate": 0.00017968060386058712, "loss": 1.9574, "step": 175515 }, { "epoch": 0.41, "grad_norm": 2.5625, "learning_rate": 0.000179679487044272, "loss": 2.1396, "step": 175520 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.0001796783702007369, "loss": 2.1616, "step": 175525 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017967725332998228, "loss": 2.233, "step": 175530 }, { "epoch": 0.41, "grad_norm": 1.8828125, "learning_rate": 0.00017967613643200844, "loss": 2.1827, "step": 175535 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017967501950681578, "loss": 1.9696, "step": 175540 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017967390255440468, "loss": 2.0589, "step": 175545 }, { "epoch": 0.41, "grad_norm": 2.765625, "learning_rate": 0.00017967278557477556, "loss": 2.0642, "step": 175550 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017967166856792873, "loss": 2.0569, "step": 175555 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017967055153386463, "loss": 1.8348, "step": 175560 }, { "epoch": 0.41, "grad_norm": 1.9140625, "learning_rate": 0.00017966943447258365, "loss": 2.1556, "step": 175565 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017966831738408612, "loss": 2.0886, "step": 175570 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017966720026837244, "loss": 2.0962, "step": 175575 }, { "epoch": 0.41, "grad_norm": 1.9296875, "learning_rate": 0.000179666083125443, "loss": 2.2635, "step": 175580 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017966496595529823, "loss": 2.1216, "step": 175585 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00017966384875793843, "loss": 2.0273, "step": 175590 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.000179662731533364, "loss": 2.2586, "step": 175595 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017966161428157533, "loss": 2.145, "step": 175600 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00017966049700257283, "loss": 2.1343, "step": 175605 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017965937969635686, "loss": 1.976, "step": 175610 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001796582623629278, "loss": 1.9969, "step": 175615 }, { "epoch": 0.41, "grad_norm": 1.9765625, "learning_rate": 0.00017965714500228606, "loss": 2.1997, "step": 175620 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.00017965602761443194, "loss": 2.0303, "step": 175625 }, { "epoch": 0.41, "grad_norm": 1.6796875, "learning_rate": 0.00017965491019936592, "loss": 2.21, "step": 175630 }, { "epoch": 0.41, "grad_norm": 2.921875, "learning_rate": 0.00017965379275708832, "loss": 2.1347, "step": 175635 }, { "epoch": 0.41, "grad_norm": 1.8984375, "learning_rate": 0.0001796526752875995, "loss": 2.1383, "step": 175640 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00017965155779089994, "loss": 2.0173, "step": 175645 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017965044026698994, "loss": 2.2533, "step": 175650 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00017964932271586988, "loss": 2.035, "step": 175655 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017964820513754023, "loss": 2.2122, "step": 175660 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.00017964708753200127, "loss": 2.199, "step": 175665 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017964596989925342, "loss": 2.3319, "step": 175670 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017964485223929705, "loss": 2.177, "step": 175675 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017964373455213258, "loss": 2.112, "step": 175680 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00017964261683776035, "loss": 2.0011, "step": 175685 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00017964149909618078, "loss": 2.2766, "step": 175690 }, { "epoch": 0.41, "grad_norm": 1.796875, "learning_rate": 0.0001796403813273942, "loss": 2.1798, "step": 175695 }, { "epoch": 0.41, "grad_norm": 2.515625, "learning_rate": 0.00017963926353140104, "loss": 2.254, "step": 175700 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017963814570820167, "loss": 2.0409, "step": 175705 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00017963702785779643, "loss": 2.2946, "step": 175710 }, { "epoch": 0.41, "grad_norm": 1.8203125, "learning_rate": 0.00017963590998018578, "loss": 2.188, "step": 175715 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00017963479207537005, "loss": 2.1219, "step": 175720 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00017963367414334963, "loss": 2.0601, "step": 175725 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017963255618412487, "loss": 2.1839, "step": 175730 }, { "epoch": 0.41, "grad_norm": 2.59375, "learning_rate": 0.0001796314381976962, "loss": 1.9511, "step": 175735 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.000179630320184064, "loss": 2.0644, "step": 175740 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017962920214322864, "loss": 2.2208, "step": 175745 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001796280840751905, "loss": 2.1836, "step": 175750 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017962696597994995, "loss": 2.064, "step": 175755 }, { "epoch": 0.41, "grad_norm": 2.421875, "learning_rate": 0.0001796258478575074, "loss": 1.9336, "step": 175760 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001796247297078632, "loss": 2.0412, "step": 175765 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00017962361153101773, "loss": 2.2248, "step": 175770 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017962249332697142, "loss": 1.9484, "step": 175775 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017962137509572462, "loss": 2.1398, "step": 175780 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.0001796202568372777, "loss": 2.3057, "step": 175785 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017961913855163108, "loss": 2.0372, "step": 175790 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.0001796180202387851, "loss": 2.0787, "step": 175795 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00017961690189874016, "loss": 2.2247, "step": 175800 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017961578353149665, "loss": 2.1741, "step": 175805 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017961466513705495, "loss": 2.1557, "step": 175810 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00017961354671541541, "loss": 2.1209, "step": 175815 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017961242826657848, "loss": 2.2571, "step": 175820 }, { "epoch": 0.41, "grad_norm": 2.46875, "learning_rate": 0.00017961130979054447, "loss": 1.9401, "step": 175825 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001796101912873138, "loss": 2.0314, "step": 175830 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.00017960907275688684, "loss": 2.2874, "step": 175835 }, { "epoch": 0.41, "grad_norm": 1.9921875, "learning_rate": 0.00017960795419926399, "loss": 2.1572, "step": 175840 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017960683561444563, "loss": 2.1983, "step": 175845 }, { "epoch": 0.41, "grad_norm": 1.8203125, "learning_rate": 0.0001796057170024321, "loss": 1.9864, "step": 175850 }, { "epoch": 0.41, "grad_norm": 2.328125, "learning_rate": 0.00017960459836322383, "loss": 2.186, "step": 175855 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017960347969682116, "loss": 2.1362, "step": 175860 }, { "epoch": 0.41, "grad_norm": 1.7578125, "learning_rate": 0.00017960236100322455, "loss": 2.2325, "step": 175865 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017960124228243429, "loss": 2.1196, "step": 175870 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.0001796001235344508, "loss": 2.1472, "step": 175875 }, { "epoch": 0.41, "grad_norm": 2.75, "learning_rate": 0.00017959900475927448, "loss": 2.1617, "step": 175880 }, { "epoch": 0.41, "grad_norm": 2.359375, "learning_rate": 0.00017959788595690567, "loss": 2.0461, "step": 175885 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.00017959676712734482, "loss": 2.1191, "step": 175890 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017959564827059224, "loss": 2.1713, "step": 175895 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.00017959452938664838, "loss": 2.1716, "step": 175900 }, { "epoch": 0.41, "grad_norm": 1.7890625, "learning_rate": 0.00017959341047551355, "loss": 2.0969, "step": 175905 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017959229153718817, "loss": 1.9105, "step": 175910 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.0001795911725716726, "loss": 2.2622, "step": 175915 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.0001795900535789673, "loss": 2.0847, "step": 175920 }, { "epoch": 0.41, "grad_norm": 2.34375, "learning_rate": 0.00017958893455907257, "loss": 1.9112, "step": 175925 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.0001795878155119888, "loss": 2.3053, "step": 175930 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001795866964377164, "loss": 2.1527, "step": 175935 }, { "epoch": 0.41, "grad_norm": 2.4375, "learning_rate": 0.00017958557733625574, "loss": 2.1796, "step": 175940 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.0001795844582076072, "loss": 2.1107, "step": 175945 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017958333905177115, "loss": 2.1636, "step": 175950 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.000179582219868748, "loss": 2.0826, "step": 175955 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017958110065853817, "loss": 2.2941, "step": 175960 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017957998142114194, "loss": 2.2421, "step": 175965 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017957886215655975, "loss": 2.103, "step": 175970 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017957774286479197, "loss": 2.0422, "step": 175975 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017957662354583902, "loss": 2.3727, "step": 175980 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017957550419970124, "loss": 2.129, "step": 175985 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.000179574384826379, "loss": 2.2418, "step": 175990 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.00017957326542587275, "loss": 2.0963, "step": 175995 }, { "epoch": 0.41, "grad_norm": 2.1875, "learning_rate": 0.00017957214599818278, "loss": 2.1304, "step": 176000 }, { "epoch": 0.41, "grad_norm": 2.015625, "learning_rate": 0.00017957102654330955, "loss": 2.2067, "step": 176005 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.0001795699070612534, "loss": 2.2177, "step": 176010 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.00017956878755201477, "loss": 2.1693, "step": 176015 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.00017956766801559396, "loss": 2.0828, "step": 176020 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017956654845199138, "loss": 2.0993, "step": 176025 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017956542886120744, "loss": 2.0704, "step": 176030 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001795643092432425, "loss": 2.2569, "step": 176035 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00017956318959809698, "loss": 2.146, "step": 176040 }, { "epoch": 0.41, "grad_norm": 1.9140625, "learning_rate": 0.0001795620699257712, "loss": 2.1679, "step": 176045 }, { "epoch": 0.41, "grad_norm": 1.7109375, "learning_rate": 0.0001795609502262656, "loss": 2.1262, "step": 176050 }, { "epoch": 0.41, "grad_norm": 2.578125, "learning_rate": 0.00017955983049958052, "loss": 2.0293, "step": 176055 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017955871074571634, "loss": 2.2453, "step": 176060 }, { "epoch": 0.41, "grad_norm": 2.375, "learning_rate": 0.0001795575909646735, "loss": 2.0393, "step": 176065 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.0001795564711564523, "loss": 2.2186, "step": 176070 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.0001795553513210532, "loss": 2.0459, "step": 176075 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00017955423145847655, "loss": 2.1285, "step": 176080 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017955311156872273, "loss": 2.2185, "step": 176085 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001795519916517921, "loss": 2.1203, "step": 176090 }, { "epoch": 0.41, "grad_norm": 2.140625, "learning_rate": 0.00017955087170768507, "loss": 2.1573, "step": 176095 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00017954975173640206, "loss": 2.1962, "step": 176100 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001795486317379434, "loss": 2.0519, "step": 176105 }, { "epoch": 0.41, "grad_norm": 2.3125, "learning_rate": 0.00017954751171230944, "loss": 2.1209, "step": 176110 }, { "epoch": 0.41, "grad_norm": 2.078125, "learning_rate": 0.00017954639165950064, "loss": 2.2857, "step": 176115 }, { "epoch": 0.41, "grad_norm": 1.7734375, "learning_rate": 0.00017954527157951734, "loss": 2.1559, "step": 176120 }, { "epoch": 0.41, "grad_norm": 2.03125, "learning_rate": 0.00017954415147235995, "loss": 1.9373, "step": 176125 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.00017954303133802882, "loss": 2.0008, "step": 176130 }, { "epoch": 0.41, "grad_norm": 2.109375, "learning_rate": 0.00017954191117652434, "loss": 2.0541, "step": 176135 }, { "epoch": 0.41, "grad_norm": 1.8359375, "learning_rate": 0.00017954079098784692, "loss": 2.2213, "step": 176140 }, { "epoch": 0.41, "grad_norm": 2.046875, "learning_rate": 0.0001795396707719969, "loss": 1.9372, "step": 176145 }, { "epoch": 0.41, "grad_norm": 2.390625, "learning_rate": 0.00017953855052897473, "loss": 2.0237, "step": 176150 }, { "epoch": 0.41, "grad_norm": 1.640625, "learning_rate": 0.0001795374302587807, "loss": 2.0556, "step": 176155 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017953630996141528, "loss": 1.8561, "step": 176160 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.0001795351896368788, "loss": 2.1069, "step": 176165 }, { "epoch": 0.41, "grad_norm": 1.9609375, "learning_rate": 0.00017953406928517164, "loss": 2.0088, "step": 176170 }, { "epoch": 0.41, "grad_norm": 2.296875, "learning_rate": 0.00017953294890629424, "loss": 2.0841, "step": 176175 }, { "epoch": 0.41, "grad_norm": 1.8359375, "learning_rate": 0.00017953182850024687, "loss": 2.3226, "step": 176180 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017953070806703004, "loss": 2.0318, "step": 176185 }, { "epoch": 0.41, "grad_norm": 2.28125, "learning_rate": 0.00017952958760664406, "loss": 2.1077, "step": 176190 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.00017952846711908937, "loss": 2.0854, "step": 176195 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017952734660436627, "loss": 2.4434, "step": 176200 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001795262260624752, "loss": 2.1601, "step": 176205 }, { "epoch": 0.41, "grad_norm": 2.25, "learning_rate": 0.00017952510549341653, "loss": 2.2494, "step": 176210 }, { "epoch": 0.41, "grad_norm": 2.5, "learning_rate": 0.00017952398489719061, "loss": 2.2356, "step": 176215 }, { "epoch": 0.41, "grad_norm": 2.5, "learning_rate": 0.00017952286427379792, "loss": 2.0335, "step": 176220 }, { "epoch": 0.41, "grad_norm": 2.609375, "learning_rate": 0.00017952174362323873, "loss": 1.9735, "step": 176225 }, { "epoch": 0.41, "grad_norm": 2.484375, "learning_rate": 0.0001795206229455135, "loss": 2.2323, "step": 176230 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00017951950224062254, "loss": 1.9965, "step": 176235 }, { "epoch": 0.41, "grad_norm": 2.53125, "learning_rate": 0.0001795183815085663, "loss": 2.0992, "step": 176240 }, { "epoch": 0.41, "grad_norm": 1.890625, "learning_rate": 0.00017951726074934515, "loss": 2.2959, "step": 176245 }, { "epoch": 0.41, "grad_norm": 1.9375, "learning_rate": 0.00017951613996295944, "loss": 2.1576, "step": 176250 }, { "epoch": 0.41, "grad_norm": 2.0, "learning_rate": 0.0001795150191494096, "loss": 2.3285, "step": 176255 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.00017951389830869597, "loss": 2.1021, "step": 176260 }, { "epoch": 0.41, "grad_norm": 2.15625, "learning_rate": 0.00017951277744081894, "loss": 2.1273, "step": 176265 }, { "epoch": 0.41, "grad_norm": 2.21875, "learning_rate": 0.0001795116565457789, "loss": 2.166, "step": 176270 }, { "epoch": 0.41, "grad_norm": 2.171875, "learning_rate": 0.00017951053562357627, "loss": 2.2108, "step": 176275 }, { "epoch": 0.41, "grad_norm": 2.265625, "learning_rate": 0.0001795094146742114, "loss": 1.9183, "step": 176280 }, { "epoch": 0.41, "grad_norm": 2.234375, "learning_rate": 0.00017950829369768462, "loss": 2.0841, "step": 176285 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.0001795071726939964, "loss": 2.1157, "step": 176290 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017950605166314708, "loss": 2.0864, "step": 176295 }, { "epoch": 0.41, "grad_norm": 1.8828125, "learning_rate": 0.00017950493060513707, "loss": 2.1116, "step": 176300 }, { "epoch": 0.41, "grad_norm": 2.40625, "learning_rate": 0.00017950380951996673, "loss": 2.284, "step": 176305 }, { "epoch": 0.41, "grad_norm": 2.546875, "learning_rate": 0.00017950268840763643, "loss": 2.2117, "step": 176310 }, { "epoch": 0.41, "grad_norm": 2.125, "learning_rate": 0.00017950156726814656, "loss": 2.1805, "step": 176315 }, { "epoch": 0.41, "grad_norm": 1.828125, "learning_rate": 0.0001795004461014975, "loss": 2.2246, "step": 176320 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.0001794993249076897, "loss": 1.8765, "step": 176325 }, { "epoch": 0.41, "grad_norm": 2.0625, "learning_rate": 0.0001794982036867235, "loss": 2.0476, "step": 176330 }, { "epoch": 0.41, "grad_norm": 2.203125, "learning_rate": 0.00017949708243859922, "loss": 2.1427, "step": 176335 }, { "epoch": 0.41, "grad_norm": 2.09375, "learning_rate": 0.00017949596116331731, "loss": 2.2742, "step": 176340 }, { "epoch": 0.41, "grad_norm": 1.9453125, "learning_rate": 0.00017949483986087815, "loss": 2.2042, "step": 176345 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017949371853128208, "loss": 2.0808, "step": 176350 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.0001794925971745295, "loss": 2.0748, "step": 176355 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017949147579062087, "loss": 2.1977, "step": 176360 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.0001794903543795565, "loss": 2.0798, "step": 176365 }, { "epoch": 0.42, "grad_norm": 1.640625, "learning_rate": 0.00017948923294133673, "loss": 2.2133, "step": 176370 }, { "epoch": 0.42, "grad_norm": 1.8671875, "learning_rate": 0.00017948811147596203, "loss": 2.0249, "step": 176375 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017948698998343276, "loss": 1.9628, "step": 176380 }, { "epoch": 0.42, "grad_norm": 3.046875, "learning_rate": 0.00017948586846374927, "loss": 2.0624, "step": 176385 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.000179484746916912, "loss": 2.0717, "step": 176390 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017948362534292127, "loss": 2.0975, "step": 176395 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.0001794825037417775, "loss": 2.0971, "step": 176400 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017948138211348105, "loss": 1.9927, "step": 176405 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017948026045803235, "loss": 2.1233, "step": 176410 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017947913877543173, "loss": 2.0933, "step": 176415 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.0001794780170656796, "loss": 2.0063, "step": 176420 }, { "epoch": 0.42, "grad_norm": 1.8515625, "learning_rate": 0.00017947689532877634, "loss": 2.1211, "step": 176425 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.00017947577356472233, "loss": 1.9566, "step": 176430 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017947465177351794, "loss": 2.1924, "step": 176435 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017947352995516358, "loss": 2.1607, "step": 176440 }, { "epoch": 0.42, "grad_norm": 1.828125, "learning_rate": 0.0001794724081096596, "loss": 2.2626, "step": 176445 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017947128623700643, "loss": 2.0899, "step": 176450 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.00017947016433720445, "loss": 2.1027, "step": 176455 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.000179469042410254, "loss": 2.1386, "step": 176460 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017946792045615546, "loss": 2.0744, "step": 176465 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.00017946679847490927, "loss": 1.9458, "step": 176470 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.00017946567646651574, "loss": 2.0598, "step": 176475 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017946455443097533, "loss": 2.1931, "step": 176480 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017946343236828835, "loss": 2.0873, "step": 176485 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017946231027845527, "loss": 2.0909, "step": 176490 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017946118816147643, "loss": 1.8959, "step": 176495 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017946006601735211, "loss": 2.2804, "step": 176500 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017945894384608288, "loss": 2.1068, "step": 176505 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.000179457821647669, "loss": 2.2659, "step": 176510 }, { "epoch": 0.42, "grad_norm": 2.828125, "learning_rate": 0.00017945669942211092, "loss": 2.0873, "step": 176515 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.00017945557716940894, "loss": 1.9378, "step": 176520 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017945445488956355, "loss": 2.1578, "step": 176525 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017945333258257505, "loss": 2.0286, "step": 176530 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017945221024844385, "loss": 2.1393, "step": 176535 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017945108788717032, "loss": 1.9728, "step": 176540 }, { "epoch": 0.42, "grad_norm": 1.6484375, "learning_rate": 0.00017944996549875488, "loss": 2.0992, "step": 176545 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.0001794488430831979, "loss": 1.9629, "step": 176550 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017944772064049973, "loss": 2.0186, "step": 176555 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.00017944659817066078, "loss": 2.058, "step": 176560 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001794454756736814, "loss": 1.9681, "step": 176565 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017944435314956207, "loss": 1.9545, "step": 176570 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.0001794432305983031, "loss": 1.9712, "step": 176575 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017944210801990482, "loss": 2.1748, "step": 176580 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017944098541436773, "loss": 2.0702, "step": 176585 }, { "epoch": 0.42, "grad_norm": 1.8359375, "learning_rate": 0.00017943986278169213, "loss": 1.9857, "step": 176590 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017943874012187844, "loss": 2.0262, "step": 176595 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017943761743492706, "loss": 2.0871, "step": 176600 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017943649472083832, "loss": 2.028, "step": 176605 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017943537197961265, "loss": 2.2793, "step": 176610 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.0001794342492112504, "loss": 2.1254, "step": 176615 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.000179433126415752, "loss": 2.0725, "step": 176620 }, { "epoch": 0.42, "grad_norm": 1.828125, "learning_rate": 0.00017943200359311776, "loss": 2.3543, "step": 176625 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017943088074334816, "loss": 2.0802, "step": 176630 }, { "epoch": 0.42, "grad_norm": 1.734375, "learning_rate": 0.0001794297578664435, "loss": 1.9753, "step": 176635 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.0001794286349624042, "loss": 2.2693, "step": 176640 }, { "epoch": 0.42, "grad_norm": 2.65625, "learning_rate": 0.0001794275120312306, "loss": 2.1668, "step": 176645 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017942638907292318, "loss": 2.0538, "step": 176650 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017942526608748222, "loss": 2.1243, "step": 176655 }, { "epoch": 0.42, "grad_norm": 1.9140625, "learning_rate": 0.00017942414307490817, "loss": 2.1758, "step": 176660 }, { "epoch": 0.42, "grad_norm": 1.796875, "learning_rate": 0.0001794230200352014, "loss": 1.9406, "step": 176665 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017942189696836227, "loss": 2.0957, "step": 176670 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017942077387439118, "loss": 2.1278, "step": 176675 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017941965075328852, "loss": 2.1399, "step": 176680 }, { "epoch": 0.42, "grad_norm": 1.8828125, "learning_rate": 0.00017941852760505465, "loss": 2.322, "step": 176685 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017941740442969, "loss": 2.1507, "step": 176690 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.0001794162812271949, "loss": 2.1506, "step": 176695 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.0001794151579975698, "loss": 2.0719, "step": 176700 }, { "epoch": 0.42, "grad_norm": 1.640625, "learning_rate": 0.000179414034740815, "loss": 2.0789, "step": 176705 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017941291145693092, "loss": 2.2004, "step": 176710 }, { "epoch": 0.42, "grad_norm": 1.8125, "learning_rate": 0.00017941178814591798, "loss": 2.1203, "step": 176715 }, { "epoch": 0.42, "grad_norm": 2.765625, "learning_rate": 0.0001794106648077765, "loss": 2.0734, "step": 176720 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.00017940954144250693, "loss": 2.0635, "step": 176725 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017940841805010963, "loss": 2.0269, "step": 176730 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017940729463058493, "loss": 2.0671, "step": 176735 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.00017940617118393325, "loss": 2.1672, "step": 176740 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017940504771015502, "loss": 2.228, "step": 176745 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.0001794039242092506, "loss": 2.1144, "step": 176750 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017940280068122033, "loss": 2.1076, "step": 176755 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017940167712606462, "loss": 2.103, "step": 176760 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017940055354378383, "loss": 2.0753, "step": 176765 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017939942993437842, "loss": 2.191, "step": 176770 }, { "epoch": 0.42, "grad_norm": 2.75, "learning_rate": 0.00017939830629784873, "loss": 2.1002, "step": 176775 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017939718263419512, "loss": 2.2099, "step": 176780 }, { "epoch": 0.42, "grad_norm": 1.8515625, "learning_rate": 0.00017939605894341798, "loss": 1.9661, "step": 176785 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017939493522551773, "loss": 2.2307, "step": 176790 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001793938114804947, "loss": 2.0641, "step": 176795 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017939268770834934, "loss": 2.2155, "step": 176800 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017939156390908196, "loss": 1.9793, "step": 176805 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.000179390440082693, "loss": 2.0243, "step": 176810 }, { "epoch": 0.42, "grad_norm": 1.9296875, "learning_rate": 0.00017938931622918285, "loss": 2.0726, "step": 176815 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017938819234855185, "loss": 2.1517, "step": 176820 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017938706844080037, "loss": 2.126, "step": 176825 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.0001793859445059289, "loss": 2.1905, "step": 176830 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.0001793848205439377, "loss": 2.1387, "step": 176835 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.00017938369655482722, "loss": 2.0772, "step": 176840 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017938257253859781, "loss": 2.1329, "step": 176845 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017938144849524989, "loss": 2.1565, "step": 176850 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017938032442478384, "loss": 2.1702, "step": 176855 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.0001793792003272, "loss": 2.1773, "step": 176860 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.0001793780762024988, "loss": 2.2111, "step": 176865 }, { "epoch": 0.42, "grad_norm": 1.9140625, "learning_rate": 0.00017937695205068064, "loss": 2.2816, "step": 176870 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017937582787174583, "loss": 2.025, "step": 176875 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017937470366569483, "loss": 2.1709, "step": 176880 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017937357943252798, "loss": 2.123, "step": 176885 }, { "epoch": 0.42, "grad_norm": 3.03125, "learning_rate": 0.00017937245517224567, "loss": 2.1979, "step": 176890 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017937133088484828, "loss": 2.0541, "step": 176895 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.0001793702065703362, "loss": 2.1965, "step": 176900 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017936908222870984, "loss": 2.0684, "step": 176905 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017936795785996956, "loss": 2.0179, "step": 176910 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017936683346411573, "loss": 2.1841, "step": 176915 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017936570904114878, "loss": 2.1126, "step": 176920 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.00017936458459106905, "loss": 2.2246, "step": 176925 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017936346011387691, "loss": 2.1469, "step": 176930 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.00017936233560957284, "loss": 2.0486, "step": 176935 }, { "epoch": 0.42, "grad_norm": 1.7890625, "learning_rate": 0.00017936121107815707, "loss": 2.0252, "step": 176940 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017936008651963016, "loss": 2.1369, "step": 176945 }, { "epoch": 0.42, "grad_norm": 1.859375, "learning_rate": 0.00017935896193399233, "loss": 2.0533, "step": 176950 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017935783732124408, "loss": 2.1412, "step": 176955 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017935671268138573, "loss": 2.191, "step": 176960 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017935558801441772, "loss": 2.1633, "step": 176965 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017935446332034038, "loss": 2.1367, "step": 176970 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001793533385991541, "loss": 1.8504, "step": 176975 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017935221385085933, "loss": 2.0136, "step": 176980 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017935108907545636, "loss": 2.1348, "step": 176985 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017934996427294562, "loss": 2.1514, "step": 176990 }, { "epoch": 0.42, "grad_norm": 2.609375, "learning_rate": 0.00017934883944332753, "loss": 2.3026, "step": 176995 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001793477145866024, "loss": 2.1698, "step": 177000 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017934658970277068, "loss": 2.2427, "step": 177005 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001793454647918327, "loss": 2.1333, "step": 177010 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001793443398537889, "loss": 2.1392, "step": 177015 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.0001793432148886396, "loss": 2.0347, "step": 177020 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017934208989638524, "loss": 2.2639, "step": 177025 }, { "epoch": 0.42, "grad_norm": 1.8515625, "learning_rate": 0.00017934096487702615, "loss": 2.2686, "step": 177030 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.0001793398398305628, "loss": 2.2181, "step": 177035 }, { "epoch": 0.42, "grad_norm": 1.78125, "learning_rate": 0.0001793387147569955, "loss": 2.1375, "step": 177040 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017933758965632464, "loss": 2.2716, "step": 177045 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017933646452855063, "loss": 2.0954, "step": 177050 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.00017933533937367386, "loss": 2.1609, "step": 177055 }, { "epoch": 0.42, "grad_norm": 2.78125, "learning_rate": 0.0001793342141916947, "loss": 2.0982, "step": 177060 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.0001793330889826135, "loss": 2.1586, "step": 177065 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017933196374643072, "loss": 1.9145, "step": 177070 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.0001793308384831467, "loss": 2.0528, "step": 177075 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017932971319276178, "loss": 2.1279, "step": 177080 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.00017932858787527646, "loss": 2.2076, "step": 177085 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017932746253069098, "loss": 1.9861, "step": 177090 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017932633715900584, "loss": 2.2405, "step": 177095 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017932521176022137, "loss": 2.012, "step": 177100 }, { "epoch": 0.42, "grad_norm": 1.859375, "learning_rate": 0.000179324086334338, "loss": 2.1305, "step": 177105 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017932296088135608, "loss": 2.2507, "step": 177110 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017932183540127596, "loss": 2.2398, "step": 177115 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017932070989409808, "loss": 2.181, "step": 177120 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017931958435982281, "loss": 2.1177, "step": 177125 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017931845879845052, "loss": 2.0728, "step": 177130 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017931733320998162, "loss": 2.1077, "step": 177135 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017931620759441649, "loss": 2.0568, "step": 177140 }, { "epoch": 0.42, "grad_norm": 2.828125, "learning_rate": 0.0001793150819517555, "loss": 2.347, "step": 177145 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.00017931395628199902, "loss": 2.1218, "step": 177150 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.00017931283058514747, "loss": 2.2265, "step": 177155 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.0001793117048612012, "loss": 2.0435, "step": 177160 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017931057911016064, "loss": 1.8436, "step": 177165 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.00017930945333202613, "loss": 2.4844, "step": 177170 }, { "epoch": 0.42, "grad_norm": 2.90625, "learning_rate": 0.00017930832752679812, "loss": 2.2277, "step": 177175 }, { "epoch": 0.42, "grad_norm": 2.59375, "learning_rate": 0.00017930720169447687, "loss": 2.3656, "step": 177180 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.0001793060758350629, "loss": 1.9835, "step": 177185 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.0001793049499485565, "loss": 2.1933, "step": 177190 }, { "epoch": 0.42, "grad_norm": 4.34375, "learning_rate": 0.00017930382403495812, "loss": 2.1486, "step": 177195 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001793026980942681, "loss": 2.0727, "step": 177200 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017930157212648685, "loss": 2.1561, "step": 177205 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017930044613161472, "loss": 2.1712, "step": 177210 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.00017929932010965214, "loss": 1.9555, "step": 177215 }, { "epoch": 0.42, "grad_norm": 2.78125, "learning_rate": 0.00017929819406059948, "loss": 2.1065, "step": 177220 }, { "epoch": 0.42, "grad_norm": 2.578125, "learning_rate": 0.0001792970679844571, "loss": 2.2715, "step": 177225 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001792959418812254, "loss": 2.1314, "step": 177230 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017929481575090477, "loss": 2.154, "step": 177235 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.0001792936895934956, "loss": 2.0417, "step": 177240 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017929256340899828, "loss": 2.1129, "step": 177245 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017929143719741316, "loss": 2.0748, "step": 177250 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017929031095874066, "loss": 2.1013, "step": 177255 }, { "epoch": 0.42, "grad_norm": 1.9375, "learning_rate": 0.00017928918469298116, "loss": 1.9665, "step": 177260 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017928805840013504, "loss": 2.1194, "step": 177265 }, { "epoch": 0.42, "grad_norm": 2.8125, "learning_rate": 0.00017928693208020267, "loss": 2.1277, "step": 177270 }, { "epoch": 0.42, "grad_norm": 1.859375, "learning_rate": 0.00017928580573318444, "loss": 2.0603, "step": 177275 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017928467935908072, "loss": 2.3066, "step": 177280 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017928355295789196, "loss": 1.9499, "step": 177285 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017928242652961846, "loss": 2.2138, "step": 177290 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017928130007426068, "loss": 1.9992, "step": 177295 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017928017359181896, "loss": 2.2236, "step": 177300 }, { "epoch": 0.42, "grad_norm": 1.9296875, "learning_rate": 0.00017927904708229367, "loss": 1.9854, "step": 177305 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.0001792779205456853, "loss": 2.218, "step": 177310 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017927679398199405, "loss": 2.0885, "step": 177315 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017927566739122045, "loss": 2.144, "step": 177320 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017927454077336488, "loss": 2.0986, "step": 177325 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017927341412842765, "loss": 2.1391, "step": 177330 }, { "epoch": 0.42, "grad_norm": 2.890625, "learning_rate": 0.0001792722874564092, "loss": 2.1348, "step": 177335 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.0001792711607573099, "loss": 1.9538, "step": 177340 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017927003403113008, "loss": 2.0525, "step": 177345 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017926890727787024, "loss": 2.0905, "step": 177350 }, { "epoch": 0.42, "grad_norm": 2.546875, "learning_rate": 0.0001792677804975307, "loss": 2.2741, "step": 177355 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017926665369011183, "loss": 2.2214, "step": 177360 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017926552685561405, "loss": 2.1407, "step": 177365 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017926439999403773, "loss": 2.3028, "step": 177370 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017926327310538323, "loss": 2.3076, "step": 177375 }, { "epoch": 0.42, "grad_norm": 1.6796875, "learning_rate": 0.00017926214618965095, "loss": 2.1487, "step": 177380 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017926101924684132, "loss": 2.1184, "step": 177385 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.0001792598922769547, "loss": 2.062, "step": 177390 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017925876527999138, "loss": 2.1479, "step": 177395 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.0001792576382559519, "loss": 2.1173, "step": 177400 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017925651120483658, "loss": 2.1191, "step": 177405 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017925538412664577, "loss": 2.2372, "step": 177410 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017925425702137988, "loss": 2.1251, "step": 177415 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017925312988903932, "loss": 2.1876, "step": 177420 }, { "epoch": 0.42, "grad_norm": 1.8828125, "learning_rate": 0.00017925200272962444, "loss": 2.0612, "step": 177425 }, { "epoch": 0.42, "grad_norm": 1.828125, "learning_rate": 0.00017925087554313565, "loss": 2.2611, "step": 177430 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017924974832957328, "loss": 1.9775, "step": 177435 }, { "epoch": 0.42, "grad_norm": 1.8203125, "learning_rate": 0.0001792486210889378, "loss": 1.968, "step": 177440 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017924749382122954, "loss": 2.1468, "step": 177445 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.0001792463665264489, "loss": 2.1277, "step": 177450 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017924523920459628, "loss": 2.0866, "step": 177455 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017924411185567206, "loss": 1.9646, "step": 177460 }, { "epoch": 0.42, "grad_norm": 2.546875, "learning_rate": 0.00017924298447967658, "loss": 2.1559, "step": 177465 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017924185707661025, "loss": 2.0383, "step": 177470 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.0001792407296464735, "loss": 1.9847, "step": 177475 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017923960218926664, "loss": 1.9566, "step": 177480 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.00017923847470499013, "loss": 2.3044, "step": 177485 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017923734719364434, "loss": 1.9217, "step": 177490 }, { "epoch": 0.42, "grad_norm": 3.0625, "learning_rate": 0.0001792362196552296, "loss": 2.002, "step": 177495 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.0001792350920897463, "loss": 2.0668, "step": 177500 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.00017923396449719492, "loss": 2.0483, "step": 177505 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017923283687757574, "loss": 2.2191, "step": 177510 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017923170923088918, "loss": 1.9663, "step": 177515 }, { "epoch": 0.42, "grad_norm": 1.921875, "learning_rate": 0.00017923058155713565, "loss": 2.0266, "step": 177520 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017922945385631555, "loss": 2.1379, "step": 177525 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.0001792283261284292, "loss": 2.0964, "step": 177530 }, { "epoch": 0.42, "grad_norm": 2.65625, "learning_rate": 0.00017922719837347703, "loss": 2.0236, "step": 177535 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.00017922607059145938, "loss": 2.1158, "step": 177540 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017922494278237668, "loss": 2.148, "step": 177545 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017922381494622932, "loss": 1.9291, "step": 177550 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017922268708301764, "loss": 2.1641, "step": 177555 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017922155919274205, "loss": 1.9372, "step": 177560 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017922043127540299, "loss": 2.201, "step": 177565 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017921930333100075, "loss": 2.0624, "step": 177570 }, { "epoch": 0.42, "grad_norm": 1.9375, "learning_rate": 0.00017921817535953576, "loss": 2.3004, "step": 177575 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017921704736100841, "loss": 2.08, "step": 177580 }, { "epoch": 0.42, "grad_norm": 1.921875, "learning_rate": 0.00017921591933541908, "loss": 2.1204, "step": 177585 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017921479128276815, "loss": 2.2155, "step": 177590 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.000179213663203056, "loss": 2.2064, "step": 177595 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017921253509628308, "loss": 1.9507, "step": 177600 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001792114069624497, "loss": 2.1393, "step": 177605 }, { "epoch": 0.42, "grad_norm": 1.6875, "learning_rate": 0.00017921027880155624, "loss": 2.0915, "step": 177610 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017920915061360313, "loss": 2.2057, "step": 177615 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017920802239859072, "loss": 1.9571, "step": 177620 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017920689415651942, "loss": 2.1254, "step": 177625 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017920576588738963, "loss": 2.2022, "step": 177630 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.0001792046375912017, "loss": 2.1071, "step": 177635 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017920350926795605, "loss": 2.2293, "step": 177640 }, { "epoch": 0.42, "grad_norm": 2.6875, "learning_rate": 0.000179202380917653, "loss": 2.1993, "step": 177645 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017920125254029302, "loss": 2.1333, "step": 177650 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.0001792001241358764, "loss": 2.1211, "step": 177655 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017919899570440365, "loss": 2.3598, "step": 177660 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017919786724587505, "loss": 2.1788, "step": 177665 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017919673876029104, "loss": 2.1008, "step": 177670 }, { "epoch": 0.42, "grad_norm": 1.734375, "learning_rate": 0.00017919561024765197, "loss": 1.8778, "step": 177675 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017919448170795825, "loss": 2.0663, "step": 177680 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017919335314121027, "loss": 2.0457, "step": 177685 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.0001791922245474084, "loss": 2.0165, "step": 177690 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.000179191095926553, "loss": 2.0628, "step": 177695 }, { "epoch": 0.42, "grad_norm": 2.65625, "learning_rate": 0.00017918996727864453, "loss": 2.2896, "step": 177700 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.0001791888386036833, "loss": 2.1194, "step": 177705 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.00017918770990166973, "loss": 2.2023, "step": 177710 }, { "epoch": 0.42, "grad_norm": 1.921875, "learning_rate": 0.0001791865811726042, "loss": 2.1164, "step": 177715 }, { "epoch": 0.42, "grad_norm": 1.78125, "learning_rate": 0.0001791854524164871, "loss": 1.996, "step": 177720 }, { "epoch": 0.42, "grad_norm": 2.65625, "learning_rate": 0.00017918432363331883, "loss": 2.0585, "step": 177725 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017918319482309976, "loss": 2.0739, "step": 177730 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017918206598583028, "loss": 2.1043, "step": 177735 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017918093712151074, "loss": 2.1286, "step": 177740 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017917980823014155, "loss": 2.0697, "step": 177745 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.0001791786793117231, "loss": 2.2721, "step": 177750 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017917755036625583, "loss": 2.2792, "step": 177755 }, { "epoch": 0.42, "grad_norm": 1.828125, "learning_rate": 0.00017917642139374005, "loss": 2.2233, "step": 177760 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.00017917529239417618, "loss": 2.0776, "step": 177765 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017917416336756453, "loss": 1.8878, "step": 177770 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001791730343139056, "loss": 2.1805, "step": 177775 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017917190523319973, "loss": 2.1181, "step": 177780 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.0001791707761254473, "loss": 2.1596, "step": 177785 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017916964699064866, "loss": 2.1154, "step": 177790 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017916851782880428, "loss": 2.0786, "step": 177795 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017916738863991448, "loss": 2.168, "step": 177800 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017916625942397967, "loss": 2.1608, "step": 177805 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.0001791651301810002, "loss": 2.1722, "step": 177810 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001791640009109765, "loss": 2.16, "step": 177815 }, { "epoch": 0.42, "grad_norm": 1.65625, "learning_rate": 0.00017916287161390897, "loss": 2.084, "step": 177820 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017916174228979793, "loss": 2.2356, "step": 177825 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017916061293864384, "loss": 2.1981, "step": 177830 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017915948356044705, "loss": 2.1896, "step": 177835 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.0001791583541552079, "loss": 2.1114, "step": 177840 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017915722472292686, "loss": 2.1365, "step": 177845 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017915609526360427, "loss": 2.1791, "step": 177850 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.0001791549657772405, "loss": 2.2659, "step": 177855 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017915383626383597, "loss": 2.156, "step": 177860 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017915270672339104, "loss": 1.9797, "step": 177865 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017915157715590614, "loss": 1.9643, "step": 177870 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001791504475613816, "loss": 2.1245, "step": 177875 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017914931793981787, "loss": 2.1625, "step": 177880 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017914818829121523, "loss": 2.2042, "step": 177885 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017914705861557422, "loss": 2.2128, "step": 177890 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017914592891289508, "loss": 2.0317, "step": 177895 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017914479918317827, "loss": 2.0152, "step": 177900 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017914366942642417, "loss": 2.1331, "step": 177905 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017914253964263312, "loss": 2.0957, "step": 177910 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017914140983180558, "loss": 2.2042, "step": 177915 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.0001791402799939419, "loss": 2.2488, "step": 177920 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017913915012904245, "loss": 2.0952, "step": 177925 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001791380202371076, "loss": 2.2808, "step": 177930 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017913689031813786, "loss": 2.2019, "step": 177935 }, { "epoch": 0.42, "grad_norm": 3.0, "learning_rate": 0.00017913576037213345, "loss": 1.8991, "step": 177940 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017913463039909483, "loss": 2.1482, "step": 177945 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.0001791335003990224, "loss": 2.1133, "step": 177950 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017913237037191655, "loss": 2.0015, "step": 177955 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.0001791312403177776, "loss": 2.0819, "step": 177960 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017913011023660603, "loss": 2.061, "step": 177965 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017912898012840216, "loss": 2.1591, "step": 177970 }, { "epoch": 0.42, "grad_norm": 1.8125, "learning_rate": 0.00017912784999316638, "loss": 2.1468, "step": 177975 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017912671983089912, "loss": 2.1551, "step": 177980 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017912558964160074, "loss": 2.0617, "step": 177985 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017912445942527158, "loss": 2.0222, "step": 177990 }, { "epoch": 0.42, "grad_norm": 2.59375, "learning_rate": 0.00017912332918191212, "loss": 2.1161, "step": 177995 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017912219891152268, "loss": 2.1062, "step": 178000 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017912106861410365, "loss": 2.2385, "step": 178005 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.00017911993828965544, "loss": 1.9239, "step": 178010 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.0001791188079381784, "loss": 2.2104, "step": 178015 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017911767755967296, "loss": 2.2507, "step": 178020 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017911654715413949, "loss": 2.1073, "step": 178025 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017911541672157837, "loss": 2.0872, "step": 178030 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017911428626199, "loss": 2.0347, "step": 178035 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017911315577537473, "loss": 2.161, "step": 178040 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017911202526173296, "loss": 1.9652, "step": 178045 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017911089472106512, "loss": 2.2351, "step": 178050 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.00017910976415337157, "loss": 1.9027, "step": 178055 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017910863355865265, "loss": 2.1957, "step": 178060 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.00017910750293690883, "loss": 2.1363, "step": 178065 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017910637228814042, "loss": 2.0855, "step": 178070 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017910524161234785, "loss": 2.1417, "step": 178075 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001791041109095315, "loss": 2.1615, "step": 178080 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017910298017969175, "loss": 2.0692, "step": 178085 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017910184942282898, "loss": 2.0863, "step": 178090 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.0001791007186389436, "loss": 2.1651, "step": 178095 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017909958782803595, "loss": 2.0911, "step": 178100 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.00017909845699010647, "loss": 2.197, "step": 178105 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.0001790973261251555, "loss": 1.9964, "step": 178110 }, { "epoch": 0.42, "grad_norm": 1.71875, "learning_rate": 0.0001790961952331835, "loss": 2.0449, "step": 178115 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017909506431419074, "loss": 2.1621, "step": 178120 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.0001790939333681777, "loss": 2.2618, "step": 178125 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017909280239514477, "loss": 2.0584, "step": 178130 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017909167139509226, "loss": 2.2598, "step": 178135 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017909054036802061, "loss": 1.9137, "step": 178140 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.0001790894093139302, "loss": 2.1785, "step": 178145 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.0001790882782328214, "loss": 2.1988, "step": 178150 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017908714712469464, "loss": 2.0865, "step": 178155 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017908601598955027, "loss": 2.0663, "step": 178160 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017908488482738867, "loss": 2.176, "step": 178165 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017908375363821023, "loss": 2.3338, "step": 178170 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017908262242201535, "loss": 2.242, "step": 178175 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017908149117880443, "loss": 2.1282, "step": 178180 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.00017908035990857782, "loss": 2.1099, "step": 178185 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017907922861133595, "loss": 2.0679, "step": 178190 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017907809728707916, "loss": 2.1569, "step": 178195 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017907696593580784, "loss": 1.9532, "step": 178200 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.00017907583455752243, "loss": 2.0149, "step": 178205 }, { "epoch": 0.42, "grad_norm": 1.703125, "learning_rate": 0.00017907470315222328, "loss": 2.1279, "step": 178210 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.00017907357171991075, "loss": 2.2502, "step": 178215 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017907244026058527, "loss": 2.1678, "step": 178220 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017907130877424724, "loss": 1.9195, "step": 178225 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017907017726089697, "loss": 2.1187, "step": 178230 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017906904572053488, "loss": 2.1424, "step": 178235 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.0001790679141531614, "loss": 2.0109, "step": 178240 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017906678255877692, "loss": 2.1608, "step": 178245 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017906565093738175, "loss": 2.1029, "step": 178250 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017906451928897634, "loss": 2.0906, "step": 178255 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.00017906338761356102, "loss": 2.1014, "step": 178260 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017906225591113626, "loss": 2.1694, "step": 178265 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017906112418170235, "loss": 2.2147, "step": 178270 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017905999242525978, "loss": 2.0452, "step": 178275 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.00017905886064180883, "loss": 2.1315, "step": 178280 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017905772883134997, "loss": 2.0713, "step": 178285 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017905659699388356, "loss": 2.2291, "step": 178290 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017905546512941, "loss": 2.1798, "step": 178295 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.00017905433323792962, "loss": 2.0867, "step": 178300 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017905320131944284, "loss": 2.172, "step": 178305 }, { "epoch": 0.42, "grad_norm": 1.9296875, "learning_rate": 0.0001790520693739501, "loss": 2.0215, "step": 178310 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.0001790509374014517, "loss": 2.1896, "step": 178315 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.0001790498054019481, "loss": 2.1051, "step": 178320 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.0001790486733754396, "loss": 2.167, "step": 178325 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017904754132192668, "loss": 2.153, "step": 178330 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017904640924140967, "loss": 2.0062, "step": 178335 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.000179045277133889, "loss": 2.4337, "step": 178340 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.000179044144999365, "loss": 2.115, "step": 178345 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.0001790430128378381, "loss": 2.2574, "step": 178350 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017904188064930864, "loss": 2.2021, "step": 178355 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001790407484337771, "loss": 2.1081, "step": 178360 }, { "epoch": 0.42, "grad_norm": 2.65625, "learning_rate": 0.00017903961619124376, "loss": 2.1701, "step": 178365 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.0001790384839217091, "loss": 2.3837, "step": 178370 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017903735162517342, "loss": 2.1103, "step": 178375 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017903621930163714, "loss": 2.0659, "step": 178380 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017903508695110067, "loss": 2.0762, "step": 178385 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.0001790339545735644, "loss": 2.2633, "step": 178390 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.0001790328221690287, "loss": 2.0936, "step": 178395 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001790316897374939, "loss": 2.2769, "step": 178400 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.00017903055727896047, "loss": 2.0395, "step": 178405 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001790294247934288, "loss": 2.0776, "step": 178410 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017902829228089918, "loss": 2.1613, "step": 178415 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017902715974137213, "loss": 2.1729, "step": 178420 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017902602717484792, "loss": 1.9936, "step": 178425 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.000179024894581327, "loss": 2.1547, "step": 178430 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017902376196080977, "loss": 2.1733, "step": 178435 }, { "epoch": 0.42, "grad_norm": 2.640625, "learning_rate": 0.00017902262931329655, "loss": 2.1989, "step": 178440 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001790214966387878, "loss": 2.0601, "step": 178445 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017902036393728388, "loss": 2.1858, "step": 178450 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017901923120878515, "loss": 2.1592, "step": 178455 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017901809845329201, "loss": 2.0255, "step": 178460 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017901696567080487, "loss": 2.0668, "step": 178465 }, { "epoch": 0.42, "grad_norm": 2.640625, "learning_rate": 0.00017901583286132408, "loss": 2.0372, "step": 178470 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017901470002485006, "loss": 2.0788, "step": 178475 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.0001790135671613832, "loss": 2.3405, "step": 178480 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017901243427092384, "loss": 1.8635, "step": 178485 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017901130135347245, "loss": 2.1285, "step": 178490 }, { "epoch": 0.42, "grad_norm": 1.7109375, "learning_rate": 0.0001790101684090293, "loss": 2.1065, "step": 178495 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.0001790090354375949, "loss": 2.2033, "step": 178500 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017900790243916955, "loss": 2.1268, "step": 178505 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.00017900676941375368, "loss": 2.0105, "step": 178510 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017900563636134766, "loss": 2.1853, "step": 178515 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.0001790045032819519, "loss": 1.9943, "step": 178520 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017900337017556677, "loss": 2.064, "step": 178525 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017900223704219262, "loss": 2.0151, "step": 178530 }, { "epoch": 0.42, "grad_norm": 3.1875, "learning_rate": 0.0001790011038818299, "loss": 2.197, "step": 178535 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017899997069447895, "loss": 2.2843, "step": 178540 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017899883748014018, "loss": 2.1311, "step": 178545 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.000178997704238814, "loss": 2.1691, "step": 178550 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017899657097050076, "loss": 2.0913, "step": 178555 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017899543767520087, "loss": 2.1444, "step": 178560 }, { "epoch": 0.42, "grad_norm": 1.734375, "learning_rate": 0.00017899430435291467, "loss": 1.8637, "step": 178565 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.0001789931710036426, "loss": 2.0307, "step": 178570 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017899203762738504, "loss": 2.038, "step": 178575 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017899090422414236, "loss": 2.1595, "step": 178580 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017898977079391495, "loss": 2.0605, "step": 178585 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.0001789886373367032, "loss": 2.1918, "step": 178590 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.0001789875038525075, "loss": 2.0229, "step": 178595 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.00017898637034132823, "loss": 2.0179, "step": 178600 }, { "epoch": 0.42, "grad_norm": 2.578125, "learning_rate": 0.00017898523680316583, "loss": 1.9648, "step": 178605 }, { "epoch": 0.42, "grad_norm": 1.796875, "learning_rate": 0.0001789841032380206, "loss": 2.007, "step": 178610 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017898296964589299, "loss": 2.0604, "step": 178615 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.00017898183602678332, "loss": 2.2792, "step": 178620 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017898070238069207, "loss": 2.0653, "step": 178625 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017897956870761953, "loss": 2.1207, "step": 178630 }, { "epoch": 0.42, "grad_norm": 2.640625, "learning_rate": 0.0001789784350075662, "loss": 2.248, "step": 178635 }, { "epoch": 0.42, "grad_norm": 2.578125, "learning_rate": 0.00017897730128053237, "loss": 2.2128, "step": 178640 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017897616752651847, "loss": 2.0204, "step": 178645 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017897503374552487, "loss": 2.0938, "step": 178650 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017897389993755196, "loss": 2.2679, "step": 178655 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017897276610260015, "loss": 2.2728, "step": 178660 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.0001789716322406698, "loss": 2.3112, "step": 178665 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.0001789704983517613, "loss": 2.2062, "step": 178670 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017896936443587507, "loss": 2.1388, "step": 178675 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017896823049301145, "loss": 2.091, "step": 178680 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.00017896709652317084, "loss": 2.1266, "step": 178685 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017896596252635368, "loss": 2.1887, "step": 178690 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001789648285025603, "loss": 2.1489, "step": 178695 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017896369445179108, "loss": 2.0126, "step": 178700 }, { "epoch": 0.42, "grad_norm": 1.7109375, "learning_rate": 0.00017896256037404647, "loss": 2.0889, "step": 178705 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.0001789614262693268, "loss": 2.1924, "step": 178710 }, { "epoch": 0.42, "grad_norm": 1.859375, "learning_rate": 0.00017896029213763245, "loss": 2.1164, "step": 178715 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017895915797896383, "loss": 2.1613, "step": 178720 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017895802379332135, "loss": 2.1504, "step": 178725 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.0001789568895807054, "loss": 2.0547, "step": 178730 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001789557553411163, "loss": 2.2341, "step": 178735 }, { "epoch": 0.42, "grad_norm": 1.7734375, "learning_rate": 0.00017895462107455452, "loss": 2.1541, "step": 178740 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.0001789534867810204, "loss": 2.2401, "step": 178745 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017895235246051435, "loss": 2.068, "step": 178750 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.0001789512181130367, "loss": 2.0359, "step": 178755 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.0001789500837385879, "loss": 2.022, "step": 178760 }, { "epoch": 0.42, "grad_norm": 2.578125, "learning_rate": 0.00017894894933716832, "loss": 2.0588, "step": 178765 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017894781490877836, "loss": 2.1006, "step": 178770 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.0001789466804534184, "loss": 2.1204, "step": 178775 }, { "epoch": 0.42, "grad_norm": 1.9140625, "learning_rate": 0.0001789455459710888, "loss": 1.9162, "step": 178780 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017894441146179, "loss": 2.1898, "step": 178785 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017894327692552232, "loss": 2.0509, "step": 178790 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.0001789421423622862, "loss": 2.2019, "step": 178795 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.00017894100777208203, "loss": 1.9762, "step": 178800 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.00017893987315491014, "loss": 2.3166, "step": 178805 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.000178938738510771, "loss": 2.2139, "step": 178810 }, { "epoch": 0.42, "grad_norm": 1.78125, "learning_rate": 0.00017893760383966493, "loss": 2.0189, "step": 178815 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017893646914159234, "loss": 1.9679, "step": 178820 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017893533441655363, "loss": 2.035, "step": 178825 }, { "epoch": 0.42, "grad_norm": 1.7734375, "learning_rate": 0.0001789341996645492, "loss": 2.1342, "step": 178830 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017893306488557937, "loss": 1.9871, "step": 178835 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.0001789319300796446, "loss": 2.1781, "step": 178840 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017893079524674524, "loss": 2.2592, "step": 178845 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017892966038688172, "loss": 2.1643, "step": 178850 }, { "epoch": 0.42, "grad_norm": 1.9296875, "learning_rate": 0.00017892852550005438, "loss": 2.1775, "step": 178855 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.0001789273905862636, "loss": 2.1248, "step": 178860 }, { "epoch": 0.42, "grad_norm": 1.9296875, "learning_rate": 0.00017892625564550982, "loss": 2.1468, "step": 178865 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017892512067779339, "loss": 2.1868, "step": 178870 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.0001789239856831147, "loss": 2.2394, "step": 178875 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017892285066147416, "loss": 2.2328, "step": 178880 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017892171561287215, "loss": 2.1436, "step": 178885 }, { "epoch": 0.42, "grad_norm": 1.9375, "learning_rate": 0.00017892058053730904, "loss": 2.0821, "step": 178890 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.0001789194454347852, "loss": 2.0032, "step": 178895 }, { "epoch": 0.42, "grad_norm": 2.78125, "learning_rate": 0.0001789183103053011, "loss": 1.9326, "step": 178900 }, { "epoch": 0.42, "grad_norm": 1.6484375, "learning_rate": 0.00017891717514885701, "loss": 2.1874, "step": 178905 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017891603996545344, "loss": 2.0801, "step": 178910 }, { "epoch": 0.42, "grad_norm": 1.8359375, "learning_rate": 0.0001789149047550907, "loss": 2.0498, "step": 178915 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017891376951776917, "loss": 2.1879, "step": 178920 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017891263425348929, "loss": 2.2528, "step": 178925 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.00017891149896225142, "loss": 2.1799, "step": 178930 }, { "epoch": 0.42, "grad_norm": 1.796875, "learning_rate": 0.00017891036364405598, "loss": 2.0155, "step": 178935 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001789092282989033, "loss": 2.1969, "step": 178940 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.0001789080929267938, "loss": 2.0688, "step": 178945 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017890695752772784, "loss": 2.0608, "step": 178950 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.00017890582210170587, "loss": 2.0565, "step": 178955 }, { "epoch": 0.42, "grad_norm": 2.640625, "learning_rate": 0.00017890468664872824, "loss": 2.0634, "step": 178960 }, { "epoch": 0.42, "grad_norm": 1.90625, "learning_rate": 0.00017890355116879531, "loss": 2.066, "step": 178965 }, { "epoch": 0.42, "grad_norm": 1.765625, "learning_rate": 0.0001789024156619075, "loss": 2.0991, "step": 178970 }, { "epoch": 0.42, "grad_norm": 3.109375, "learning_rate": 0.0001789012801280652, "loss": 1.9069, "step": 178975 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.0001789001445672688, "loss": 2.0212, "step": 178980 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017889900897951868, "loss": 2.1316, "step": 178985 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017889787336481522, "loss": 2.1258, "step": 178990 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017889673772315882, "loss": 2.0644, "step": 178995 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017889560205454984, "loss": 2.1947, "step": 179000 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017889446635898872, "loss": 2.1975, "step": 179005 }, { "epoch": 0.42, "grad_norm": 1.9140625, "learning_rate": 0.0001788933306364758, "loss": 1.9982, "step": 179010 }, { "epoch": 0.42, "grad_norm": 2.625, "learning_rate": 0.00017889219488701153, "loss": 2.0159, "step": 179015 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001788910591105962, "loss": 2.2123, "step": 179020 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001788899233072303, "loss": 2.2785, "step": 179025 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017888878747691412, "loss": 2.2021, "step": 179030 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017888765161964813, "loss": 2.1361, "step": 179035 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001788865157354327, "loss": 2.1294, "step": 179040 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001788853798242682, "loss": 2.1418, "step": 179045 }, { "epoch": 0.42, "grad_norm": 1.765625, "learning_rate": 0.000178884243886155, "loss": 2.2956, "step": 179050 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017888310792109352, "loss": 2.0215, "step": 179055 }, { "epoch": 0.42, "grad_norm": 1.90625, "learning_rate": 0.00017888197192908418, "loss": 2.1772, "step": 179060 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017888083591012727, "loss": 2.1655, "step": 179065 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017887969986422325, "loss": 2.1978, "step": 179070 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.00017887856379137255, "loss": 2.1852, "step": 179075 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017887742769157546, "loss": 2.2779, "step": 179080 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017887629156483238, "loss": 2.0368, "step": 179085 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017887515541114375, "loss": 2.1184, "step": 179090 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017887401923050993, "loss": 2.0614, "step": 179095 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017887288302293134, "loss": 2.2041, "step": 179100 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017887174678840834, "loss": 2.0485, "step": 179105 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017887061052694129, "loss": 2.0766, "step": 179110 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017886947423853065, "loss": 2.0871, "step": 179115 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017886833792317671, "loss": 2.1066, "step": 179120 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017886720158087998, "loss": 2.0668, "step": 179125 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017886606521164077, "loss": 2.2171, "step": 179130 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017886492881545945, "loss": 2.0779, "step": 179135 }, { "epoch": 0.42, "grad_norm": 1.8359375, "learning_rate": 0.00017886379239233645, "loss": 1.9568, "step": 179140 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017886265594227216, "loss": 2.1346, "step": 179145 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017886151946526694, "loss": 2.2242, "step": 179150 }, { "epoch": 0.42, "grad_norm": 1.9375, "learning_rate": 0.00017886038296132122, "loss": 2.1442, "step": 179155 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017885924643043535, "loss": 2.2566, "step": 179160 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017885810987260974, "loss": 2.1472, "step": 179165 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017885697328784477, "loss": 2.2076, "step": 179170 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.0001788558366761408, "loss": 2.1529, "step": 179175 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017885470003749826, "loss": 1.8563, "step": 179180 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017885356337191753, "loss": 2.0269, "step": 179185 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.000178852426679399, "loss": 2.1352, "step": 179190 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.00017885128995994302, "loss": 1.9345, "step": 179195 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017885015321355004, "loss": 2.0479, "step": 179200 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.0001788490164402204, "loss": 2.1914, "step": 179205 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001788478796399545, "loss": 2.1192, "step": 179210 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017884674281275277, "loss": 2.1424, "step": 179215 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017884560595861555, "loss": 2.0038, "step": 179220 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.00017884446907754322, "loss": 2.0595, "step": 179225 }, { "epoch": 0.42, "grad_norm": 2.78125, "learning_rate": 0.0001788433321695362, "loss": 2.0877, "step": 179230 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001788421952345949, "loss": 2.1876, "step": 179235 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.0001788410582727196, "loss": 2.2543, "step": 179240 }, { "epoch": 0.42, "grad_norm": 2.65625, "learning_rate": 0.00017883992128391084, "loss": 2.1067, "step": 179245 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017883878426816892, "loss": 2.1337, "step": 179250 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001788376472254942, "loss": 2.2049, "step": 179255 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017883651015588713, "loss": 2.0889, "step": 179260 }, { "epoch": 0.42, "grad_norm": 2.359375, "learning_rate": 0.0001788353730593481, "loss": 2.1783, "step": 179265 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017883423593587745, "loss": 2.0153, "step": 179270 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001788330987854756, "loss": 1.9804, "step": 179275 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017883196160814295, "loss": 2.2602, "step": 179280 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017883082440387987, "loss": 2.1514, "step": 179285 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017882968717268673, "loss": 2.2546, "step": 179290 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017882854991456396, "loss": 2.0664, "step": 179295 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.0001788274126295119, "loss": 2.0969, "step": 179300 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.000178826275317531, "loss": 2.0923, "step": 179305 }, { "epoch": 0.42, "grad_norm": 1.9375, "learning_rate": 0.0001788251379786216, "loss": 2.1218, "step": 179310 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.0001788240006127841, "loss": 2.1556, "step": 179315 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.0001788228632200189, "loss": 1.9728, "step": 179320 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.00017882172580032638, "loss": 2.041, "step": 179325 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017882058835370695, "loss": 2.2119, "step": 179330 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.0001788194508801609, "loss": 2.2102, "step": 179335 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.00017881831337968878, "loss": 2.0561, "step": 179340 }, { "epoch": 0.42, "grad_norm": 2.546875, "learning_rate": 0.00017881717585229088, "loss": 2.006, "step": 179345 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017881603829796757, "loss": 2.047, "step": 179350 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.0001788149007167193, "loss": 2.3483, "step": 179355 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001788137631085464, "loss": 2.1312, "step": 179360 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017881262547344933, "loss": 2.2262, "step": 179365 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017881148781142837, "loss": 2.0564, "step": 179370 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017881035012248404, "loss": 2.3599, "step": 179375 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.00017880921240661664, "loss": 2.0206, "step": 179380 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.0001788080746638266, "loss": 2.0174, "step": 179385 }, { "epoch": 0.42, "grad_norm": 1.71875, "learning_rate": 0.0001788069368941143, "loss": 2.1492, "step": 179390 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.0001788057990974801, "loss": 2.0409, "step": 179395 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.00017880466127392442, "loss": 2.0249, "step": 179400 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017880352342344763, "loss": 2.022, "step": 179405 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017880238554605012, "loss": 2.2243, "step": 179410 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017880124764173231, "loss": 2.1664, "step": 179415 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017880010971049455, "loss": 2.1449, "step": 179420 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017879897175233724, "loss": 2.1166, "step": 179425 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017879783376726076, "loss": 2.0272, "step": 179430 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017879669575526555, "loss": 2.1068, "step": 179435 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017879555771635193, "loss": 2.3304, "step": 179440 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017879441965052033, "loss": 2.1188, "step": 179445 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001787932815577711, "loss": 2.0621, "step": 179450 }, { "epoch": 0.42, "grad_norm": 1.8515625, "learning_rate": 0.0001787921434381047, "loss": 2.3014, "step": 179455 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017879100529152143, "loss": 2.3329, "step": 179460 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017878986711802178, "loss": 1.9789, "step": 179465 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017878872891760603, "loss": 2.1415, "step": 179470 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017878759069027466, "loss": 2.0681, "step": 179475 }, { "epoch": 0.42, "grad_norm": 1.875, "learning_rate": 0.000178786452436028, "loss": 1.9911, "step": 179480 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017878531415486644, "loss": 2.2443, "step": 179485 }, { "epoch": 0.42, "grad_norm": 1.8125, "learning_rate": 0.00017878417584679042, "loss": 2.0571, "step": 179490 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017878303751180027, "loss": 2.2039, "step": 179495 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.00017878189914989642, "loss": 2.1565, "step": 179500 }, { "epoch": 0.42, "grad_norm": 1.859375, "learning_rate": 0.00017878076076107926, "loss": 2.0961, "step": 179505 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017877962234534914, "loss": 2.1182, "step": 179510 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.0001787784839027065, "loss": 1.9521, "step": 179515 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017877734543315165, "loss": 2.1625, "step": 179520 }, { "epoch": 0.42, "grad_norm": 1.84375, "learning_rate": 0.00017877620693668508, "loss": 2.0573, "step": 179525 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.0001787750684133071, "loss": 2.2166, "step": 179530 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.00017877392986301815, "loss": 2.1037, "step": 179535 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017877279128581858, "loss": 2.2356, "step": 179540 }, { "epoch": 0.42, "grad_norm": 1.8203125, "learning_rate": 0.0001787716526817088, "loss": 2.0822, "step": 179545 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.0001787705140506892, "loss": 2.1083, "step": 179550 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017876937539276018, "loss": 2.0788, "step": 179555 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017876823670792206, "loss": 2.0039, "step": 179560 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017876709799617533, "loss": 2.0743, "step": 179565 }, { "epoch": 0.42, "grad_norm": 1.8828125, "learning_rate": 0.00017876595925752031, "loss": 2.2794, "step": 179570 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017876482049195743, "loss": 2.1003, "step": 179575 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017876368169948703, "loss": 2.1155, "step": 179580 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.00017876254288010955, "loss": 1.9898, "step": 179585 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017876140403382532, "loss": 2.2242, "step": 179590 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017876026516063483, "loss": 2.0668, "step": 179595 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017875912626053836, "loss": 2.0739, "step": 179600 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017875798733353634, "loss": 2.211, "step": 179605 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017875684837962918, "loss": 2.2322, "step": 179610 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.00017875570939881726, "loss": 2.0377, "step": 179615 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017875457039110096, "loss": 2.0579, "step": 179620 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017875343135648067, "loss": 2.088, "step": 179625 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017875229229495675, "loss": 2.2996, "step": 179630 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.00017875115320652965, "loss": 2.0729, "step": 179635 }, { "epoch": 0.42, "grad_norm": 1.9140625, "learning_rate": 0.0001787500140911997, "loss": 2.1086, "step": 179640 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017874887494896734, "loss": 2.2268, "step": 179645 }, { "epoch": 0.42, "grad_norm": 1.9453125, "learning_rate": 0.00017874773577983294, "loss": 2.3229, "step": 179650 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017874659658379688, "loss": 1.9429, "step": 179655 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017874545736085954, "loss": 2.3687, "step": 179660 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017874431811102132, "loss": 1.9876, "step": 179665 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017874317883428265, "loss": 2.1075, "step": 179670 }, { "epoch": 0.42, "grad_norm": 2.59375, "learning_rate": 0.00017874203953064383, "loss": 2.2343, "step": 179675 }, { "epoch": 0.42, "grad_norm": 1.640625, "learning_rate": 0.00017874090020010534, "loss": 2.1943, "step": 179680 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.0001787397608426675, "loss": 2.0956, "step": 179685 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017873862145833077, "loss": 2.2095, "step": 179690 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.0001787374820470955, "loss": 2.2297, "step": 179695 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017873634260896207, "loss": 2.1349, "step": 179700 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017873520314393084, "loss": 2.0834, "step": 179705 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017873406365200226, "loss": 2.1824, "step": 179710 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017873292413317672, "loss": 1.9872, "step": 179715 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017873178458745456, "loss": 2.2651, "step": 179720 }, { "epoch": 0.42, "grad_norm": 3.078125, "learning_rate": 0.00017873064501483618, "loss": 2.1825, "step": 179725 }, { "epoch": 0.42, "grad_norm": 1.9765625, "learning_rate": 0.000178729505415322, "loss": 2.0684, "step": 179730 }, { "epoch": 0.42, "grad_norm": 1.8515625, "learning_rate": 0.0001787283657889124, "loss": 2.1212, "step": 179735 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017872722613560774, "loss": 2.1699, "step": 179740 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017872608645540846, "loss": 2.2182, "step": 179745 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.00017872494674831492, "loss": 2.146, "step": 179750 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017872380701432748, "loss": 2.0468, "step": 179755 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.0001787226672534466, "loss": 2.0126, "step": 179760 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017872152746567258, "loss": 2.283, "step": 179765 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017872038765100588, "loss": 2.0771, "step": 179770 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.0001787192478094469, "loss": 2.1153, "step": 179775 }, { "epoch": 0.42, "grad_norm": 1.921875, "learning_rate": 0.00017871810794099595, "loss": 1.9269, "step": 179780 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017871696804565353, "loss": 2.1113, "step": 179785 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017871582812341988, "loss": 2.2516, "step": 179790 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017871468817429555, "loss": 2.0732, "step": 179795 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017871354819828082, "loss": 2.1043, "step": 179800 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.0001787124081953761, "loss": 2.0876, "step": 179805 }, { "epoch": 0.42, "grad_norm": 1.9140625, "learning_rate": 0.00017871126816558183, "loss": 1.9643, "step": 179810 }, { "epoch": 0.42, "grad_norm": 1.921875, "learning_rate": 0.00017871012810889836, "loss": 2.2215, "step": 179815 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017870898802532605, "loss": 2.0444, "step": 179820 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.00017870784791486534, "loss": 2.0254, "step": 179825 }, { "epoch": 0.42, "grad_norm": 2.75, "learning_rate": 0.00017870670777751663, "loss": 2.0049, "step": 179830 }, { "epoch": 0.42, "grad_norm": 2.375, "learning_rate": 0.00017870556761328022, "loss": 2.0581, "step": 179835 }, { "epoch": 0.42, "grad_norm": 1.796875, "learning_rate": 0.00017870442742215663, "loss": 2.1477, "step": 179840 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017870328720414613, "loss": 1.8846, "step": 179845 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017870214695924917, "loss": 1.9471, "step": 179850 }, { "epoch": 0.42, "grad_norm": 2.625, "learning_rate": 0.00017870100668746614, "loss": 2.2329, "step": 179855 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017869986638879742, "loss": 2.1415, "step": 179860 }, { "epoch": 0.42, "grad_norm": 1.8828125, "learning_rate": 0.00017869872606324338, "loss": 2.2055, "step": 179865 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017869758571080447, "loss": 2.1787, "step": 179870 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.000178696445331481, "loss": 2.289, "step": 179875 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001786953049252734, "loss": 2.2163, "step": 179880 }, { "epoch": 0.42, "grad_norm": 3.015625, "learning_rate": 0.00017869416449218205, "loss": 2.1828, "step": 179885 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017869302403220735, "loss": 2.1211, "step": 179890 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001786918835453497, "loss": 1.9576, "step": 179895 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017869074303160945, "loss": 2.0827, "step": 179900 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017868960249098706, "loss": 2.1064, "step": 179905 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017868846192348284, "loss": 2.0744, "step": 179910 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017868732132909722, "loss": 2.0928, "step": 179915 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017868618070783062, "loss": 2.1656, "step": 179920 }, { "epoch": 0.42, "grad_norm": 1.8515625, "learning_rate": 0.00017868504005968334, "loss": 2.1165, "step": 179925 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017868389938465584, "loss": 2.0044, "step": 179930 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.0001786827586827485, "loss": 2.2889, "step": 179935 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.0001786816179539617, "loss": 2.0735, "step": 179940 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017868047719829582, "loss": 2.1228, "step": 179945 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017867933641575127, "loss": 2.0421, "step": 179950 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017867819560632846, "loss": 2.1675, "step": 179955 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.0001786770547700277, "loss": 2.1308, "step": 179960 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017867591390684947, "loss": 2.1213, "step": 179965 }, { "epoch": 0.42, "grad_norm": 1.765625, "learning_rate": 0.0001786747730167941, "loss": 1.9523, "step": 179970 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.000178673632099862, "loss": 1.9443, "step": 179975 }, { "epoch": 0.42, "grad_norm": 1.765625, "learning_rate": 0.00017867249115605357, "loss": 2.102, "step": 179980 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.0001786713501853692, "loss": 2.1618, "step": 179985 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017867020918780927, "loss": 2.0015, "step": 179990 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017866906816337418, "loss": 2.2045, "step": 179995 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001786679271120643, "loss": 2.0921, "step": 180000 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017866678603387998, "loss": 2.0634, "step": 180005 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017866564492882172, "loss": 2.2246, "step": 180010 }, { "epoch": 0.42, "grad_norm": 2.4375, "learning_rate": 0.00017866450379688982, "loss": 2.0941, "step": 180015 }, { "epoch": 0.42, "grad_norm": 1.921875, "learning_rate": 0.0001786633626380847, "loss": 2.225, "step": 180020 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017866222145240674, "loss": 2.2012, "step": 180025 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.00017866108023985636, "loss": 2.0383, "step": 180030 }, { "epoch": 0.42, "grad_norm": 2.453125, "learning_rate": 0.00017865993900043393, "loss": 2.2004, "step": 180035 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.00017865879773413984, "loss": 1.9735, "step": 180040 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017865765644097445, "loss": 2.2332, "step": 180045 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.0001786565151209382, "loss": 1.9089, "step": 180050 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017865537377403145, "loss": 1.979, "step": 180055 }, { "epoch": 0.42, "grad_norm": 1.8984375, "learning_rate": 0.0001786542324002546, "loss": 2.2769, "step": 180060 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017865309099960802, "loss": 1.9844, "step": 180065 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017865194957209215, "loss": 2.1708, "step": 180070 }, { "epoch": 0.42, "grad_norm": 1.9609375, "learning_rate": 0.00017865080811770733, "loss": 2.1217, "step": 180075 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017864966663645398, "loss": 2.1247, "step": 180080 }, { "epoch": 0.42, "grad_norm": 1.7890625, "learning_rate": 0.00017864852512833248, "loss": 1.9928, "step": 180085 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017864738359334322, "loss": 2.2981, "step": 180090 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017864624203148654, "loss": 2.191, "step": 180095 }, { "epoch": 0.42, "grad_norm": 2.484375, "learning_rate": 0.00017864510044276295, "loss": 2.0495, "step": 180100 }, { "epoch": 0.42, "grad_norm": 1.96875, "learning_rate": 0.00017864395882717272, "loss": 1.8734, "step": 180105 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017864281718471626, "loss": 2.0531, "step": 180110 }, { "epoch": 0.42, "grad_norm": 2.671875, "learning_rate": 0.00017864167551539403, "loss": 2.2026, "step": 180115 }, { "epoch": 0.42, "grad_norm": 2.546875, "learning_rate": 0.0001786405338192064, "loss": 2.1183, "step": 180120 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001786393920961537, "loss": 2.2001, "step": 180125 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017863825034623637, "loss": 2.2719, "step": 180130 }, { "epoch": 0.42, "grad_norm": 2.515625, "learning_rate": 0.0001786371085694548, "loss": 2.0477, "step": 180135 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017863596676580935, "loss": 2.0461, "step": 180140 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.0001786348249353004, "loss": 2.2178, "step": 180145 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.0001786336830779284, "loss": 2.161, "step": 180150 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017863254119369372, "loss": 2.1002, "step": 180155 }, { "epoch": 0.42, "grad_norm": 2.59375, "learning_rate": 0.0001786313992825967, "loss": 1.9824, "step": 180160 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.0001786302573446378, "loss": 2.2422, "step": 180165 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017862911537981738, "loss": 1.9975, "step": 180170 }, { "epoch": 0.42, "grad_norm": 2.578125, "learning_rate": 0.00017862797338813582, "loss": 2.1618, "step": 180175 }, { "epoch": 0.42, "grad_norm": 1.890625, "learning_rate": 0.0001786268313695935, "loss": 2.0663, "step": 180180 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017862568932419085, "loss": 2.0225, "step": 180185 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017862454725192824, "loss": 2.1015, "step": 180190 }, { "epoch": 0.42, "grad_norm": 2.59375, "learning_rate": 0.00017862340515280604, "loss": 2.0275, "step": 180195 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017862226302682466, "loss": 2.2393, "step": 180200 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017862112087398452, "loss": 1.9769, "step": 180205 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.00017861997869428594, "loss": 2.2183, "step": 180210 }, { "epoch": 0.42, "grad_norm": 2.40625, "learning_rate": 0.0001786188364877294, "loss": 2.1131, "step": 180215 }, { "epoch": 0.42, "grad_norm": 1.859375, "learning_rate": 0.00017861769425431517, "loss": 2.2136, "step": 180220 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017861655199404377, "loss": 2.2281, "step": 180225 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.0001786154097069155, "loss": 2.1832, "step": 180230 }, { "epoch": 0.42, "grad_norm": 1.875, "learning_rate": 0.00017861426739293078, "loss": 2.1062, "step": 180235 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017861312505209, "loss": 2.05, "step": 180240 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.0001786119826843936, "loss": 1.9642, "step": 180245 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017861084028984186, "loss": 2.146, "step": 180250 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.00017860969786843525, "loss": 2.0091, "step": 180255 }, { "epoch": 0.42, "grad_norm": 1.609375, "learning_rate": 0.00017860855542017415, "loss": 2.1055, "step": 180260 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.0001786074129450589, "loss": 2.1444, "step": 180265 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017860627044308998, "loss": 2.223, "step": 180270 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017860512791426772, "loss": 2.3536, "step": 180275 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.00017860398535859254, "loss": 2.0029, "step": 180280 }, { "epoch": 0.42, "grad_norm": 3.21875, "learning_rate": 0.0001786028427760648, "loss": 2.2603, "step": 180285 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.00017860170016668488, "loss": 2.2031, "step": 180290 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001786005575304532, "loss": 2.1049, "step": 180295 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 0.00017859941486737016, "loss": 2.1378, "step": 180300 }, { "epoch": 0.42, "grad_norm": 2.03125, "learning_rate": 0.00017859827217743613, "loss": 2.212, "step": 180305 }, { "epoch": 0.42, "grad_norm": 1.9921875, "learning_rate": 0.0001785971294606515, "loss": 2.2567, "step": 180310 }, { "epoch": 0.42, "grad_norm": 2.171875, "learning_rate": 0.00017859598671701667, "loss": 2.1615, "step": 180315 }, { "epoch": 0.42, "grad_norm": 2.078125, "learning_rate": 0.00017859484394653203, "loss": 2.0111, "step": 180320 }, { "epoch": 0.42, "grad_norm": 1.8046875, "learning_rate": 0.00017859370114919795, "loss": 2.1387, "step": 180325 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017859255832501484, "loss": 2.2769, "step": 180330 }, { "epoch": 0.42, "grad_norm": 1.8359375, "learning_rate": 0.00017859141547398312, "loss": 2.2115, "step": 180335 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017859027259610312, "loss": 2.2975, "step": 180340 }, { "epoch": 0.42, "grad_norm": 1.8046875, "learning_rate": 0.00017858912969137525, "loss": 1.992, "step": 180345 }, { "epoch": 0.42, "grad_norm": 1.7421875, "learning_rate": 0.00017858798675979992, "loss": 2.2583, "step": 180350 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017858684380137749, "loss": 1.9295, "step": 180355 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001785857008161084, "loss": 2.1769, "step": 180360 }, { "epoch": 0.42, "grad_norm": 2.5, "learning_rate": 0.000178584557803993, "loss": 2.1543, "step": 180365 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017858341476503168, "loss": 2.2509, "step": 180370 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017858227169922482, "loss": 2.1466, "step": 180375 }, { "epoch": 0.42, "grad_norm": 1.7421875, "learning_rate": 0.00017858112860657288, "loss": 2.0713, "step": 180380 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017857998548707617, "loss": 2.2022, "step": 180385 }, { "epoch": 0.42, "grad_norm": 2.0, "learning_rate": 0.00017857884234073512, "loss": 2.1941, "step": 180390 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.0001785776991675501, "loss": 2.0269, "step": 180395 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.0001785765559675215, "loss": 2.007, "step": 180400 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017857541274064978, "loss": 2.0894, "step": 180405 }, { "epoch": 0.42, "grad_norm": 2.1875, "learning_rate": 0.00017857426948693524, "loss": 2.1953, "step": 180410 }, { "epoch": 0.42, "grad_norm": 2.015625, "learning_rate": 0.0001785731262063783, "loss": 2.0201, "step": 180415 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.00017857198289897937, "loss": 1.9117, "step": 180420 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.00017857083956473882, "loss": 2.2696, "step": 180425 }, { "epoch": 0.42, "grad_norm": 2.421875, "learning_rate": 0.000178569696203657, "loss": 1.9712, "step": 180430 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017856855281573442, "loss": 1.9413, "step": 180435 }, { "epoch": 0.42, "grad_norm": 2.28125, "learning_rate": 0.00017856740940097138, "loss": 2.1608, "step": 180440 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017856626595936828, "loss": 2.0422, "step": 180445 }, { "epoch": 0.42, "grad_norm": 2.546875, "learning_rate": 0.0001785651224909255, "loss": 1.9246, "step": 180450 }, { "epoch": 0.42, "grad_norm": 2.328125, "learning_rate": 0.00017856397899564347, "loss": 2.0591, "step": 180455 }, { "epoch": 0.42, "grad_norm": 2.046875, "learning_rate": 0.00017856283547352253, "loss": 2.0178, "step": 180460 }, { "epoch": 0.42, "grad_norm": 2.09375, "learning_rate": 0.00017856169192456312, "loss": 2.0524, "step": 180465 }, { "epoch": 0.42, "grad_norm": 2.609375, "learning_rate": 0.00017856054834876562, "loss": 1.9247, "step": 180470 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001785594047461304, "loss": 2.1392, "step": 180475 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.00017855826111665788, "loss": 2.08, "step": 180480 }, { "epoch": 0.42, "grad_norm": 1.984375, "learning_rate": 0.00017855711746034842, "loss": 2.1788, "step": 180485 }, { "epoch": 0.42, "grad_norm": 2.125, "learning_rate": 0.0001785559737772024, "loss": 1.9629, "step": 180490 }, { "epoch": 0.42, "grad_norm": 2.140625, "learning_rate": 0.0001785548300672203, "loss": 2.2798, "step": 180495 }, { "epoch": 0.42, "grad_norm": 6.15625, "learning_rate": 0.0001785536863304024, "loss": 2.1248, "step": 180500 }, { "epoch": 0.42, "grad_norm": 1.953125, "learning_rate": 0.00017855254256674913, "loss": 2.2666, "step": 180505 }, { "epoch": 0.42, "grad_norm": 2.109375, "learning_rate": 0.0001785513987762609, "loss": 2.2773, "step": 180510 }, { "epoch": 0.42, "grad_norm": 2.390625, "learning_rate": 0.00017855025495893808, "loss": 2.2363, "step": 180515 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.0001785491111147811, "loss": 1.9449, "step": 180520 }, { "epoch": 0.42, "grad_norm": 2.296875, "learning_rate": 0.00017854796724379028, "loss": 2.0317, "step": 180525 }, { "epoch": 0.42, "grad_norm": 2.5625, "learning_rate": 0.00017854682334596605, "loss": 2.0728, "step": 180530 }, { "epoch": 0.42, "grad_norm": 2.0625, "learning_rate": 0.00017854567942130882, "loss": 2.0441, "step": 180535 }, { "epoch": 0.42, "grad_norm": 2.53125, "learning_rate": 0.00017854453546981897, "loss": 1.9606, "step": 180540 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017854339149149686, "loss": 2.1254, "step": 180545 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017854224748634293, "loss": 1.9174, "step": 180550 }, { "epoch": 0.42, "grad_norm": 2.21875, "learning_rate": 0.00017854110345435752, "loss": 1.8714, "step": 180555 }, { "epoch": 0.42, "grad_norm": 2.203125, "learning_rate": 0.00017853995939554103, "loss": 2.1108, "step": 180560 }, { "epoch": 0.42, "grad_norm": 2.234375, "learning_rate": 0.0001785388153098939, "loss": 2.0426, "step": 180565 }, { "epoch": 0.42, "grad_norm": 2.34375, "learning_rate": 0.00017853767119741646, "loss": 2.0922, "step": 180570 }, { "epoch": 0.42, "grad_norm": 2.3125, "learning_rate": 0.00017853652705810915, "loss": 2.0916, "step": 180575 }, { "epoch": 0.42, "grad_norm": 2.46875, "learning_rate": 0.00017853538289197234, "loss": 2.3362, "step": 180580 }, { "epoch": 0.42, "grad_norm": 2.15625, "learning_rate": 0.00017853423869900636, "loss": 2.2241, "step": 180585 }, { "epoch": 0.42, "grad_norm": 2.265625, "learning_rate": 0.0001785330944792117, "loss": 2.0701, "step": 180590 }, { "epoch": 0.42, "grad_norm": 1.9375, "learning_rate": 0.00017853195023258874, "loss": 2.0171, "step": 180595 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017853080595913782, "loss": 2.0287, "step": 180600 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017852966165885935, "loss": 2.3144, "step": 180605 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017852851733175373, "loss": 2.0619, "step": 180610 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.0001785273729778213, "loss": 1.9825, "step": 180615 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017852622859706256, "loss": 2.1903, "step": 180620 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.0001785250841894778, "loss": 2.0372, "step": 180625 }, { "epoch": 0.43, "grad_norm": 1.9921875, "learning_rate": 0.00017852393975506748, "loss": 1.9847, "step": 180630 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017852279529383193, "loss": 2.0778, "step": 180635 }, { "epoch": 0.43, "grad_norm": 2.734375, "learning_rate": 0.00017852165080577156, "loss": 2.2175, "step": 180640 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017852050629088678, "loss": 2.1122, "step": 180645 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017851936174917798, "loss": 2.0237, "step": 180650 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017851821718064553, "loss": 2.0119, "step": 180655 }, { "epoch": 0.43, "grad_norm": 1.796875, "learning_rate": 0.00017851707258528986, "loss": 2.1523, "step": 180660 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.0001785159279631113, "loss": 2.024, "step": 180665 }, { "epoch": 0.43, "grad_norm": 2.484375, "learning_rate": 0.00017851478331411028, "loss": 2.2066, "step": 180670 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001785136386382872, "loss": 2.2096, "step": 180675 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017851249393564244, "loss": 2.2665, "step": 180680 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017851134920617638, "loss": 2.0435, "step": 180685 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017851020444988942, "loss": 1.9736, "step": 180690 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.00017850905966678194, "loss": 2.0793, "step": 180695 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017850791485685434, "loss": 2.1234, "step": 180700 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017850677002010702, "loss": 1.9696, "step": 180705 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017850562515654036, "loss": 1.9934, "step": 180710 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001785044802661548, "loss": 2.0275, "step": 180715 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017850333534895062, "loss": 2.0182, "step": 180720 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.0001785021904049283, "loss": 2.2333, "step": 180725 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.0001785010454340882, "loss": 2.1171, "step": 180730 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017849990043643078, "loss": 2.1584, "step": 180735 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.0001784987554119563, "loss": 2.2186, "step": 180740 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017849761036066523, "loss": 2.0103, "step": 180745 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017849646528255797, "loss": 2.1189, "step": 180750 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017849532017763487, "loss": 2.148, "step": 180755 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017849417504589639, "loss": 1.9721, "step": 180760 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017849302988734284, "loss": 2.1343, "step": 180765 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017849188470197465, "loss": 2.1016, "step": 180770 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017849073948979222, "loss": 2.1512, "step": 180775 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017848959425079594, "loss": 2.1365, "step": 180780 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017848844898498616, "loss": 1.9056, "step": 180785 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017848730369236332, "loss": 2.2012, "step": 180790 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.0001784861583729278, "loss": 2.0486, "step": 180795 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017848501302667998, "loss": 1.8492, "step": 180800 }, { "epoch": 0.43, "grad_norm": 1.9921875, "learning_rate": 0.00017848386765362026, "loss": 2.1617, "step": 180805 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.000178482722253749, "loss": 2.2959, "step": 180810 }, { "epoch": 0.43, "grad_norm": 1.6875, "learning_rate": 0.0001784815768270667, "loss": 2.1047, "step": 180815 }, { "epoch": 0.43, "grad_norm": 2.59375, "learning_rate": 0.00017848043137357357, "loss": 2.0906, "step": 180820 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017847928589327015, "loss": 2.1226, "step": 180825 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017847814038615675, "loss": 2.1919, "step": 180830 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017847699485223383, "loss": 2.2104, "step": 180835 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017847584929150175, "loss": 1.9521, "step": 180840 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017847470370396088, "loss": 2.0715, "step": 180845 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017847355808961162, "loss": 2.0068, "step": 180850 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017847241244845437, "loss": 2.1412, "step": 180855 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017847126678048952, "loss": 1.9352, "step": 180860 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001784701210857175, "loss": 2.1001, "step": 180865 }, { "epoch": 0.43, "grad_norm": 3.046875, "learning_rate": 0.00017846897536413862, "loss": 1.9639, "step": 180870 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017846782961575332, "loss": 2.0361, "step": 180875 }, { "epoch": 0.43, "grad_norm": 2.484375, "learning_rate": 0.000178466683840562, "loss": 2.2198, "step": 180880 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017846553803856506, "loss": 2.1932, "step": 180885 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.0001784643922097628, "loss": 2.1853, "step": 180890 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017846324635415573, "loss": 2.2657, "step": 180895 }, { "epoch": 0.43, "grad_norm": 1.7890625, "learning_rate": 0.0001784621004717442, "loss": 1.9649, "step": 180900 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017846095456252857, "loss": 2.191, "step": 180905 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.0001784598086265093, "loss": 2.1959, "step": 180910 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017845866266368666, "loss": 2.3021, "step": 180915 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017845751667406118, "loss": 2.183, "step": 180920 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017845637065763315, "loss": 2.0582, "step": 180925 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017845522461440303, "loss": 2.1448, "step": 180930 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017845407854437118, "loss": 2.1007, "step": 180935 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017845293244753797, "loss": 2.1974, "step": 180940 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017845178632390382, "loss": 2.0975, "step": 180945 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.00017845064017346914, "loss": 2.2236, "step": 180950 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017844949399623428, "loss": 2.0632, "step": 180955 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017844834779219966, "loss": 2.2631, "step": 180960 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017844720156136567, "loss": 2.118, "step": 180965 }, { "epoch": 0.43, "grad_norm": 1.765625, "learning_rate": 0.0001784460553037327, "loss": 2.0653, "step": 180970 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001784449090193011, "loss": 2.0001, "step": 180975 }, { "epoch": 0.43, "grad_norm": 1.8671875, "learning_rate": 0.0001784437627080713, "loss": 2.2893, "step": 180980 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017844261637004372, "loss": 2.1076, "step": 180985 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.00017844147000521869, "loss": 2.0295, "step": 180990 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.00017844032361359662, "loss": 1.9543, "step": 180995 }, { "epoch": 0.43, "grad_norm": 1.8203125, "learning_rate": 0.00017843917719517793, "loss": 2.0584, "step": 181000 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.000178438030749963, "loss": 1.9537, "step": 181005 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017843688427795224, "loss": 2.3267, "step": 181010 }, { "epoch": 0.43, "grad_norm": 1.8828125, "learning_rate": 0.00017843573777914597, "loss": 2.2481, "step": 181015 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017843459125354465, "loss": 2.0868, "step": 181020 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.00017843344470114863, "loss": 2.1883, "step": 181025 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.0001784322981219584, "loss": 2.221, "step": 181030 }, { "epoch": 0.43, "grad_norm": 3.234375, "learning_rate": 0.0001784311515159742, "loss": 2.2447, "step": 181035 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.0001784300048831965, "loss": 2.0472, "step": 181040 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017842885822362572, "loss": 2.2565, "step": 181045 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001784277115372622, "loss": 1.964, "step": 181050 }, { "epoch": 0.43, "grad_norm": 2.59375, "learning_rate": 0.00017842656482410635, "loss": 2.1789, "step": 181055 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017842541808415856, "loss": 2.0508, "step": 181060 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.00017842427131741927, "loss": 2.2096, "step": 181065 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017842312452388877, "loss": 2.0835, "step": 181070 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017842197770356753, "loss": 2.0957, "step": 181075 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017842083085645592, "loss": 2.088, "step": 181080 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.0001784196839825543, "loss": 2.1945, "step": 181085 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017841853708186313, "loss": 2.0764, "step": 181090 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017841739015438276, "loss": 2.1917, "step": 181095 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017841624320011362, "loss": 2.3048, "step": 181100 }, { "epoch": 0.43, "grad_norm": 1.84375, "learning_rate": 0.00017841509621905602, "loss": 2.0577, "step": 181105 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.0001784139492112104, "loss": 2.0661, "step": 181110 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001784128021765772, "loss": 2.1029, "step": 181115 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.0001784116551151567, "loss": 2.0585, "step": 181120 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017841050802694941, "loss": 2.2266, "step": 181125 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017840936091195565, "loss": 2.231, "step": 181130 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.0001784082137701758, "loss": 2.117, "step": 181135 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017840706660161035, "loss": 2.2354, "step": 181140 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017840591940625956, "loss": 2.034, "step": 181145 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017840477218412392, "loss": 1.9701, "step": 181150 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017840362493520378, "loss": 2.2196, "step": 181155 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017840247765949955, "loss": 1.9124, "step": 181160 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017840133035701162, "loss": 2.2048, "step": 181165 }, { "epoch": 0.43, "grad_norm": 2.5625, "learning_rate": 0.00017840018302774034, "loss": 2.055, "step": 181170 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017839903567168616, "loss": 2.0596, "step": 181175 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001783978882888494, "loss": 2.1099, "step": 181180 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017839674087923054, "loss": 2.1875, "step": 181185 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017839559344282995, "loss": 2.0642, "step": 181190 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017839444597964797, "loss": 1.9523, "step": 181195 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017839329848968503, "loss": 2.111, "step": 181200 }, { "epoch": 0.43, "grad_norm": 2.796875, "learning_rate": 0.00017839215097294154, "loss": 2.0294, "step": 181205 }, { "epoch": 0.43, "grad_norm": 3.0, "learning_rate": 0.00017839100342941785, "loss": 2.1561, "step": 181210 }, { "epoch": 0.43, "grad_norm": 1.8984375, "learning_rate": 0.0001783898558591144, "loss": 1.9011, "step": 181215 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017838870826203152, "loss": 1.99, "step": 181220 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017838756063816964, "loss": 2.1427, "step": 181225 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017838641298752913, "loss": 2.0968, "step": 181230 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017838526531011045, "loss": 2.273, "step": 181235 }, { "epoch": 0.43, "grad_norm": 2.578125, "learning_rate": 0.0001783841176059139, "loss": 2.0805, "step": 181240 }, { "epoch": 0.43, "grad_norm": 1.7421875, "learning_rate": 0.00017838296987493995, "loss": 2.0302, "step": 181245 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017838182211718894, "loss": 2.2533, "step": 181250 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017838067433266125, "loss": 2.1747, "step": 181255 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.00017837952652135735, "loss": 1.9748, "step": 181260 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.00017837837868327753, "loss": 2.1356, "step": 181265 }, { "epoch": 0.43, "grad_norm": 2.625, "learning_rate": 0.00017837723081842225, "loss": 2.0518, "step": 181270 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.0001783760829267919, "loss": 2.2785, "step": 181275 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017837493500838687, "loss": 2.1222, "step": 181280 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017837378706320753, "loss": 2.0927, "step": 181285 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017837263909125425, "loss": 2.2192, "step": 181290 }, { "epoch": 0.43, "grad_norm": 1.8828125, "learning_rate": 0.00017837149109252751, "loss": 2.0881, "step": 181295 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.0001783703430670276, "loss": 2.2331, "step": 181300 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.000178369195014755, "loss": 2.0537, "step": 181305 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017836804693571002, "loss": 2.184, "step": 181310 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.00017836689882989312, "loss": 2.0483, "step": 181315 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017836575069730468, "loss": 2.2049, "step": 181320 }, { "epoch": 0.43, "grad_norm": 1.875, "learning_rate": 0.00017836460253794508, "loss": 2.0712, "step": 181325 }, { "epoch": 0.43, "grad_norm": 1.765625, "learning_rate": 0.00017836345435181468, "loss": 2.094, "step": 181330 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017836230613891389, "loss": 2.0941, "step": 181335 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017836115789924314, "loss": 2.1359, "step": 181340 }, { "epoch": 0.43, "grad_norm": 2.703125, "learning_rate": 0.00017836000963280282, "loss": 2.1047, "step": 181345 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017835886133959328, "loss": 2.0872, "step": 181350 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017835771301961492, "loss": 2.153, "step": 181355 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017835656467286815, "loss": 1.9369, "step": 181360 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017835541629935336, "loss": 2.129, "step": 181365 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017835426789907094, "loss": 2.1342, "step": 181370 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.00017835311947202127, "loss": 2.1905, "step": 181375 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017835197101820477, "loss": 2.0543, "step": 181380 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.0001783508225376218, "loss": 2.133, "step": 181385 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017834967403027277, "loss": 2.2765, "step": 181390 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017834852549615808, "loss": 2.2529, "step": 181395 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.0001783473769352781, "loss": 2.1421, "step": 181400 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017834622834763324, "loss": 2.0512, "step": 181405 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.0001783450797332239, "loss": 2.1499, "step": 181410 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017834393109205046, "loss": 2.2147, "step": 181415 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017834278242411327, "loss": 2.0974, "step": 181420 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017834163372941282, "loss": 2.0435, "step": 181425 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.00017834048500794941, "loss": 2.1906, "step": 181430 }, { "epoch": 0.43, "grad_norm": 1.609375, "learning_rate": 0.0001783393362597235, "loss": 1.9689, "step": 181435 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017833818748473542, "loss": 1.9682, "step": 181440 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017833703868298564, "loss": 1.9296, "step": 181445 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017833588985447448, "loss": 2.0164, "step": 181450 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017833474099920234, "loss": 2.1146, "step": 181455 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017833359211716964, "loss": 2.0258, "step": 181460 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017833244320837678, "loss": 2.2489, "step": 181465 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017833129427282412, "loss": 2.1, "step": 181470 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017833014531051209, "loss": 2.109, "step": 181475 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.00017832899632144104, "loss": 2.1141, "step": 181480 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.0001783278473056114, "loss": 2.1478, "step": 181485 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017832669826302355, "loss": 2.1491, "step": 181490 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017832554919367785, "loss": 2.2625, "step": 181495 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017832440009757475, "loss": 2.1251, "step": 181500 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.00017832325097471462, "loss": 1.9869, "step": 181505 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017832210182509782, "loss": 2.0933, "step": 181510 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017832095264872479, "loss": 2.1814, "step": 181515 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.0001783198034455959, "loss": 2.1748, "step": 181520 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017831865421571153, "loss": 1.8675, "step": 181525 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017831750495907214, "loss": 2.1751, "step": 181530 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.000178316355675678, "loss": 1.9916, "step": 181535 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.0001783152063655296, "loss": 2.0228, "step": 181540 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017831405702862732, "loss": 1.9868, "step": 181545 }, { "epoch": 0.43, "grad_norm": 2.421875, "learning_rate": 0.0001783129076649715, "loss": 1.9251, "step": 181550 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017831175827456261, "loss": 2.1711, "step": 181555 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017831060885740098, "loss": 2.0889, "step": 181560 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017830945941348705, "loss": 2.1474, "step": 181565 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.00017830830994282115, "loss": 2.1797, "step": 181570 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017830716044540373, "loss": 2.0911, "step": 181575 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017830601092123517, "loss": 2.1499, "step": 181580 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017830486137031585, "loss": 1.9056, "step": 181585 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.0001783037117926462, "loss": 2.1345, "step": 181590 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017830256218822653, "loss": 2.0524, "step": 181595 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017830141255705732, "loss": 2.0649, "step": 181600 }, { "epoch": 0.43, "grad_norm": 1.7421875, "learning_rate": 0.0001783002628991389, "loss": 2.1607, "step": 181605 }, { "epoch": 0.43, "grad_norm": 1.8359375, "learning_rate": 0.00017829911321447171, "loss": 2.2017, "step": 181610 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017829796350305613, "loss": 2.2259, "step": 181615 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017829681376489254, "loss": 2.1043, "step": 181620 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017829566399998133, "loss": 2.1152, "step": 181625 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.0001782945142083229, "loss": 2.006, "step": 181630 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017829336438991765, "loss": 2.0708, "step": 181635 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017829221454476597, "loss": 2.1699, "step": 181640 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.00017829106467286823, "loss": 2.2507, "step": 181645 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.0001782899147742249, "loss": 1.9867, "step": 181650 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017828876484883623, "loss": 2.0394, "step": 181655 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017828761489670276, "loss": 2.1274, "step": 181660 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001782864649178248, "loss": 2.2226, "step": 181665 }, { "epoch": 0.43, "grad_norm": 2.6875, "learning_rate": 0.00017828531491220274, "loss": 2.2989, "step": 181670 }, { "epoch": 0.43, "grad_norm": 1.7890625, "learning_rate": 0.00017828416487983707, "loss": 2.2703, "step": 181675 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017828301482072803, "loss": 1.9988, "step": 181680 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.00017828186473487612, "loss": 1.8908, "step": 181685 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.0001782807146222817, "loss": 1.9831, "step": 181690 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.0001782795644829452, "loss": 2.2516, "step": 181695 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017827841431686695, "loss": 2.2691, "step": 181700 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.00017827726412404738, "loss": 2.1123, "step": 181705 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.0001782761139044869, "loss": 2.0516, "step": 181710 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017827496365818582, "loss": 2.1395, "step": 181715 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017827381338514467, "loss": 2.1509, "step": 181720 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017827266308536373, "loss": 2.0576, "step": 181725 }, { "epoch": 0.43, "grad_norm": 2.421875, "learning_rate": 0.00017827151275884342, "loss": 2.1183, "step": 181730 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017827036240558415, "loss": 2.0992, "step": 181735 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.0001782692120255863, "loss": 1.9497, "step": 181740 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017826806161885025, "loss": 1.8978, "step": 181745 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017826691118537645, "loss": 2.0876, "step": 181750 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017826576072516522, "loss": 2.0481, "step": 181755 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.000178264610238217, "loss": 2.2157, "step": 181760 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.0001782634597245322, "loss": 2.1003, "step": 181765 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017826230918411114, "loss": 2.1493, "step": 181770 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017826115861695427, "loss": 2.1675, "step": 181775 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017826000802306197, "loss": 2.1691, "step": 181780 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017825885740243465, "loss": 2.1191, "step": 181785 }, { "epoch": 0.43, "grad_norm": 2.578125, "learning_rate": 0.00017825770675507265, "loss": 2.0708, "step": 181790 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017825655608097641, "loss": 2.0876, "step": 181795 }, { "epoch": 0.43, "grad_norm": 1.9140625, "learning_rate": 0.0001782554053801463, "loss": 2.1503, "step": 181800 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017825425465258277, "loss": 2.0837, "step": 181805 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017825310389828612, "loss": 2.2123, "step": 181810 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017825195311725682, "loss": 2.0969, "step": 181815 }, { "epoch": 0.43, "grad_norm": 2.421875, "learning_rate": 0.00017825080230949523, "loss": 2.0811, "step": 181820 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017824965147500175, "loss": 2.106, "step": 181825 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017824850061377674, "loss": 2.0746, "step": 181830 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017824734972582063, "loss": 2.1533, "step": 181835 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017824619881113383, "loss": 2.0527, "step": 181840 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017824504786971668, "loss": 2.1597, "step": 181845 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.0001782438969015696, "loss": 2.0749, "step": 181850 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017824274590669303, "loss": 1.8946, "step": 181855 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017824159488508728, "loss": 2.1731, "step": 181860 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017824044383675283, "loss": 2.0305, "step": 181865 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017823929276168998, "loss": 2.0781, "step": 181870 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.0001782381416598992, "loss": 2.2439, "step": 181875 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017823699053138081, "loss": 2.1463, "step": 181880 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.00017823583937613528, "loss": 2.1433, "step": 181885 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017823468819416297, "loss": 2.2621, "step": 181890 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017823353698546423, "loss": 2.3163, "step": 181895 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.0001782323857500395, "loss": 2.0273, "step": 181900 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017823123448788922, "loss": 2.2099, "step": 181905 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.0001782300831990137, "loss": 2.1547, "step": 181910 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017822893188341337, "loss": 1.9652, "step": 181915 }, { "epoch": 0.43, "grad_norm": 2.671875, "learning_rate": 0.00017822778054108862, "loss": 2.2327, "step": 181920 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.0001782266291720398, "loss": 1.9511, "step": 181925 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017822547777626742, "loss": 1.9982, "step": 181930 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017822432635377172, "loss": 1.955, "step": 181935 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017822317490455322, "loss": 2.3366, "step": 181940 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017822202342861228, "loss": 2.1256, "step": 181945 }, { "epoch": 0.43, "grad_norm": 1.8671875, "learning_rate": 0.00017822087192594926, "loss": 2.1806, "step": 181950 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017821972039656453, "loss": 2.1955, "step": 181955 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017821856884045858, "loss": 2.2351, "step": 181960 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017821741725763173, "loss": 2.1158, "step": 181965 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.0001782162656480844, "loss": 2.1349, "step": 181970 }, { "epoch": 0.43, "grad_norm": 2.625, "learning_rate": 0.00017821511401181693, "loss": 2.4122, "step": 181975 }, { "epoch": 0.43, "grad_norm": 1.78125, "learning_rate": 0.00017821396234882984, "loss": 1.9192, "step": 181980 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017821281065912337, "loss": 2.2364, "step": 181985 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.000178211658942698, "loss": 2.1548, "step": 181990 }, { "epoch": 0.43, "grad_norm": 2.484375, "learning_rate": 0.00017821050719955416, "loss": 2.144, "step": 181995 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017820935542969214, "loss": 2.0593, "step": 182000 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.0001782082036331124, "loss": 2.1518, "step": 182005 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.0001782070518098153, "loss": 2.1298, "step": 182010 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017820589995980125, "loss": 2.1704, "step": 182015 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017820474808307067, "loss": 2.1831, "step": 182020 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017820359617962394, "loss": 2.1784, "step": 182025 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017820244424946144, "loss": 2.0389, "step": 182030 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017820129229258357, "loss": 2.1701, "step": 182035 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.0001782001403089907, "loss": 2.0298, "step": 182040 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017819898829868325, "loss": 2.1816, "step": 182045 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.0001781978362616616, "loss": 2.2194, "step": 182050 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017819668419792617, "loss": 2.1688, "step": 182055 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.0001781955321074773, "loss": 2.1621, "step": 182060 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017819437999031545, "loss": 2.1357, "step": 182065 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017819322784644098, "loss": 2.1707, "step": 182070 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017819207567585429, "loss": 2.15, "step": 182075 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017819092347855575, "loss": 1.9894, "step": 182080 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017818977125454578, "loss": 2.3366, "step": 182085 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017818861900382476, "loss": 2.311, "step": 182090 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.0001781874667263931, "loss": 2.1285, "step": 182095 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017818631442225117, "loss": 2.1521, "step": 182100 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017818516209139936, "loss": 1.8838, "step": 182105 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017818400973383813, "loss": 2.1103, "step": 182110 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017818285734956781, "loss": 2.2901, "step": 182115 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017818170493858878, "loss": 2.286, "step": 182120 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017818055250090148, "loss": 2.1196, "step": 182125 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017817940003650627, "loss": 2.3088, "step": 182130 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.0001781782475454036, "loss": 2.09, "step": 182135 }, { "epoch": 0.43, "grad_norm": 5.3125, "learning_rate": 0.0001781770950275938, "loss": 2.2601, "step": 182140 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001781759424830773, "loss": 2.3008, "step": 182145 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017817478991185442, "loss": 2.0964, "step": 182150 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.0001781736373139257, "loss": 2.0231, "step": 182155 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.0001781724846892914, "loss": 1.9211, "step": 182160 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017817133203795198, "loss": 2.1346, "step": 182165 }, { "epoch": 0.43, "grad_norm": 1.9140625, "learning_rate": 0.0001781701793599078, "loss": 1.9543, "step": 182170 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017816902665515928, "loss": 2.1103, "step": 182175 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017816787392370677, "loss": 1.9139, "step": 182180 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.0001781667211655507, "loss": 2.2625, "step": 182185 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017816556838069148, "loss": 2.0574, "step": 182190 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.0001781644155691295, "loss": 2.3368, "step": 182195 }, { "epoch": 0.43, "grad_norm": 2.578125, "learning_rate": 0.00017816326273086515, "loss": 1.9236, "step": 182200 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017816210986589876, "loss": 1.8825, "step": 182205 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.0001781609569742308, "loss": 1.9517, "step": 182210 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017815980405586166, "loss": 2.1052, "step": 182215 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017815865111079168, "loss": 2.2426, "step": 182220 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017815749813902132, "loss": 2.1183, "step": 182225 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.0001781563451405509, "loss": 2.0992, "step": 182230 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017815519211538089, "loss": 2.1632, "step": 182235 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017815403906351165, "loss": 2.0782, "step": 182240 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017815288598494356, "loss": 2.0541, "step": 182245 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.000178151732879677, "loss": 2.0335, "step": 182250 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017815057974771242, "loss": 2.2488, "step": 182255 }, { "epoch": 0.43, "grad_norm": 2.734375, "learning_rate": 0.0001781494265890502, "loss": 2.0713, "step": 182260 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017814827340369068, "loss": 2.0738, "step": 182265 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017814712019163434, "loss": 2.0731, "step": 182270 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017814596695288148, "loss": 1.9814, "step": 182275 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.0001781448136874326, "loss": 2.1921, "step": 182280 }, { "epoch": 0.43, "grad_norm": 2.71875, "learning_rate": 0.00017814366039528795, "loss": 1.9801, "step": 182285 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017814250707644805, "loss": 2.1898, "step": 182290 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017814135373091328, "loss": 1.968, "step": 182295 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017814020035868396, "loss": 2.1464, "step": 182300 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017813904695976058, "loss": 2.0353, "step": 182305 }, { "epoch": 0.43, "grad_norm": 2.421875, "learning_rate": 0.00017813789353414344, "loss": 2.0588, "step": 182310 }, { "epoch": 0.43, "grad_norm": 1.8203125, "learning_rate": 0.00017813674008183302, "loss": 2.2588, "step": 182315 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017813558660282963, "loss": 2.0539, "step": 182320 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017813443309713374, "loss": 2.3445, "step": 182325 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017813327956474567, "loss": 2.3415, "step": 182330 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001781321260056659, "loss": 2.1986, "step": 182335 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017813097241989475, "loss": 2.0875, "step": 182340 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017812981880743265, "loss": 2.2438, "step": 182345 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017812866516828, "loss": 2.2818, "step": 182350 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017812751150243715, "loss": 2.0901, "step": 182355 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.00017812635780990456, "loss": 2.2283, "step": 182360 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017812520409068254, "loss": 2.1052, "step": 182365 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.0001781240503447716, "loss": 2.2231, "step": 182370 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.00017812289657217201, "loss": 2.1122, "step": 182375 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.00017812174277288424, "loss": 2.1423, "step": 182380 }, { "epoch": 0.43, "grad_norm": 1.875, "learning_rate": 0.0001781205889469087, "loss": 2.0974, "step": 182385 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.0001781194350942457, "loss": 2.0441, "step": 182390 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.00017811828121489572, "loss": 1.8589, "step": 182395 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.0001781171273088591, "loss": 2.2046, "step": 182400 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017811597337613627, "loss": 2.0184, "step": 182405 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.00017811481941672758, "loss": 2.0737, "step": 182410 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017811366543063345, "loss": 2.0522, "step": 182415 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.0001781125114178543, "loss": 2.0919, "step": 182420 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017811135737839047, "loss": 2.0982, "step": 182425 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.0001781102033122424, "loss": 2.1909, "step": 182430 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001781090492194105, "loss": 1.9682, "step": 182435 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001781078950998951, "loss": 2.2691, "step": 182440 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017810674095369665, "loss": 1.9612, "step": 182445 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001781055867808155, "loss": 2.0972, "step": 182450 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017810443258125207, "loss": 2.0308, "step": 182455 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017810327835500673, "loss": 2.0698, "step": 182460 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001781021241020799, "loss": 2.2688, "step": 182465 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.000178100969822472, "loss": 2.1018, "step": 182470 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017809981551618334, "loss": 2.1393, "step": 182475 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017809866118321442, "loss": 1.9115, "step": 182480 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017809750682356554, "loss": 2.0682, "step": 182485 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017809635243723717, "loss": 2.1157, "step": 182490 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017809519802422963, "loss": 1.9769, "step": 182495 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017809404358454337, "loss": 2.1846, "step": 182500 }, { "epoch": 0.43, "grad_norm": 1.78125, "learning_rate": 0.0001780928891181788, "loss": 2.2113, "step": 182505 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017809173462513624, "loss": 2.0934, "step": 182510 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017809058010541615, "loss": 2.247, "step": 182515 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017808942555901891, "loss": 2.0574, "step": 182520 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017808827098594486, "loss": 2.0057, "step": 182525 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001780871163861945, "loss": 1.8811, "step": 182530 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001780859617597681, "loss": 2.2608, "step": 182535 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017808480710666618, "loss": 1.9105, "step": 182540 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017808365242688904, "loss": 2.1835, "step": 182545 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017808249772043714, "loss": 2.1223, "step": 182550 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.0001780813429873108, "loss": 2.1828, "step": 182555 }, { "epoch": 0.43, "grad_norm": 1.8125, "learning_rate": 0.00017808018822751048, "loss": 2.1375, "step": 182560 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017807903344103655, "loss": 2.1123, "step": 182565 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.0001780778786278894, "loss": 1.9916, "step": 182570 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017807672378806943, "loss": 2.1063, "step": 182575 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017807556892157704, "loss": 2.0851, "step": 182580 }, { "epoch": 0.43, "grad_norm": 1.8125, "learning_rate": 0.00017807441402841264, "loss": 2.1726, "step": 182585 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.00017807325910857658, "loss": 2.1796, "step": 182590 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017807210416206928, "loss": 2.0719, "step": 182595 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017807094918889112, "loss": 2.1405, "step": 182600 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017806979418904254, "loss": 2.142, "step": 182605 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.0001780686391625239, "loss": 2.2459, "step": 182610 }, { "epoch": 0.43, "grad_norm": 2.640625, "learning_rate": 0.0001780674841093356, "loss": 2.2142, "step": 182615 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.000178066329029478, "loss": 2.1258, "step": 182620 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017806517392295152, "loss": 2.1071, "step": 182625 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.0001780640187897566, "loss": 2.1996, "step": 182630 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017806286362989358, "loss": 2.0233, "step": 182635 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017806170844336284, "loss": 1.8597, "step": 182640 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017806055323016487, "loss": 2.1117, "step": 182645 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017805939799029995, "loss": 2.2753, "step": 182650 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017805824272376856, "loss": 2.1577, "step": 182655 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017805708743057102, "loss": 2.1953, "step": 182660 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017805593211070778, "loss": 2.062, "step": 182665 }, { "epoch": 0.43, "grad_norm": 1.90625, "learning_rate": 0.0001780547767641792, "loss": 2.0066, "step": 182670 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017805362139098572, "loss": 2.2001, "step": 182675 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.0001780524659911277, "loss": 2.2507, "step": 182680 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017805131056460553, "loss": 2.0666, "step": 182685 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017805015511141963, "loss": 1.8905, "step": 182690 }, { "epoch": 0.43, "grad_norm": 1.8046875, "learning_rate": 0.00017804899963157038, "loss": 2.0747, "step": 182695 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017804784412505818, "loss": 2.2697, "step": 182700 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.0001780466885918834, "loss": 2.0746, "step": 182705 }, { "epoch": 0.43, "grad_norm": 1.7578125, "learning_rate": 0.00017804553303204647, "loss": 2.0105, "step": 182710 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017804437744554775, "loss": 2.0718, "step": 182715 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001780432218323877, "loss": 2.2281, "step": 182720 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017804206619256662, "loss": 1.9705, "step": 182725 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.000178040910526085, "loss": 2.1636, "step": 182730 }, { "epoch": 0.43, "grad_norm": 1.6640625, "learning_rate": 0.00017803975483294317, "loss": 2.3268, "step": 182735 }, { "epoch": 0.43, "grad_norm": 2.671875, "learning_rate": 0.00017803859911314155, "loss": 2.1819, "step": 182740 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017803744336668053, "loss": 2.154, "step": 182745 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017803628759356047, "loss": 2.0038, "step": 182750 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017803513179378183, "loss": 2.249, "step": 182755 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.000178033975967345, "loss": 2.1341, "step": 182760 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.0001780328201142503, "loss": 2.2706, "step": 182765 }, { "epoch": 0.43, "grad_norm": 2.5625, "learning_rate": 0.0001780316642344982, "loss": 2.1, "step": 182770 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017803050832808904, "loss": 2.0257, "step": 182775 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.0001780293523950233, "loss": 2.0379, "step": 182780 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017802819643530126, "loss": 2.1121, "step": 182785 }, { "epoch": 0.43, "grad_norm": 1.78125, "learning_rate": 0.0001780270404489234, "loss": 2.1987, "step": 182790 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.0001780258844358901, "loss": 2.3541, "step": 182795 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017802472839620171, "loss": 2.1029, "step": 182800 }, { "epoch": 0.43, "grad_norm": 1.90625, "learning_rate": 0.00017802357232985866, "loss": 2.0695, "step": 182805 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017802241623686137, "loss": 2.1524, "step": 182810 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.0001780212601172102, "loss": 2.0273, "step": 182815 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017802010397090554, "loss": 2.1322, "step": 182820 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001780189477979478, "loss": 2.1585, "step": 182825 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017801779159833738, "loss": 2.2011, "step": 182830 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017801663537207464, "loss": 2.1827, "step": 182835 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017801547911916005, "loss": 1.9366, "step": 182840 }, { "epoch": 0.43, "grad_norm": 2.484375, "learning_rate": 0.00017801432283959395, "loss": 2.2412, "step": 182845 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017801316653337672, "loss": 1.9856, "step": 182850 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017801201020050878, "loss": 2.0261, "step": 182855 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.0001780108538409905, "loss": 2.0354, "step": 182860 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.00017800969745482235, "loss": 1.9421, "step": 182865 }, { "epoch": 0.43, "grad_norm": 1.90625, "learning_rate": 0.00017800854104200464, "loss": 2.1559, "step": 182870 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.0001780073846025378, "loss": 2.2138, "step": 182875 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017800622813642222, "loss": 2.2164, "step": 182880 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001780050716436583, "loss": 2.16, "step": 182885 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017800391512424644, "loss": 1.9608, "step": 182890 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017800275857818702, "loss": 2.0685, "step": 182895 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017800160200548044, "loss": 2.206, "step": 182900 }, { "epoch": 0.43, "grad_norm": 2.78125, "learning_rate": 0.00017800044540612712, "loss": 2.2607, "step": 182905 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017799928878012744, "loss": 2.092, "step": 182910 }, { "epoch": 0.43, "grad_norm": 1.90625, "learning_rate": 0.00017799813212748177, "loss": 2.2269, "step": 182915 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001779969754481905, "loss": 2.1716, "step": 182920 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017799581874225408, "loss": 2.116, "step": 182925 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017799466200967286, "loss": 2.1452, "step": 182930 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017799350525044724, "loss": 2.0058, "step": 182935 }, { "epoch": 0.43, "grad_norm": 1.9921875, "learning_rate": 0.00017799234846457764, "loss": 2.2483, "step": 182940 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017799119165206443, "loss": 2.2218, "step": 182945 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017799003481290803, "loss": 2.1606, "step": 182950 }, { "epoch": 0.43, "grad_norm": 1.828125, "learning_rate": 0.0001779888779471088, "loss": 2.0406, "step": 182955 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017798772105466718, "loss": 2.0788, "step": 182960 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017798656413558353, "loss": 2.1141, "step": 182965 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017798540718985825, "loss": 2.2372, "step": 182970 }, { "epoch": 0.43, "grad_norm": 2.875, "learning_rate": 0.00017798425021749173, "loss": 2.2463, "step": 182975 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017798309321848438, "loss": 2.0862, "step": 182980 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.0001779819361928366, "loss": 2.1664, "step": 182985 }, { "epoch": 0.43, "grad_norm": 1.875, "learning_rate": 0.0001779807791405488, "loss": 1.9912, "step": 182990 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001779796220616213, "loss": 2.082, "step": 182995 }, { "epoch": 0.43, "grad_norm": 1.9921875, "learning_rate": 0.00017797846495605456, "loss": 2.1197, "step": 183000 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.000177977307823849, "loss": 1.9752, "step": 183005 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017797615066500496, "loss": 2.2533, "step": 183010 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.0001779749934795228, "loss": 1.9962, "step": 183015 }, { "epoch": 0.43, "grad_norm": 2.859375, "learning_rate": 0.00017797383626740306, "loss": 2.1956, "step": 183020 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017797267902864596, "loss": 2.0661, "step": 183025 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017797152176325204, "loss": 2.1689, "step": 183030 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001779703644712216, "loss": 2.0485, "step": 183035 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017796920715255508, "loss": 2.3034, "step": 183040 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017796804980725286, "loss": 1.9922, "step": 183045 }, { "epoch": 0.43, "grad_norm": 1.7734375, "learning_rate": 0.00017796689243531534, "loss": 2.0257, "step": 183050 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.00017796573503674293, "loss": 2.06, "step": 183055 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017796457761153599, "loss": 2.1809, "step": 183060 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017796342015969496, "loss": 2.2195, "step": 183065 }, { "epoch": 0.43, "grad_norm": 1.8984375, "learning_rate": 0.00017796226268122021, "loss": 1.941, "step": 183070 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.0001779611051761121, "loss": 2.0666, "step": 183075 }, { "epoch": 0.43, "grad_norm": 2.640625, "learning_rate": 0.0001779599476443711, "loss": 2.0619, "step": 183080 }, { "epoch": 0.43, "grad_norm": 1.8046875, "learning_rate": 0.00017795879008599755, "loss": 2.1016, "step": 183085 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017795763250099187, "loss": 1.8807, "step": 183090 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017795647488935446, "loss": 2.1885, "step": 183095 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017795531725108569, "loss": 1.9968, "step": 183100 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017795415958618595, "loss": 2.2371, "step": 183105 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001779530018946557, "loss": 2.1513, "step": 183110 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017795184417649526, "loss": 2.1621, "step": 183115 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017795068643170506, "loss": 2.219, "step": 183120 }, { "epoch": 0.43, "grad_norm": 1.875, "learning_rate": 0.00017794952866028552, "loss": 2.1349, "step": 183125 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017794837086223695, "loss": 2.0848, "step": 183130 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017794721303755987, "loss": 2.1071, "step": 183135 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017794605518625455, "loss": 2.1275, "step": 183140 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017794489730832148, "loss": 2.0471, "step": 183145 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.000177943739403761, "loss": 2.3356, "step": 183150 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001779425814725735, "loss": 2.1269, "step": 183155 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017794142351475949, "loss": 2.037, "step": 183160 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.0001779402655303192, "loss": 2.096, "step": 183165 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017793910751925314, "loss": 2.239, "step": 183170 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017793794948156163, "loss": 2.0682, "step": 183175 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.00017793679141724512, "loss": 2.0933, "step": 183180 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.000177935633326304, "loss": 2.2421, "step": 183185 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017793447520873866, "loss": 2.0346, "step": 183190 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.00017793331706454947, "loss": 1.9971, "step": 183195 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.00017793215889373685, "loss": 2.3172, "step": 183200 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017793100069630119, "loss": 2.1181, "step": 183205 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017792984247224286, "loss": 2.0019, "step": 183210 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017792868422156235, "loss": 1.9938, "step": 183215 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.0001779275259442599, "loss": 2.2609, "step": 183220 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017792636764033608, "loss": 2.2413, "step": 183225 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017792520930979115, "loss": 1.9944, "step": 183230 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.00017792405095262557, "loss": 1.954, "step": 183235 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017792289256883973, "loss": 2.0042, "step": 183240 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.00017792173415843402, "loss": 2.0613, "step": 183245 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.00017792057572140878, "loss": 2.1906, "step": 183250 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017791941725776452, "loss": 2.1691, "step": 183255 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.00017791825876750154, "loss": 2.1795, "step": 183260 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017791710025062026, "loss": 2.1785, "step": 183265 }, { "epoch": 0.43, "grad_norm": 2.765625, "learning_rate": 0.00017791594170712114, "loss": 2.1552, "step": 183270 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017791478313700448, "loss": 2.1513, "step": 183275 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001779136245402707, "loss": 2.157, "step": 183280 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017791246591692027, "loss": 1.9798, "step": 183285 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017791130726695347, "loss": 1.9761, "step": 183290 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017791014859037078, "loss": 2.2827, "step": 183295 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017790898988717256, "loss": 2.0098, "step": 183300 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.00017790783115735926, "loss": 2.046, "step": 183305 }, { "epoch": 0.43, "grad_norm": 1.9140625, "learning_rate": 0.00017790667240093119, "loss": 1.95, "step": 183310 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001779055136178888, "loss": 2.1378, "step": 183315 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017790435480823246, "loss": 2.2111, "step": 183320 }, { "epoch": 0.43, "grad_norm": 1.7265625, "learning_rate": 0.0001779031959719626, "loss": 2.227, "step": 183325 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017790203710907955, "loss": 2.251, "step": 183330 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017790087821958382, "loss": 2.0656, "step": 183335 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001778997193034757, "loss": 2.0969, "step": 183340 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.0001778985603607556, "loss": 2.1269, "step": 183345 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.000177897401391424, "loss": 2.0123, "step": 183350 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017789624239548116, "loss": 2.1299, "step": 183355 }, { "epoch": 0.43, "grad_norm": 1.90625, "learning_rate": 0.00017789508337292758, "loss": 2.0228, "step": 183360 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017789392432376367, "loss": 2.1266, "step": 183365 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017789276524798976, "loss": 2.0784, "step": 183370 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017789160614560624, "loss": 2.0323, "step": 183375 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017789044701661358, "loss": 2.2175, "step": 183380 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017788928786101208, "loss": 2.2371, "step": 183385 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001778881286788022, "loss": 2.1532, "step": 183390 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017788696946998434, "loss": 2.0811, "step": 183395 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.00017788581023455886, "loss": 2.0905, "step": 183400 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.0001778846509725262, "loss": 1.9535, "step": 183405 }, { "epoch": 0.43, "grad_norm": 1.8984375, "learning_rate": 0.0001778834916838867, "loss": 2.1731, "step": 183410 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017788233236864083, "loss": 1.9991, "step": 183415 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.0001778811730267889, "loss": 2.121, "step": 183420 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017788001365833138, "loss": 2.156, "step": 183425 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017787885426326862, "loss": 2.2058, "step": 183430 }, { "epoch": 0.43, "grad_norm": 2.484375, "learning_rate": 0.00017787769484160104, "loss": 2.1106, "step": 183435 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017787653539332903, "loss": 2.0752, "step": 183440 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017787537591845297, "loss": 2.1989, "step": 183445 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017787421641697325, "loss": 2.2165, "step": 183450 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.00017787305688889032, "loss": 2.0932, "step": 183455 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017787189733420452, "loss": 2.0332, "step": 183460 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017787073775291627, "loss": 2.0929, "step": 183465 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017786957814502595, "loss": 2.1674, "step": 183470 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.000177868418510534, "loss": 2.1571, "step": 183475 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017786725884944078, "loss": 2.2961, "step": 183480 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017786609916174668, "loss": 2.1052, "step": 183485 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017786493944745214, "loss": 2.0254, "step": 183490 }, { "epoch": 0.43, "grad_norm": 1.828125, "learning_rate": 0.00017786377970655749, "loss": 2.2328, "step": 183495 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017786261993906318, "loss": 2.0813, "step": 183500 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017786146014496957, "loss": 2.1888, "step": 183505 }, { "epoch": 0.43, "grad_norm": 1.75, "learning_rate": 0.0001778603003242771, "loss": 2.1289, "step": 183510 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017785914047698607, "loss": 2.1605, "step": 183515 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.00017785798060309703, "loss": 1.9762, "step": 183520 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017785682070261025, "loss": 2.0233, "step": 183525 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.00017785566077552618, "loss": 1.9881, "step": 183530 }, { "epoch": 0.43, "grad_norm": 2.578125, "learning_rate": 0.0001778545008218452, "loss": 2.1861, "step": 183535 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017785334084156773, "loss": 2.0194, "step": 183540 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001778521808346941, "loss": 2.0164, "step": 183545 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017785102080122479, "loss": 2.1831, "step": 183550 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017784986074116015, "loss": 2.1733, "step": 183555 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001778487006545006, "loss": 2.0368, "step": 183560 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.0001778475405412465, "loss": 2.1675, "step": 183565 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017784638040139824, "loss": 2.1686, "step": 183570 }, { "epoch": 0.43, "grad_norm": 1.8828125, "learning_rate": 0.00017784522023495632, "loss": 2.2504, "step": 183575 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017784406004192103, "loss": 2.0962, "step": 183580 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017784289982229276, "loss": 2.0411, "step": 183585 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017784173957607197, "loss": 2.0169, "step": 183590 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017784057930325905, "loss": 2.0373, "step": 183595 }, { "epoch": 0.43, "grad_norm": 2.53125, "learning_rate": 0.00017783941900385435, "loss": 2.2656, "step": 183600 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.0001778382586778583, "loss": 2.0127, "step": 183605 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.0001778370983252713, "loss": 2.1306, "step": 183610 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017783593794609373, "loss": 2.2054, "step": 183615 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.000177834777540326, "loss": 2.2657, "step": 183620 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017783361710796848, "loss": 2.0978, "step": 183625 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.00017783245664902158, "loss": 1.9016, "step": 183630 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017783129616348573, "loss": 2.1754, "step": 183635 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.00017783013565136126, "loss": 2.0021, "step": 183640 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017782897511264863, "loss": 2.2249, "step": 183645 }, { "epoch": 0.43, "grad_norm": 1.71875, "learning_rate": 0.0001778278145473482, "loss": 2.1332, "step": 183650 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017782665395546038, "loss": 2.2439, "step": 183655 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.00017782549333698557, "loss": 2.2389, "step": 183660 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017782433269192415, "loss": 2.1531, "step": 183665 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017782317202027654, "loss": 2.0692, "step": 183670 }, { "epoch": 0.43, "grad_norm": 1.8515625, "learning_rate": 0.0001778220113220431, "loss": 2.0857, "step": 183675 }, { "epoch": 0.43, "grad_norm": 1.7578125, "learning_rate": 0.00017782085059722428, "loss": 2.0086, "step": 183680 }, { "epoch": 0.43, "grad_norm": 1.8828125, "learning_rate": 0.00017781968984582043, "loss": 2.1527, "step": 183685 }, { "epoch": 0.43, "grad_norm": 2.546875, "learning_rate": 0.00017781852906783197, "loss": 2.1017, "step": 183690 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017781736826325928, "loss": 2.1135, "step": 183695 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017781620743210276, "loss": 2.1716, "step": 183700 }, { "epoch": 0.43, "grad_norm": 2.484375, "learning_rate": 0.00017781504657436283, "loss": 2.1239, "step": 183705 }, { "epoch": 0.43, "grad_norm": 1.8515625, "learning_rate": 0.00017781388569003984, "loss": 2.1311, "step": 183710 }, { "epoch": 0.43, "grad_norm": 2.703125, "learning_rate": 0.00017781272477913426, "loss": 2.2523, "step": 183715 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.0001778115638416464, "loss": 2.2111, "step": 183720 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.0001778104028775767, "loss": 1.919, "step": 183725 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017780924188692557, "loss": 2.1857, "step": 183730 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001778080808696934, "loss": 2.0122, "step": 183735 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017780691982588058, "loss": 2.0918, "step": 183740 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.0001778057587554875, "loss": 2.1251, "step": 183745 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017780459765851455, "loss": 2.0719, "step": 183750 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017780343653496215, "loss": 2.2065, "step": 183755 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.0001778022753848307, "loss": 2.1602, "step": 183760 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017780111420812055, "loss": 2.1078, "step": 183765 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017779995300483214, "loss": 1.921, "step": 183770 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017779879177496586, "loss": 2.2119, "step": 183775 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.0001777976305185221, "loss": 2.0921, "step": 183780 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017779646923550123, "loss": 2.1353, "step": 183785 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017779530792590373, "loss": 2.146, "step": 183790 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017779414658972994, "loss": 2.1787, "step": 183795 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.0001777929852269802, "loss": 1.9264, "step": 183800 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.000177791823837655, "loss": 2.0457, "step": 183805 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.0001777906624217547, "loss": 2.0995, "step": 183810 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017778950097927968, "loss": 2.0389, "step": 183815 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.0001777883395102304, "loss": 2.2103, "step": 183820 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017778717801460718, "loss": 2.2373, "step": 183825 }, { "epoch": 0.43, "grad_norm": 1.8125, "learning_rate": 0.00017778601649241047, "loss": 2.13, "step": 183830 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017778485494364063, "loss": 2.2974, "step": 183835 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017778369336829804, "loss": 2.2467, "step": 183840 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.0001777825317663832, "loss": 2.138, "step": 183845 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001777813701378964, "loss": 2.0866, "step": 183850 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017778020848283808, "loss": 2.1003, "step": 183855 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001777790468012086, "loss": 2.0267, "step": 183860 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017777788509300844, "loss": 2.0817, "step": 183865 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001777767233582379, "loss": 2.2466, "step": 183870 }, { "epoch": 0.43, "grad_norm": 1.765625, "learning_rate": 0.00017777556159689746, "loss": 1.9866, "step": 183875 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017777439980898744, "loss": 2.2154, "step": 183880 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001777732379945083, "loss": 2.1061, "step": 183885 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.0001777720761534604, "loss": 2.1164, "step": 183890 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017777091428584414, "loss": 2.1594, "step": 183895 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017776975239165995, "loss": 2.0662, "step": 183900 }, { "epoch": 0.43, "grad_norm": 1.625, "learning_rate": 0.00017776859047090814, "loss": 2.1839, "step": 183905 }, { "epoch": 0.43, "grad_norm": 2.6875, "learning_rate": 0.00017776742852358923, "loss": 2.2871, "step": 183910 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.00017776626654970355, "loss": 2.174, "step": 183915 }, { "epoch": 0.43, "grad_norm": 1.96875, "learning_rate": 0.00017776510454925147, "loss": 2.0858, "step": 183920 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017776394252223346, "loss": 2.0867, "step": 183925 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017776278046864987, "loss": 2.1636, "step": 183930 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001777616183885011, "loss": 2.153, "step": 183935 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017776045628178753, "loss": 1.9612, "step": 183940 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.0001777592941485096, "loss": 2.1784, "step": 183945 }, { "epoch": 0.43, "grad_norm": 2.796875, "learning_rate": 0.0001777581319886677, "loss": 2.0982, "step": 183950 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017775696980226217, "loss": 1.9845, "step": 183955 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017775580758929346, "loss": 2.1645, "step": 183960 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017775464534976196, "loss": 2.2817, "step": 183965 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017775348308366807, "loss": 1.9822, "step": 183970 }, { "epoch": 0.43, "grad_norm": 1.84375, "learning_rate": 0.00017775232079101218, "loss": 2.2511, "step": 183975 }, { "epoch": 0.43, "grad_norm": 1.9375, "learning_rate": 0.0001777511584717947, "loss": 1.8971, "step": 183980 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.000177749996126016, "loss": 1.904, "step": 183985 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.00017774883375367648, "loss": 2.1563, "step": 183990 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017774767135477656, "loss": 2.1981, "step": 183995 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.0001777465089293166, "loss": 2.1483, "step": 184000 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.0001777453464772971, "loss": 2.245, "step": 184005 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.0001777441839987183, "loss": 1.972, "step": 184010 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017774302149358071, "loss": 2.2137, "step": 184015 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.0001777418589618847, "loss": 2.1055, "step": 184020 }, { "epoch": 0.43, "grad_norm": 2.609375, "learning_rate": 0.00017774069640363063, "loss": 2.1716, "step": 184025 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.00017773953381881893, "loss": 2.2465, "step": 184030 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.00017773837120745002, "loss": 2.1468, "step": 184035 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017773720856952428, "loss": 2.2969, "step": 184040 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017773604590504206, "loss": 2.1295, "step": 184045 }, { "epoch": 0.43, "grad_norm": 2.46875, "learning_rate": 0.00017773488321400383, "loss": 2.1468, "step": 184050 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.00017773372049640994, "loss": 2.1218, "step": 184055 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017773255775226083, "loss": 2.0873, "step": 184060 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017773139498155683, "loss": 2.1927, "step": 184065 }, { "epoch": 0.43, "grad_norm": 2.078125, "learning_rate": 0.0001777302321842984, "loss": 2.2678, "step": 184070 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.0001777290693604859, "loss": 2.0653, "step": 184075 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017772790651011974, "loss": 2.2316, "step": 184080 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017772674363320033, "loss": 2.1161, "step": 184085 }, { "epoch": 0.43, "grad_norm": 2.609375, "learning_rate": 0.00017772558072972806, "loss": 2.1741, "step": 184090 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.0001777244177997033, "loss": 2.1653, "step": 184095 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017772325484312647, "loss": 2.176, "step": 184100 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017772209185999796, "loss": 2.0422, "step": 184105 }, { "epoch": 0.43, "grad_norm": 1.9609375, "learning_rate": 0.0001777209288503182, "loss": 2.1379, "step": 184110 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017771976581408753, "loss": 2.3025, "step": 184115 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017771860275130644, "loss": 2.0345, "step": 184120 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017771743966197518, "loss": 2.0739, "step": 184125 }, { "epoch": 0.43, "grad_norm": 1.734375, "learning_rate": 0.00017771627654609428, "loss": 2.0822, "step": 184130 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.0001777151134036641, "loss": 2.0967, "step": 184135 }, { "epoch": 0.43, "grad_norm": 2.3125, "learning_rate": 0.00017771395023468502, "loss": 2.1164, "step": 184140 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017771278703915743, "loss": 2.2596, "step": 184145 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017771162381708178, "loss": 2.1352, "step": 184150 }, { "epoch": 0.43, "grad_norm": 1.90625, "learning_rate": 0.0001777104605684584, "loss": 2.0888, "step": 184155 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.0001777092972932877, "loss": 2.0679, "step": 184160 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017770813399157012, "loss": 2.0536, "step": 184165 }, { "epoch": 0.43, "grad_norm": 2.640625, "learning_rate": 0.00017770697066330602, "loss": 2.0793, "step": 184170 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017770580730849583, "loss": 2.3407, "step": 184175 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.0001777046439271399, "loss": 2.2244, "step": 184180 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017770348051923866, "loss": 2.1094, "step": 184185 }, { "epoch": 0.43, "grad_norm": 1.8984375, "learning_rate": 0.0001777023170847925, "loss": 2.1215, "step": 184190 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017770115362380183, "loss": 2.2552, "step": 184195 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017769999013626702, "loss": 2.105, "step": 184200 }, { "epoch": 0.43, "grad_norm": 2.875, "learning_rate": 0.00017769882662218854, "loss": 2.123, "step": 184205 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017769766308156667, "loss": 2.0833, "step": 184210 }, { "epoch": 0.43, "grad_norm": 1.6875, "learning_rate": 0.0001776964995144019, "loss": 2.1502, "step": 184215 }, { "epoch": 0.43, "grad_norm": 2.546875, "learning_rate": 0.0001776953359206946, "loss": 2.2605, "step": 184220 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017769417230044515, "loss": 2.1118, "step": 184225 }, { "epoch": 0.43, "grad_norm": 2.421875, "learning_rate": 0.00017769300865365395, "loss": 2.0561, "step": 184230 }, { "epoch": 0.43, "grad_norm": 2.703125, "learning_rate": 0.00017769184498032142, "loss": 2.2857, "step": 184235 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017769068128044795, "loss": 2.0892, "step": 184240 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.00017768951755403394, "loss": 2.3997, "step": 184245 }, { "epoch": 0.43, "grad_norm": 1.875, "learning_rate": 0.00017768835380107978, "loss": 2.1594, "step": 184250 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017768719002158586, "loss": 2.315, "step": 184255 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.0001776860262155526, "loss": 2.0599, "step": 184260 }, { "epoch": 0.43, "grad_norm": 3.125, "learning_rate": 0.00017768486238298038, "loss": 2.0689, "step": 184265 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.0001776836985238696, "loss": 2.0576, "step": 184270 }, { "epoch": 0.43, "grad_norm": 1.8984375, "learning_rate": 0.00017768253463822063, "loss": 1.9157, "step": 184275 }, { "epoch": 0.43, "grad_norm": 1.734375, "learning_rate": 0.00017768137072603395, "loss": 2.3491, "step": 184280 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017768020678730984, "loss": 2.2728, "step": 184285 }, { "epoch": 0.43, "grad_norm": 2.78125, "learning_rate": 0.0001776790428220488, "loss": 2.2082, "step": 184290 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.0001776778788302512, "loss": 2.1856, "step": 184295 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017767671481191743, "loss": 2.1097, "step": 184300 }, { "epoch": 0.43, "grad_norm": 2.34375, "learning_rate": 0.00017767555076704788, "loss": 1.9726, "step": 184305 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017767438669564296, "loss": 1.8716, "step": 184310 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017767322259770304, "loss": 2.0554, "step": 184315 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017767205847322854, "loss": 2.0577, "step": 184320 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017767089432221987, "loss": 2.045, "step": 184325 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017766973014467742, "loss": 2.1743, "step": 184330 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017766856594060156, "loss": 2.0368, "step": 184335 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017766740170999272, "loss": 1.994, "step": 184340 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.0001776662374528513, "loss": 2.2333, "step": 184345 }, { "epoch": 0.43, "grad_norm": 1.953125, "learning_rate": 0.00017766507316917767, "loss": 2.0305, "step": 184350 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017766390885897224, "loss": 2.0403, "step": 184355 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.0001776627445222354, "loss": 2.1306, "step": 184360 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017766158015896757, "loss": 1.9966, "step": 184365 }, { "epoch": 0.43, "grad_norm": 1.75, "learning_rate": 0.00017766041576916915, "loss": 2.2441, "step": 184370 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017765925135284052, "loss": 2.091, "step": 184375 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017765808690998206, "loss": 1.9731, "step": 184380 }, { "epoch": 0.43, "grad_norm": 2.5, "learning_rate": 0.0001776569224405942, "loss": 2.1927, "step": 184385 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.00017765575794467732, "loss": 2.1617, "step": 184390 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017765459342223184, "loss": 2.0839, "step": 184395 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017765342887325814, "loss": 1.9926, "step": 184400 }, { "epoch": 0.43, "grad_norm": 2.625, "learning_rate": 0.0001776522642977566, "loss": 2.0211, "step": 184405 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017765109969572766, "loss": 2.0186, "step": 184410 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001776499350671717, "loss": 2.1726, "step": 184415 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017764877041208908, "loss": 2.2296, "step": 184420 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.0001776476057304803, "loss": 2.0467, "step": 184425 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.00017764644102234562, "loss": 2.2595, "step": 184430 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017764527628768556, "loss": 2.0536, "step": 184435 }, { "epoch": 0.43, "grad_norm": 2.390625, "learning_rate": 0.0001776441115265004, "loss": 2.2447, "step": 184440 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017764294673879065, "loss": 1.9731, "step": 184445 }, { "epoch": 0.43, "grad_norm": 2.609375, "learning_rate": 0.00017764178192455665, "loss": 2.1273, "step": 184450 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017764061708379881, "loss": 2.0938, "step": 184455 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017763945221651754, "loss": 2.2298, "step": 184460 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.0001776382873227132, "loss": 2.1917, "step": 184465 }, { "epoch": 0.43, "grad_norm": 1.8359375, "learning_rate": 0.00017763712240238623, "loss": 2.0887, "step": 184470 }, { "epoch": 0.43, "grad_norm": 1.765625, "learning_rate": 0.00017763595745553702, "loss": 1.9781, "step": 184475 }, { "epoch": 0.43, "grad_norm": 2.359375, "learning_rate": 0.00017763479248216594, "loss": 2.0775, "step": 184480 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.0001776336274822734, "loss": 2.1295, "step": 184485 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017763246245585984, "loss": 2.2665, "step": 184490 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.0001776312974029256, "loss": 2.0248, "step": 184495 }, { "epoch": 0.43, "grad_norm": 2.546875, "learning_rate": 0.0001776301323234711, "loss": 2.0441, "step": 184500 }, { "epoch": 0.43, "grad_norm": 1.8515625, "learning_rate": 0.00017762896721749676, "loss": 2.2449, "step": 184505 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.00017762780208500293, "loss": 2.1346, "step": 184510 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017762663692599004, "loss": 2.1154, "step": 184515 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017762547174045849, "loss": 2.2175, "step": 184520 }, { "epoch": 0.43, "grad_norm": 1.9921875, "learning_rate": 0.00017762430652840867, "loss": 2.0656, "step": 184525 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.00017762314128984098, "loss": 2.0742, "step": 184530 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.00017762197602475582, "loss": 1.7908, "step": 184535 }, { "epoch": 0.43, "grad_norm": 2.296875, "learning_rate": 0.00017762081073315357, "loss": 2.1015, "step": 184540 }, { "epoch": 0.43, "grad_norm": 1.796875, "learning_rate": 0.00017761964541503463, "loss": 2.1966, "step": 184545 }, { "epoch": 0.43, "grad_norm": 2.171875, "learning_rate": 0.00017761848007039944, "loss": 2.1032, "step": 184550 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017761731469924835, "loss": 1.9603, "step": 184555 }, { "epoch": 0.43, "grad_norm": 2.40625, "learning_rate": 0.0001776161493015818, "loss": 2.0841, "step": 184560 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017761498387740016, "loss": 2.0011, "step": 184565 }, { "epoch": 0.43, "grad_norm": 2.140625, "learning_rate": 0.00017761381842670384, "loss": 2.3506, "step": 184570 }, { "epoch": 0.43, "grad_norm": 1.8203125, "learning_rate": 0.00017761265294949323, "loss": 2.2573, "step": 184575 }, { "epoch": 0.43, "grad_norm": 1.78125, "learning_rate": 0.00017761148744576872, "loss": 2.0267, "step": 184580 }, { "epoch": 0.43, "grad_norm": 2.203125, "learning_rate": 0.00017761032191553072, "loss": 2.0515, "step": 184585 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017760915635877962, "loss": 2.0401, "step": 184590 }, { "epoch": 0.43, "grad_norm": 1.8515625, "learning_rate": 0.00017760799077551584, "loss": 2.005, "step": 184595 }, { "epoch": 0.43, "grad_norm": 2.5625, "learning_rate": 0.00017760682516573977, "loss": 2.15, "step": 184600 }, { "epoch": 0.43, "grad_norm": 1.984375, "learning_rate": 0.00017760565952945178, "loss": 2.0411, "step": 184605 }, { "epoch": 0.43, "grad_norm": 2.328125, "learning_rate": 0.0001776044938666523, "loss": 2.2243, "step": 184610 }, { "epoch": 0.43, "grad_norm": 2.671875, "learning_rate": 0.00017760332817734172, "loss": 2.1326, "step": 184615 }, { "epoch": 0.43, "grad_norm": 1.921875, "learning_rate": 0.0001776021624615204, "loss": 2.0198, "step": 184620 }, { "epoch": 0.43, "grad_norm": 1.7734375, "learning_rate": 0.00017760099671918883, "loss": 2.1789, "step": 184625 }, { "epoch": 0.43, "grad_norm": 1.703125, "learning_rate": 0.0001775998309503473, "loss": 2.1161, "step": 184630 }, { "epoch": 0.43, "grad_norm": 2.28125, "learning_rate": 0.0001775986651549963, "loss": 2.1368, "step": 184635 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.0001775974993331362, "loss": 2.1054, "step": 184640 }, { "epoch": 0.43, "grad_norm": 1.84375, "learning_rate": 0.0001775963334847673, "loss": 1.9781, "step": 184645 }, { "epoch": 0.43, "grad_norm": 2.375, "learning_rate": 0.0001775951676098902, "loss": 2.1294, "step": 184650 }, { "epoch": 0.43, "grad_norm": 2.015625, "learning_rate": 0.0001775940017085051, "loss": 2.264, "step": 184655 }, { "epoch": 0.43, "grad_norm": 1.9453125, "learning_rate": 0.00017759283578061251, "loss": 2.2987, "step": 184660 }, { "epoch": 0.43, "grad_norm": 1.8125, "learning_rate": 0.00017759166982621283, "loss": 2.1232, "step": 184665 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.0001775905038453064, "loss": 2.2076, "step": 184670 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017758933783789362, "loss": 1.9114, "step": 184675 }, { "epoch": 0.43, "grad_norm": 2.109375, "learning_rate": 0.00017758817180397494, "loss": 2.1982, "step": 184680 }, { "epoch": 0.43, "grad_norm": 2.421875, "learning_rate": 0.00017758700574355073, "loss": 2.0515, "step": 184685 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.0001775858396566214, "loss": 2.1555, "step": 184690 }, { "epoch": 0.43, "grad_norm": 1.8125, "learning_rate": 0.00017758467354318734, "loss": 2.2157, "step": 184695 }, { "epoch": 0.43, "grad_norm": 1.9296875, "learning_rate": 0.00017758350740324893, "loss": 2.0791, "step": 184700 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017758234123680663, "loss": 1.9882, "step": 184705 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017758117504386076, "loss": 2.201, "step": 184710 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.00017758000882441176, "loss": 2.2464, "step": 184715 }, { "epoch": 0.43, "grad_norm": 2.15625, "learning_rate": 0.00017757884257846003, "loss": 2.1111, "step": 184720 }, { "epoch": 0.43, "grad_norm": 2.453125, "learning_rate": 0.00017757767630600596, "loss": 2.171, "step": 184725 }, { "epoch": 0.43, "grad_norm": 2.265625, "learning_rate": 0.00017757651000704995, "loss": 2.1372, "step": 184730 }, { "epoch": 0.43, "grad_norm": 2.4375, "learning_rate": 0.00017757534368159237, "loss": 2.2524, "step": 184735 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017757417732963365, "loss": 2.0158, "step": 184740 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017757301095117423, "loss": 2.122, "step": 184745 }, { "epoch": 0.43, "grad_norm": 1.84375, "learning_rate": 0.00017757184454621441, "loss": 2.1749, "step": 184750 }, { "epoch": 0.43, "grad_norm": 2.03125, "learning_rate": 0.00017757067811475467, "loss": 2.2126, "step": 184755 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017756951165679536, "loss": 2.3002, "step": 184760 }, { "epoch": 0.43, "grad_norm": 2.25, "learning_rate": 0.0001775683451723369, "loss": 2.376, "step": 184765 }, { "epoch": 0.43, "grad_norm": 2.671875, "learning_rate": 0.00017756717866137972, "loss": 2.1475, "step": 184770 }, { "epoch": 0.43, "grad_norm": 2.09375, "learning_rate": 0.00017756601212392417, "loss": 2.0299, "step": 184775 }, { "epoch": 0.43, "grad_norm": 1.859375, "learning_rate": 0.00017756484555997063, "loss": 2.092, "step": 184780 }, { "epoch": 0.43, "grad_norm": 1.890625, "learning_rate": 0.00017756367896951954, "loss": 2.1114, "step": 184785 }, { "epoch": 0.43, "grad_norm": 1.75, "learning_rate": 0.00017756251235257132, "loss": 2.063, "step": 184790 }, { "epoch": 0.43, "grad_norm": 2.0625, "learning_rate": 0.00017756134570912635, "loss": 2.17, "step": 184795 }, { "epoch": 0.43, "grad_norm": 2.125, "learning_rate": 0.00017756017903918498, "loss": 2.1044, "step": 184800 }, { "epoch": 0.43, "grad_norm": 1.9765625, "learning_rate": 0.00017755901234274766, "loss": 2.1923, "step": 184805 }, { "epoch": 0.43, "grad_norm": 2.546875, "learning_rate": 0.0001775578456198148, "loss": 2.0034, "step": 184810 }, { "epoch": 0.43, "grad_norm": 2.234375, "learning_rate": 0.0001775566788703867, "loss": 2.449, "step": 184815 }, { "epoch": 0.43, "grad_norm": 2.0, "learning_rate": 0.0001775555120944639, "loss": 2.1759, "step": 184820 }, { "epoch": 0.43, "grad_norm": 2.21875, "learning_rate": 0.00017755434529204673, "loss": 1.9821, "step": 184825 }, { "epoch": 0.43, "grad_norm": 2.046875, "learning_rate": 0.00017755317846313554, "loss": 2.1474, "step": 184830 }, { "epoch": 0.43, "grad_norm": 1.8984375, "learning_rate": 0.00017755201160773085, "loss": 2.1768, "step": 184835 }, { "epoch": 0.43, "grad_norm": 2.515625, "learning_rate": 0.00017755084472583294, "loss": 2.0106, "step": 184840 }, { "epoch": 0.43, "grad_norm": 2.1875, "learning_rate": 0.00017754967781744225, "loss": 2.0243, "step": 184845 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017754851088255919, "loss": 2.1798, "step": 184850 }, { "epoch": 0.44, "grad_norm": 1.75, "learning_rate": 0.00017754734392118416, "loss": 2.0235, "step": 184855 }, { "epoch": 0.44, "grad_norm": 1.7109375, "learning_rate": 0.00017754617693331755, "loss": 2.1603, "step": 184860 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017754500991895976, "loss": 2.0449, "step": 184865 }, { "epoch": 0.44, "grad_norm": 1.8125, "learning_rate": 0.0001775438428781112, "loss": 2.0993, "step": 184870 }, { "epoch": 0.44, "grad_norm": 1.7890625, "learning_rate": 0.0001775426758107722, "loss": 2.0891, "step": 184875 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017754150871694327, "loss": 1.9669, "step": 184880 }, { "epoch": 0.44, "grad_norm": 1.7421875, "learning_rate": 0.00017754034159662475, "loss": 2.0553, "step": 184885 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017753917444981703, "loss": 2.1794, "step": 184890 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.0001775380072765205, "loss": 2.2307, "step": 184895 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017753684007673564, "loss": 2.2007, "step": 184900 }, { "epoch": 0.44, "grad_norm": 2.5625, "learning_rate": 0.00017753567285046275, "loss": 2.2684, "step": 184905 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001775345055977023, "loss": 2.2391, "step": 184910 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017753333831845461, "loss": 2.1883, "step": 184915 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017753217101272015, "loss": 1.922, "step": 184920 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017753100368049927, "loss": 2.2311, "step": 184925 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017752983632179242, "loss": 1.9821, "step": 184930 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017752866893659997, "loss": 2.0837, "step": 184935 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.0001775275015249223, "loss": 2.06, "step": 184940 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017752633408675984, "loss": 2.0815, "step": 184945 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.000177525166622113, "loss": 2.2735, "step": 184950 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017752399913098215, "loss": 2.1547, "step": 184955 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017752283161336769, "loss": 2.1309, "step": 184960 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017752166406927, "loss": 2.0779, "step": 184965 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017752049649868954, "loss": 2.0848, "step": 184970 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017751932890162664, "loss": 2.0333, "step": 184975 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017751816127808175, "loss": 2.1448, "step": 184980 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017751699362805528, "loss": 2.0644, "step": 184985 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017751582595154755, "loss": 2.0121, "step": 184990 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017751465824855902, "loss": 2.132, "step": 184995 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.0001775134905190901, "loss": 2.0837, "step": 185000 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017751232276314114, "loss": 2.3316, "step": 185005 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017751115498071255, "loss": 1.9399, "step": 185010 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017750998717180481, "loss": 2.2508, "step": 185015 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017750881933641818, "loss": 2.1693, "step": 185020 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017750765147455318, "loss": 2.1301, "step": 185025 }, { "epoch": 0.44, "grad_norm": 1.875, "learning_rate": 0.00017750648358621013, "loss": 1.9001, "step": 185030 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017750531567138945, "loss": 2.0959, "step": 185035 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001775041477300916, "loss": 2.2011, "step": 185040 }, { "epoch": 0.44, "grad_norm": 1.8828125, "learning_rate": 0.0001775029797623169, "loss": 2.0527, "step": 185045 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017750181176806574, "loss": 2.0998, "step": 185050 }, { "epoch": 0.44, "grad_norm": 2.53125, "learning_rate": 0.00017750064374733862, "loss": 2.1641, "step": 185055 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017749947570013585, "loss": 2.001, "step": 185060 }, { "epoch": 0.44, "grad_norm": 1.7734375, "learning_rate": 0.00017749830762645784, "loss": 2.0609, "step": 185065 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.000177497139526305, "loss": 2.1388, "step": 185070 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017749597139967774, "loss": 1.9606, "step": 185075 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017749480324657645, "loss": 1.9224, "step": 185080 }, { "epoch": 0.44, "grad_norm": 2.546875, "learning_rate": 0.00017749363506700154, "loss": 2.3059, "step": 185085 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.0001774924668609534, "loss": 2.0162, "step": 185090 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017749129862843242, "loss": 2.1157, "step": 185095 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.000177490130369439, "loss": 2.3558, "step": 185100 }, { "epoch": 0.44, "grad_norm": 2.5625, "learning_rate": 0.00017748896208397356, "loss": 2.3457, "step": 185105 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017748779377203645, "loss": 2.0463, "step": 185110 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017748662543362817, "loss": 2.0787, "step": 185115 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.00017748545706874902, "loss": 2.3264, "step": 185120 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.0001774842886773994, "loss": 2.2335, "step": 185125 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017748312025957978, "loss": 2.028, "step": 185130 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017748195181529051, "loss": 2.1855, "step": 185135 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.000177480783344532, "loss": 2.1401, "step": 185140 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017747961484730466, "loss": 2.1054, "step": 185145 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017747844632360886, "loss": 2.2551, "step": 185150 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.00017747727777344503, "loss": 2.0994, "step": 185155 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017747610919681354, "loss": 1.9425, "step": 185160 }, { "epoch": 0.44, "grad_norm": 2.578125, "learning_rate": 0.00017747494059371484, "loss": 2.0114, "step": 185165 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017747377196414927, "loss": 2.073, "step": 185170 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017747260330811725, "loss": 2.2049, "step": 185175 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.0001774714346256192, "loss": 2.0576, "step": 185180 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017747026591665547, "loss": 2.1783, "step": 185185 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.00017746909718122653, "loss": 2.1603, "step": 185190 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017746792841933273, "loss": 2.124, "step": 185195 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017746675963097448, "loss": 2.1138, "step": 185200 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017746559081615218, "loss": 2.3221, "step": 185205 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017746442197486624, "loss": 2.2823, "step": 185210 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.00017746325310711702, "loss": 2.0359, "step": 185215 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.000177462084212905, "loss": 2.0895, "step": 185220 }, { "epoch": 0.44, "grad_norm": 2.59375, "learning_rate": 0.00017746091529223046, "loss": 2.2188, "step": 185225 }, { "epoch": 0.44, "grad_norm": 1.625, "learning_rate": 0.0001774597463450939, "loss": 1.9344, "step": 185230 }, { "epoch": 0.44, "grad_norm": 1.6875, "learning_rate": 0.00017745857737149566, "loss": 2.2273, "step": 185235 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.0001774574083714362, "loss": 2.0811, "step": 185240 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.0001774562393449159, "loss": 2.1253, "step": 185245 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017745507029193512, "loss": 1.8379, "step": 185250 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017745390121249428, "loss": 2.1547, "step": 185255 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017745273210659375, "loss": 2.043, "step": 185260 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.000177451562974234, "loss": 2.1223, "step": 185265 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017745039381541542, "loss": 1.9471, "step": 185270 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.00017744922463013834, "loss": 2.1247, "step": 185275 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.0001774480554184032, "loss": 2.1574, "step": 185280 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017744688618021043, "loss": 2.1624, "step": 185285 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017744571691556037, "loss": 1.9584, "step": 185290 }, { "epoch": 0.44, "grad_norm": 1.9140625, "learning_rate": 0.00017744454762445345, "loss": 2.1224, "step": 185295 }, { "epoch": 0.44, "grad_norm": 1.8125, "learning_rate": 0.00017744337830689007, "loss": 2.2904, "step": 185300 }, { "epoch": 0.44, "grad_norm": 1.8046875, "learning_rate": 0.00017744220896287062, "loss": 2.1654, "step": 185305 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017744103959239552, "loss": 2.1048, "step": 185310 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017743987019546516, "loss": 2.0389, "step": 185315 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017743870077207996, "loss": 1.8953, "step": 185320 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017743753132224024, "loss": 2.1111, "step": 185325 }, { "epoch": 0.44, "grad_norm": 2.78125, "learning_rate": 0.00017743636184594646, "loss": 2.2094, "step": 185330 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.00017743519234319908, "loss": 2.1473, "step": 185335 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017743402281399838, "loss": 2.0181, "step": 185340 }, { "epoch": 0.44, "grad_norm": 1.8984375, "learning_rate": 0.00017743285325834483, "loss": 1.9688, "step": 185345 }, { "epoch": 0.44, "grad_norm": 2.625, "learning_rate": 0.00017743168367623878, "loss": 2.3587, "step": 185350 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.0001774305140676807, "loss": 2.2232, "step": 185355 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017742934443267094, "loss": 2.0773, "step": 185360 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.0001774281747712099, "loss": 1.8836, "step": 185365 }, { "epoch": 0.44, "grad_norm": 1.890625, "learning_rate": 0.000177427005083298, "loss": 2.0059, "step": 185370 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017742583536893564, "loss": 2.2212, "step": 185375 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001774246656281232, "loss": 2.2075, "step": 185380 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017742349586086112, "loss": 2.0086, "step": 185385 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017742232606714976, "loss": 2.0806, "step": 185390 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.0001774211562469895, "loss": 2.3532, "step": 185395 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017741998640038076, "loss": 2.1132, "step": 185400 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017741881652732396, "loss": 1.9251, "step": 185405 }, { "epoch": 0.44, "grad_norm": 1.765625, "learning_rate": 0.0001774176466278195, "loss": 1.9314, "step": 185410 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.0001774164767018678, "loss": 2.0001, "step": 185415 }, { "epoch": 0.44, "grad_norm": 1.8671875, "learning_rate": 0.0001774153067494692, "loss": 2.0845, "step": 185420 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0001774141367706241, "loss": 1.9998, "step": 185425 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017741296676533294, "loss": 2.1365, "step": 185430 }, { "epoch": 0.44, "grad_norm": 2.609375, "learning_rate": 0.0001774117967335961, "loss": 2.232, "step": 185435 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017741062667541403, "loss": 1.9693, "step": 185440 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017740945659078708, "loss": 2.22, "step": 185445 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017740828647971562, "loss": 2.0942, "step": 185450 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.0001774071163422001, "loss": 2.1967, "step": 185455 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0001774059461782409, "loss": 2.09, "step": 185460 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017740477598783842, "loss": 2.0997, "step": 185465 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.0001774036057709931, "loss": 2.1302, "step": 185470 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017740243552770527, "loss": 2.1624, "step": 185475 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017740126525797535, "loss": 2.1332, "step": 185480 }, { "epoch": 0.44, "grad_norm": 1.84375, "learning_rate": 0.00017740009496180378, "loss": 2.1442, "step": 185485 }, { "epoch": 0.44, "grad_norm": 1.8984375, "learning_rate": 0.00017739892463919095, "loss": 2.0762, "step": 185490 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.0001773977542901372, "loss": 2.0208, "step": 185495 }, { "epoch": 0.44, "grad_norm": 1.8359375, "learning_rate": 0.00017739658391464303, "loss": 2.104, "step": 185500 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017739541351270873, "loss": 2.0512, "step": 185505 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017739424308433477, "loss": 2.1249, "step": 185510 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017739307262952154, "loss": 2.2213, "step": 185515 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017739190214826941, "loss": 2.2659, "step": 185520 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017739073164057883, "loss": 1.9751, "step": 185525 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017738956110645017, "loss": 2.2529, "step": 185530 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017738839054588383, "loss": 1.9958, "step": 185535 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017738721995888023, "loss": 1.9948, "step": 185540 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.0001773860493454397, "loss": 1.8898, "step": 185545 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017738487870556276, "loss": 2.1814, "step": 185550 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017738370803924966, "loss": 2.3405, "step": 185555 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017738253734650097, "loss": 2.0541, "step": 185560 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001773813666273169, "loss": 2.3072, "step": 185565 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.00017738019588169805, "loss": 2.1358, "step": 185570 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.0001773790251096447, "loss": 2.0798, "step": 185575 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017737785431115724, "loss": 2.0832, "step": 185580 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017737668348623612, "loss": 2.0136, "step": 185585 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017737551263488172, "loss": 2.2198, "step": 185590 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.0001773743417570944, "loss": 2.0151, "step": 185595 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017737317085287466, "loss": 2.1831, "step": 185600 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017737199992222285, "loss": 2.0694, "step": 185605 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017737082896513933, "loss": 2.091, "step": 185610 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017736965798162454, "loss": 2.2169, "step": 185615 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017736848697167884, "loss": 2.0036, "step": 185620 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017736731593530268, "loss": 1.9782, "step": 185625 }, { "epoch": 0.44, "grad_norm": 1.8359375, "learning_rate": 0.00017736614487249647, "loss": 2.2033, "step": 185630 }, { "epoch": 0.44, "grad_norm": 1.7734375, "learning_rate": 0.00017736497378326055, "loss": 2.0343, "step": 185635 }, { "epoch": 0.44, "grad_norm": 1.7890625, "learning_rate": 0.0001773638026675954, "loss": 2.1015, "step": 185640 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.0001773626315255013, "loss": 2.212, "step": 185645 }, { "epoch": 0.44, "grad_norm": 2.921875, "learning_rate": 0.00017736146035697879, "loss": 2.0878, "step": 185650 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017736028916202817, "loss": 2.1444, "step": 185655 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017735911794064988, "loss": 2.1034, "step": 185660 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001773579466928443, "loss": 2.1377, "step": 185665 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017735677541861185, "loss": 2.0933, "step": 185670 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.0001773556041179529, "loss": 2.0991, "step": 185675 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.0001773544327908679, "loss": 2.1564, "step": 185680 }, { "epoch": 0.44, "grad_norm": 1.8671875, "learning_rate": 0.0001773532614373572, "loss": 1.9911, "step": 185685 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017735209005742127, "loss": 2.052, "step": 185690 }, { "epoch": 0.44, "grad_norm": 1.7265625, "learning_rate": 0.00017735091865106042, "loss": 1.8751, "step": 185695 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017734974721827511, "loss": 2.1521, "step": 185700 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017734857575906572, "loss": 2.1246, "step": 185705 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017734740427343266, "loss": 2.1981, "step": 185710 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.0001773462327613763, "loss": 2.1441, "step": 185715 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017734506122289706, "loss": 2.14, "step": 185720 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017734388965799538, "loss": 2.2113, "step": 185725 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017734271806667162, "loss": 2.192, "step": 185730 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017734154644892617, "loss": 2.0941, "step": 185735 }, { "epoch": 0.44, "grad_norm": 1.890625, "learning_rate": 0.00017734037480475947, "loss": 2.2707, "step": 185740 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017733920313417185, "loss": 2.0907, "step": 185745 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.0001773380314371638, "loss": 2.0911, "step": 185750 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017733685971373566, "loss": 1.9205, "step": 185755 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017733568796388783, "loss": 2.2271, "step": 185760 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017733451618762074, "loss": 2.1958, "step": 185765 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017733334438493477, "loss": 2.1762, "step": 185770 }, { "epoch": 0.44, "grad_norm": 1.7578125, "learning_rate": 0.00017733217255583034, "loss": 2.0196, "step": 185775 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001773310007003078, "loss": 2.0687, "step": 185780 }, { "epoch": 0.44, "grad_norm": 1.875, "learning_rate": 0.00017732982881836767, "loss": 2.0964, "step": 185785 }, { "epoch": 0.44, "grad_norm": 1.8359375, "learning_rate": 0.0001773286569100102, "loss": 2.104, "step": 185790 }, { "epoch": 0.44, "grad_norm": 2.640625, "learning_rate": 0.00017732748497523585, "loss": 1.9553, "step": 185795 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017732631301404505, "loss": 2.0721, "step": 185800 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.0001773251410264382, "loss": 2.1992, "step": 185805 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017732396901241565, "loss": 2.0465, "step": 185810 }, { "epoch": 0.44, "grad_norm": 1.7265625, "learning_rate": 0.00017732279697197785, "loss": 2.0894, "step": 185815 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017732162490512518, "loss": 2.1614, "step": 185820 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017732045281185804, "loss": 2.2146, "step": 185825 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.0001773192806921768, "loss": 2.1434, "step": 185830 }, { "epoch": 0.44, "grad_norm": 1.9140625, "learning_rate": 0.0001773181085460819, "loss": 2.1765, "step": 185835 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0001773169363735738, "loss": 2.1502, "step": 185840 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017731576417465275, "loss": 2.1401, "step": 185845 }, { "epoch": 0.44, "grad_norm": 1.6796875, "learning_rate": 0.00017731459194931926, "loss": 2.1309, "step": 185850 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017731341969757372, "loss": 2.2684, "step": 185855 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017731224741941647, "loss": 2.0839, "step": 185860 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.000177311075114848, "loss": 2.1718, "step": 185865 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017730990278386864, "loss": 2.2124, "step": 185870 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017730873042647884, "loss": 2.0807, "step": 185875 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017730755804267894, "loss": 2.1545, "step": 185880 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017730638563246938, "loss": 2.0315, "step": 185885 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.0001773052131958506, "loss": 2.0132, "step": 185890 }, { "epoch": 0.44, "grad_norm": 1.8828125, "learning_rate": 0.0001773040407328229, "loss": 1.9076, "step": 185895 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017730286824338678, "loss": 2.1834, "step": 185900 }, { "epoch": 0.44, "grad_norm": 1.9140625, "learning_rate": 0.0001773016957275426, "loss": 2.0698, "step": 185905 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017730052318529074, "loss": 2.1338, "step": 185910 }, { "epoch": 0.44, "grad_norm": 1.7578125, "learning_rate": 0.0001772993506166316, "loss": 2.1584, "step": 185915 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017729817802156564, "loss": 2.1713, "step": 185920 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.0001772970054000932, "loss": 2.1343, "step": 185925 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017729583275221468, "loss": 2.2132, "step": 185930 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017729466007793056, "loss": 1.9944, "step": 185935 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.00017729348737724113, "loss": 2.0111, "step": 185940 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017729231465014688, "loss": 2.0196, "step": 185945 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017729114189664813, "loss": 2.0968, "step": 185950 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017728996911674537, "loss": 2.1328, "step": 185955 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017728879631043892, "loss": 2.0035, "step": 185960 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017728762347772923, "loss": 2.0503, "step": 185965 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.0001772864506186167, "loss": 1.9975, "step": 185970 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001772852777331017, "loss": 2.1924, "step": 185975 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017728410482118463, "loss": 2.275, "step": 185980 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017728293188286595, "loss": 2.142, "step": 185985 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.000177281758918146, "loss": 2.2238, "step": 185990 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017728058592702517, "loss": 2.0463, "step": 185995 }, { "epoch": 0.44, "grad_norm": 1.8515625, "learning_rate": 0.00017727941290950395, "loss": 1.9435, "step": 186000 }, { "epoch": 0.44, "grad_norm": 3.03125, "learning_rate": 0.00017727823986558262, "loss": 2.1833, "step": 186005 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017727706679526167, "loss": 2.1052, "step": 186010 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017727589369854148, "loss": 1.9308, "step": 186015 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.00017727472057542243, "loss": 2.3141, "step": 186020 }, { "epoch": 0.44, "grad_norm": 2.75, "learning_rate": 0.00017727354742590494, "loss": 2.1677, "step": 186025 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.0001772723742499894, "loss": 1.917, "step": 186030 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.0001772712010476762, "loss": 2.2088, "step": 186035 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.0001772700278189658, "loss": 2.1101, "step": 186040 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017726885456385852, "loss": 2.0962, "step": 186045 }, { "epoch": 0.44, "grad_norm": 2.46875, "learning_rate": 0.00017726768128235483, "loss": 1.9454, "step": 186050 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017726650797445504, "loss": 2.151, "step": 186055 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017726533464015968, "loss": 2.2183, "step": 186060 }, { "epoch": 0.44, "grad_norm": 2.640625, "learning_rate": 0.00017726416127946903, "loss": 2.1016, "step": 186065 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001772629878923836, "loss": 1.9708, "step": 186070 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017726181447890366, "loss": 2.1051, "step": 186075 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.0001772606410390297, "loss": 2.2163, "step": 186080 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017725946757276212, "loss": 1.981, "step": 186085 }, { "epoch": 0.44, "grad_norm": 1.78125, "learning_rate": 0.0001772582940801013, "loss": 2.0339, "step": 186090 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.0001772571205610477, "loss": 2.1158, "step": 186095 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017725594701560157, "loss": 2.133, "step": 186100 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017725477344376347, "loss": 2.2112, "step": 186105 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017725359984553372, "loss": 2.1614, "step": 186110 }, { "epoch": 0.44, "grad_norm": 1.734375, "learning_rate": 0.00017725242622091273, "loss": 1.8101, "step": 186115 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001772512525699009, "loss": 1.9499, "step": 186120 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001772500788924987, "loss": 2.0737, "step": 186125 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017724890518870643, "loss": 2.1872, "step": 186130 }, { "epoch": 0.44, "grad_norm": 1.8046875, "learning_rate": 0.00017724773145852453, "loss": 2.0395, "step": 186135 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017724655770195343, "loss": 2.1985, "step": 186140 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017724538391899347, "loss": 2.1545, "step": 186145 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.0001772442101096451, "loss": 2.2891, "step": 186150 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.0001772430362739087, "loss": 2.1131, "step": 186155 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.0001772418624117847, "loss": 2.296, "step": 186160 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017724068852327347, "loss": 1.9193, "step": 186165 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001772395146083754, "loss": 1.9566, "step": 186170 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017723834066709094, "loss": 2.217, "step": 186175 }, { "epoch": 0.44, "grad_norm": 1.890625, "learning_rate": 0.00017723716669942045, "loss": 2.2662, "step": 186180 }, { "epoch": 0.44, "grad_norm": 2.625, "learning_rate": 0.00017723599270536434, "loss": 2.0971, "step": 186185 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017723481868492299, "loss": 2.0443, "step": 186190 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001772336446380969, "loss": 2.2443, "step": 186195 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017723247056488633, "loss": 2.1985, "step": 186200 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017723129646529174, "loss": 2.1383, "step": 186205 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017723012233931357, "loss": 2.0577, "step": 186210 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.0001772289481869522, "loss": 2.1961, "step": 186215 }, { "epoch": 0.44, "grad_norm": 1.6953125, "learning_rate": 0.000177227774008208, "loss": 2.0757, "step": 186220 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.0001772265998030814, "loss": 1.9947, "step": 186225 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017722542557157279, "loss": 2.0284, "step": 186230 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017722425131368253, "loss": 2.2188, "step": 186235 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017722307702941115, "loss": 2.0357, "step": 186240 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.0001772219027187589, "loss": 2.2912, "step": 186245 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017722072838172629, "loss": 1.96, "step": 186250 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017721955401831365, "loss": 2.1831, "step": 186255 }, { "epoch": 0.44, "grad_norm": 1.8515625, "learning_rate": 0.00017721837962852145, "loss": 2.218, "step": 186260 }, { "epoch": 0.44, "grad_norm": 1.7734375, "learning_rate": 0.00017721720521234998, "loss": 1.7799, "step": 186265 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017721603076979977, "loss": 2.0641, "step": 186270 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017721485630087114, "loss": 2.2176, "step": 186275 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017721368180556452, "loss": 2.2266, "step": 186280 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017721250728388032, "loss": 2.0735, "step": 186285 }, { "epoch": 0.44, "grad_norm": 1.9140625, "learning_rate": 0.0001772113327358189, "loss": 1.8716, "step": 186290 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.0001772101581613807, "loss": 2.2509, "step": 186295 }, { "epoch": 0.44, "grad_norm": 1.625, "learning_rate": 0.00017720898356056615, "loss": 2.0712, "step": 186300 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017720780893337555, "loss": 2.0542, "step": 186305 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.0001772066342798094, "loss": 2.3233, "step": 186310 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017720545959986803, "loss": 2.078, "step": 186315 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.0001772042848935519, "loss": 2.1211, "step": 186320 }, { "epoch": 0.44, "grad_norm": 1.796875, "learning_rate": 0.00017720311016086138, "loss": 1.9897, "step": 186325 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017720193540179689, "loss": 2.2603, "step": 186330 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.0001772007606163588, "loss": 2.118, "step": 186335 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.0001771995858045475, "loss": 2.0757, "step": 186340 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.0001771984109663635, "loss": 2.1716, "step": 186345 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017719723610180708, "loss": 2.1276, "step": 186350 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.0001771960612108787, "loss": 1.9786, "step": 186355 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.0001771948862935787, "loss": 2.0392, "step": 186360 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.00017719371134990758, "loss": 2.1408, "step": 186365 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017719253637986566, "loss": 1.8232, "step": 186370 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017719136138345335, "loss": 1.9879, "step": 186375 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017719018636067112, "loss": 2.0683, "step": 186380 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017718901131151928, "loss": 2.0664, "step": 186385 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017718783623599832, "loss": 2.1101, "step": 186390 }, { "epoch": 0.44, "grad_norm": 1.8671875, "learning_rate": 0.00017718666113410855, "loss": 2.2231, "step": 186395 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017718548600585042, "loss": 2.0848, "step": 186400 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017718431085122436, "loss": 2.3841, "step": 186405 }, { "epoch": 0.44, "grad_norm": 2.640625, "learning_rate": 0.0001771831356702307, "loss": 2.2301, "step": 186410 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017718196046286993, "loss": 2.0888, "step": 186415 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017718078522914238, "loss": 2.0148, "step": 186420 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017717960996904846, "loss": 2.2759, "step": 186425 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001771784346825886, "loss": 2.1716, "step": 186430 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017717725936976318, "loss": 2.084, "step": 186435 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.0001771760840305726, "loss": 2.0475, "step": 186440 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.0001771749086650173, "loss": 2.1637, "step": 186445 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.0001771737332730976, "loss": 1.994, "step": 186450 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.000177172557854814, "loss": 1.7759, "step": 186455 }, { "epoch": 0.44, "grad_norm": 1.84375, "learning_rate": 0.00017717138241016685, "loss": 2.0334, "step": 186460 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017717020693915652, "loss": 2.1644, "step": 186465 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.0001771690314417835, "loss": 2.2003, "step": 186470 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017716785591804813, "loss": 2.2867, "step": 186475 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017716668036795076, "loss": 2.0004, "step": 186480 }, { "epoch": 0.44, "grad_norm": 1.8125, "learning_rate": 0.00017716550479149193, "loss": 2.0523, "step": 186485 }, { "epoch": 0.44, "grad_norm": 1.6484375, "learning_rate": 0.00017716432918867193, "loss": 2.0573, "step": 186490 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017716315355949117, "loss": 2.1022, "step": 186495 }, { "epoch": 0.44, "grad_norm": 2.5, "learning_rate": 0.00017716197790395013, "loss": 2.4122, "step": 186500 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017716080222204912, "loss": 2.1723, "step": 186505 }, { "epoch": 0.44, "grad_norm": 2.765625, "learning_rate": 0.00017715962651378858, "loss": 2.2033, "step": 186510 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017715845077916894, "loss": 2.1665, "step": 186515 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017715727501819054, "loss": 2.2078, "step": 186520 }, { "epoch": 0.44, "grad_norm": 1.84375, "learning_rate": 0.00017715609923085385, "loss": 2.0643, "step": 186525 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017715492341715923, "loss": 2.1267, "step": 186530 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001771537475771071, "loss": 2.1349, "step": 186535 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.0001771525717106978, "loss": 1.8682, "step": 186540 }, { "epoch": 0.44, "grad_norm": 1.859375, "learning_rate": 0.00017715139581793183, "loss": 2.1855, "step": 186545 }, { "epoch": 0.44, "grad_norm": 1.859375, "learning_rate": 0.00017715021989880953, "loss": 2.1468, "step": 186550 }, { "epoch": 0.44, "grad_norm": 2.640625, "learning_rate": 0.0001771490439533313, "loss": 2.0442, "step": 186555 }, { "epoch": 0.44, "grad_norm": 2.46875, "learning_rate": 0.00017714786798149755, "loss": 1.9772, "step": 186560 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017714669198330872, "loss": 2.1997, "step": 186565 }, { "epoch": 0.44, "grad_norm": 1.7890625, "learning_rate": 0.00017714551595876519, "loss": 2.1442, "step": 186570 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017714433990786735, "loss": 2.0096, "step": 186575 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017714316383061557, "loss": 2.1057, "step": 186580 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.0001771419877270103, "loss": 2.1334, "step": 186585 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017714081159705194, "loss": 2.121, "step": 186590 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.00017713963544074086, "loss": 2.1024, "step": 186595 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.0001771384592580775, "loss": 2.142, "step": 186600 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017713728304906225, "loss": 2.0867, "step": 186605 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017713610681369547, "loss": 1.946, "step": 186610 }, { "epoch": 0.44, "grad_norm": 1.8046875, "learning_rate": 0.00017713493055197764, "loss": 2.1213, "step": 186615 }, { "epoch": 0.44, "grad_norm": 2.546875, "learning_rate": 0.0001771337542639091, "loss": 2.1244, "step": 186620 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017713257794949025, "loss": 2.1637, "step": 186625 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017713140160872153, "loss": 2.1575, "step": 186630 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017713022524160334, "loss": 2.1159, "step": 186635 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017712904884813603, "loss": 2.1243, "step": 186640 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017712787242832008, "loss": 2.2822, "step": 186645 }, { "epoch": 0.44, "grad_norm": 1.8515625, "learning_rate": 0.0001771266959821558, "loss": 2.0508, "step": 186650 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0001771255195096437, "loss": 2.1592, "step": 186655 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017712434301078408, "loss": 2.1716, "step": 186660 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017712316648557742, "loss": 2.1169, "step": 186665 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017712198993402405, "loss": 2.0636, "step": 186670 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017712081335612446, "loss": 1.9657, "step": 186675 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017711963675187895, "loss": 2.1453, "step": 186680 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017711846012128797, "loss": 2.0451, "step": 186685 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017711728346435197, "loss": 2.0952, "step": 186690 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.0001771161067810713, "loss": 2.2413, "step": 186695 }, { "epoch": 0.44, "grad_norm": 2.46875, "learning_rate": 0.00017711493007144634, "loss": 2.1349, "step": 186700 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017711375333547756, "loss": 2.0723, "step": 186705 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.0001771125765731653, "loss": 2.1471, "step": 186710 }, { "epoch": 0.44, "grad_norm": 2.53125, "learning_rate": 0.00017711139978450997, "loss": 2.3979, "step": 186715 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017711022296951198, "loss": 2.0649, "step": 186720 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017710904612817179, "loss": 2.1251, "step": 186725 }, { "epoch": 0.44, "grad_norm": 1.6953125, "learning_rate": 0.0001771078692604897, "loss": 2.0869, "step": 186730 }, { "epoch": 0.44, "grad_norm": 1.875, "learning_rate": 0.0001771066923664662, "loss": 1.9407, "step": 186735 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017710551544610165, "loss": 2.0551, "step": 186740 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017710433849939643, "loss": 1.9893, "step": 186745 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.000177103161526351, "loss": 2.1197, "step": 186750 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017710198452696571, "loss": 2.0977, "step": 186755 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.000177100807501241, "loss": 2.2018, "step": 186760 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017709963044917724, "loss": 1.9807, "step": 186765 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017709845337077486, "loss": 1.9352, "step": 186770 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017709727626603425, "loss": 2.1639, "step": 186775 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017709609913495582, "loss": 2.0809, "step": 186780 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017709492197753995, "loss": 2.0357, "step": 186785 }, { "epoch": 0.44, "grad_norm": 1.7890625, "learning_rate": 0.00017709374479378708, "loss": 2.173, "step": 186790 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017709256758369758, "loss": 1.9697, "step": 186795 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017709139034727183, "loss": 2.2049, "step": 186800 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017709021308451027, "loss": 2.1032, "step": 186805 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.0001770890357954133, "loss": 1.8764, "step": 186810 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017708785847998134, "loss": 2.2152, "step": 186815 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017708668113821475, "loss": 2.084, "step": 186820 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.00017708550377011398, "loss": 2.0334, "step": 186825 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017708432637567937, "loss": 2.131, "step": 186830 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017708314895491136, "loss": 2.0911, "step": 186835 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017708197150781036, "loss": 2.1329, "step": 186840 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017708079403437678, "loss": 2.1776, "step": 186845 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017707961653461095, "loss": 2.2339, "step": 186850 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017707843900851335, "loss": 2.1777, "step": 186855 }, { "epoch": 0.44, "grad_norm": 2.4375, "learning_rate": 0.00017707726145608436, "loss": 2.0385, "step": 186860 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.0001770760838773244, "loss": 2.137, "step": 186865 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017707490627223385, "loss": 2.0164, "step": 186870 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017707372864081308, "loss": 2.0187, "step": 186875 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017707255098306255, "loss": 2.1208, "step": 186880 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017707137329898265, "loss": 2.2406, "step": 186885 }, { "epoch": 0.44, "grad_norm": 1.8671875, "learning_rate": 0.00017707019558857372, "loss": 1.9383, "step": 186890 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017706901785183624, "loss": 2.1937, "step": 186895 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.0001770678400887706, "loss": 2.0376, "step": 186900 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017706666229937718, "loss": 2.0739, "step": 186905 }, { "epoch": 0.44, "grad_norm": 1.765625, "learning_rate": 0.0001770654844836564, "loss": 2.0403, "step": 186910 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017706430664160864, "loss": 2.0077, "step": 186915 }, { "epoch": 0.44, "grad_norm": 1.7578125, "learning_rate": 0.00017706312877323433, "loss": 1.9582, "step": 186920 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017706195087853382, "loss": 2.1591, "step": 186925 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.0001770607729575076, "loss": 2.3231, "step": 186930 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.000177059595010156, "loss": 2.1105, "step": 186935 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017705841703647942, "loss": 2.1469, "step": 186940 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017705723903647832, "loss": 1.9117, "step": 186945 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017705606101015307, "loss": 1.9018, "step": 186950 }, { "epoch": 0.44, "grad_norm": 2.5, "learning_rate": 0.00017705488295750407, "loss": 1.991, "step": 186955 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017705370487853172, "loss": 2.0513, "step": 186960 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017705252677323644, "loss": 1.9375, "step": 186965 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001770513486416186, "loss": 2.0298, "step": 186970 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017705017048367863, "loss": 2.0101, "step": 186975 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017704899229941691, "loss": 2.1714, "step": 186980 }, { "epoch": 0.44, "grad_norm": 1.78125, "learning_rate": 0.00017704781408883386, "loss": 1.9825, "step": 186985 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017704663585192988, "loss": 1.9331, "step": 186990 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001770454575887054, "loss": 2.3379, "step": 186995 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017704427929916077, "loss": 2.1111, "step": 187000 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017704310098329642, "loss": 1.9897, "step": 187005 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017704192264111275, "loss": 1.9884, "step": 187010 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017704074427261015, "loss": 1.9161, "step": 187015 }, { "epoch": 0.44, "grad_norm": 3.421875, "learning_rate": 0.00017703956587778905, "loss": 2.0237, "step": 187020 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017703838745664982, "loss": 2.1819, "step": 187025 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.0001770372090091929, "loss": 2.083, "step": 187030 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017703603053541867, "loss": 2.2897, "step": 187035 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017703485203532753, "loss": 2.2062, "step": 187040 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017703367350891988, "loss": 2.1961, "step": 187045 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017703249495619615, "loss": 2.0438, "step": 187050 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.0001770313163771567, "loss": 2.3034, "step": 187055 }, { "epoch": 0.44, "grad_norm": 2.53125, "learning_rate": 0.00017703013777180197, "loss": 2.1441, "step": 187060 }, { "epoch": 0.44, "grad_norm": 1.6953125, "learning_rate": 0.00017702895914013233, "loss": 2.1736, "step": 187065 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017702778048214824, "loss": 1.9719, "step": 187070 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017702660179785002, "loss": 2.0673, "step": 187075 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017702542308723813, "loss": 1.9929, "step": 187080 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.00017702424435031298, "loss": 2.1086, "step": 187085 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017702306558707494, "loss": 2.2756, "step": 187090 }, { "epoch": 0.44, "grad_norm": 2.71875, "learning_rate": 0.0001770218867975244, "loss": 2.2106, "step": 187095 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.0001770207079816618, "loss": 2.197, "step": 187100 }, { "epoch": 0.44, "grad_norm": 2.609375, "learning_rate": 0.0001770195291394875, "loss": 2.0942, "step": 187105 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.000177018350271002, "loss": 2.0466, "step": 187110 }, { "epoch": 0.44, "grad_norm": 2.65625, "learning_rate": 0.00017701717137620561, "loss": 2.0782, "step": 187115 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017701599245509875, "loss": 2.3071, "step": 187120 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017701481350768181, "loss": 2.1082, "step": 187125 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017701363453395523, "loss": 2.2884, "step": 187130 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.0001770124555339194, "loss": 1.9799, "step": 187135 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.0001770112765075747, "loss": 2.1931, "step": 187140 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.0001770100974549216, "loss": 2.1059, "step": 187145 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0001770089183759604, "loss": 1.9505, "step": 187150 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017700773927069156, "loss": 2.1434, "step": 187155 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.0001770065601391155, "loss": 2.1717, "step": 187160 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.0001770053809812326, "loss": 2.0894, "step": 187165 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.0001770042017970433, "loss": 2.0693, "step": 187170 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.0001770030225865479, "loss": 2.0689, "step": 187175 }, { "epoch": 0.44, "grad_norm": 1.859375, "learning_rate": 0.0001770018433497469, "loss": 2.145, "step": 187180 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017700066408664068, "loss": 2.0873, "step": 187185 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017699948479722963, "loss": 2.158, "step": 187190 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017699830548151418, "loss": 2.0522, "step": 187195 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017699712613949468, "loss": 2.306, "step": 187200 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017699594677117158, "loss": 2.0873, "step": 187205 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.0001769947673765453, "loss": 2.223, "step": 187210 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017699358795561615, "loss": 2.2224, "step": 187215 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017699240850838462, "loss": 2.1228, "step": 187220 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.0001769912290348511, "loss": 2.0816, "step": 187225 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017699004953501598, "loss": 2.2132, "step": 187230 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017698887000887963, "loss": 1.9981, "step": 187235 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.0001769876904564425, "loss": 2.0805, "step": 187240 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017698651087770498, "loss": 2.0854, "step": 187245 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017698533127266753, "loss": 1.9506, "step": 187250 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017698415164133042, "loss": 2.2231, "step": 187255 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017698297198369417, "loss": 2.08, "step": 187260 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.0001769817922997591, "loss": 2.2594, "step": 187265 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017698061258952569, "loss": 2.1167, "step": 187270 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.0001769794328529943, "loss": 2.1321, "step": 187275 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.0001769782530901653, "loss": 2.0743, "step": 187280 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017697707330103919, "loss": 2.106, "step": 187285 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017697589348561628, "loss": 2.1554, "step": 187290 }, { "epoch": 0.44, "grad_norm": 2.4375, "learning_rate": 0.000176974713643897, "loss": 2.0228, "step": 187295 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.0001769735337758818, "loss": 2.0123, "step": 187300 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017697235388157103, "loss": 2.3303, "step": 187305 }, { "epoch": 0.44, "grad_norm": 1.796875, "learning_rate": 0.0001769711739609651, "loss": 2.1457, "step": 187310 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.0001769699940140644, "loss": 2.0281, "step": 187315 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.0001769688140408694, "loss": 2.1345, "step": 187320 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017696763404138045, "loss": 1.996, "step": 187325 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017696645401559796, "loss": 2.1451, "step": 187330 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.0001769652739635223, "loss": 2.0203, "step": 187335 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.0001769640938851539, "loss": 2.0258, "step": 187340 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.0001769629137804932, "loss": 2.0901, "step": 187345 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017696173364954062, "loss": 2.1077, "step": 187350 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017696055349229644, "loss": 2.1069, "step": 187355 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017695937330876115, "loss": 2.1703, "step": 187360 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017695819309893515, "loss": 2.0905, "step": 187365 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.00017695701286281885, "loss": 2.1418, "step": 187370 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.00017695583260041265, "loss": 2.2839, "step": 187375 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017695465231171693, "loss": 1.8639, "step": 187380 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.0001769534719967321, "loss": 2.1258, "step": 187385 }, { "epoch": 0.44, "grad_norm": 1.8515625, "learning_rate": 0.00017695229165545854, "loss": 2.057, "step": 187390 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.0001769511112878967, "loss": 2.2579, "step": 187395 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.000176949930894047, "loss": 2.0784, "step": 187400 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017694875047390976, "loss": 2.1258, "step": 187405 }, { "epoch": 0.44, "grad_norm": 1.7578125, "learning_rate": 0.00017694757002748544, "loss": 2.0567, "step": 187410 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017694638955477446, "loss": 2.0633, "step": 187415 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.0001769452090557772, "loss": 2.0504, "step": 187420 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017694402853049405, "loss": 2.2406, "step": 187425 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017694284797892541, "loss": 2.1461, "step": 187430 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017694166740107173, "loss": 2.1328, "step": 187435 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017694048679693333, "loss": 2.1323, "step": 187440 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.0001769393061665107, "loss": 2.1904, "step": 187445 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.0001769381255098042, "loss": 2.1294, "step": 187450 }, { "epoch": 0.44, "grad_norm": 1.8984375, "learning_rate": 0.00017693694482681424, "loss": 2.1569, "step": 187455 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.0001769357641175412, "loss": 2.1326, "step": 187460 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017693458338198555, "loss": 2.3019, "step": 187465 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017693340262014764, "loss": 2.1837, "step": 187470 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017693222183202788, "loss": 2.2284, "step": 187475 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017693104101762668, "loss": 2.1162, "step": 187480 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017692986017694444, "loss": 1.98, "step": 187485 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017692867930998158, "loss": 2.2221, "step": 187490 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.00017692749841673845, "loss": 2.1395, "step": 187495 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017692631749721552, "loss": 2.0641, "step": 187500 }, { "epoch": 0.44, "grad_norm": 1.71875, "learning_rate": 0.00017692513655141317, "loss": 1.9843, "step": 187505 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017692395557933178, "loss": 2.2245, "step": 187510 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017692277458097175, "loss": 2.1321, "step": 187515 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017692159355633353, "loss": 2.0687, "step": 187520 }, { "epoch": 0.44, "grad_norm": 2.671875, "learning_rate": 0.00017692041250541753, "loss": 2.1054, "step": 187525 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017691923142822407, "loss": 1.9757, "step": 187530 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017691805032475364, "loss": 2.13, "step": 187535 }, { "epoch": 0.44, "grad_norm": 2.796875, "learning_rate": 0.00017691686919500658, "loss": 2.1852, "step": 187540 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017691568803898338, "loss": 2.1692, "step": 187545 }, { "epoch": 0.44, "grad_norm": 3.34375, "learning_rate": 0.0001769145068566843, "loss": 2.0571, "step": 187550 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017691332564810988, "loss": 2.3002, "step": 187555 }, { "epoch": 0.44, "grad_norm": 1.828125, "learning_rate": 0.00017691214441326047, "loss": 2.1106, "step": 187560 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017691096315213648, "loss": 2.2249, "step": 187565 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017690978186473831, "loss": 2.209, "step": 187570 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017690860055106638, "loss": 2.3742, "step": 187575 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017690741921112105, "loss": 2.1827, "step": 187580 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017690623784490274, "loss": 1.9832, "step": 187585 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017690505645241188, "loss": 2.24, "step": 187590 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017690387503364888, "loss": 2.1335, "step": 187595 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017690269358861408, "loss": 2.155, "step": 187600 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017690151211730796, "loss": 2.141, "step": 187605 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017690033061973086, "loss": 2.0461, "step": 187610 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017689914909588324, "loss": 2.1882, "step": 187615 }, { "epoch": 0.44, "grad_norm": 2.359375, "learning_rate": 0.00017689796754576545, "loss": 2.1463, "step": 187620 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017689678596937794, "loss": 2.2153, "step": 187625 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017689560436672108, "loss": 2.2037, "step": 187630 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.0001768944227377953, "loss": 2.0606, "step": 187635 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.00017689324108260097, "loss": 2.0146, "step": 187640 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017689205940113855, "loss": 2.1535, "step": 187645 }, { "epoch": 0.44, "grad_norm": 1.78125, "learning_rate": 0.00017689087769340837, "loss": 2.031, "step": 187650 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001768896959594109, "loss": 2.1327, "step": 187655 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.0001768885141991465, "loss": 2.0342, "step": 187660 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017688733241261558, "loss": 2.1364, "step": 187665 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017688615059981857, "loss": 2.1944, "step": 187670 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017688496876075584, "loss": 2.1071, "step": 187675 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017688378689542783, "loss": 2.2109, "step": 187680 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017688260500383493, "loss": 1.996, "step": 187685 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017688142308597754, "loss": 2.3236, "step": 187690 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017688024114185602, "loss": 2.2186, "step": 187695 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017687905917147085, "loss": 2.1746, "step": 187700 }, { "epoch": 0.44, "grad_norm": 1.828125, "learning_rate": 0.00017687787717482237, "loss": 2.3263, "step": 187705 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017687669515191104, "loss": 2.0509, "step": 187710 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017687551310273722, "loss": 2.015, "step": 187715 }, { "epoch": 0.44, "grad_norm": 1.796875, "learning_rate": 0.00017687433102730134, "loss": 1.9944, "step": 187720 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.0001768731489256038, "loss": 2.0828, "step": 187725 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.000176871966797645, "loss": 2.2743, "step": 187730 }, { "epoch": 0.44, "grad_norm": 2.53125, "learning_rate": 0.00017687078464342535, "loss": 2.059, "step": 187735 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017686960246294522, "loss": 2.1768, "step": 187740 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017686842025620503, "loss": 2.3294, "step": 187745 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017686723802320522, "loss": 2.151, "step": 187750 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017686605576394614, "loss": 2.2273, "step": 187755 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017686487347842825, "loss": 2.0805, "step": 187760 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017686369116665194, "loss": 2.168, "step": 187765 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017686250882861757, "loss": 1.9437, "step": 187770 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017686132646432557, "loss": 2.2052, "step": 187775 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017686014407377636, "loss": 2.0325, "step": 187780 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.00017685896165697034, "loss": 2.1481, "step": 187785 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017685777921390788, "loss": 2.0311, "step": 187790 }, { "epoch": 0.44, "grad_norm": 2.5, "learning_rate": 0.00017685659674458943, "loss": 1.9586, "step": 187795 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017685541424901534, "loss": 2.1998, "step": 187800 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001768542317271861, "loss": 2.0102, "step": 187805 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017685304917910202, "loss": 2.0678, "step": 187810 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017685186660476357, "loss": 2.1949, "step": 187815 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017685068400417108, "loss": 2.1113, "step": 187820 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017684950137732505, "loss": 2.1771, "step": 187825 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017684831872422582, "loss": 2.1085, "step": 187830 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017684713604487382, "loss": 2.103, "step": 187835 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017684595333926944, "loss": 2.1647, "step": 187840 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.0001768447706074131, "loss": 2.0072, "step": 187845 }, { "epoch": 0.44, "grad_norm": 2.5625, "learning_rate": 0.00017684358784930516, "loss": 2.1525, "step": 187850 }, { "epoch": 0.44, "grad_norm": 2.5, "learning_rate": 0.00017684240506494607, "loss": 2.0314, "step": 187855 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017684122225433623, "loss": 2.2089, "step": 187860 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017684003941747603, "loss": 2.0599, "step": 187865 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.0001768388565543659, "loss": 2.1906, "step": 187870 }, { "epoch": 0.44, "grad_norm": 2.59375, "learning_rate": 0.00017683767366500619, "loss": 2.1227, "step": 187875 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017683649074939735, "loss": 2.1128, "step": 187880 }, { "epoch": 0.44, "grad_norm": 2.671875, "learning_rate": 0.00017683530780753976, "loss": 2.1693, "step": 187885 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017683412483943386, "loss": 2.2413, "step": 187890 }, { "epoch": 0.44, "grad_norm": 1.8515625, "learning_rate": 0.00017683294184508004, "loss": 2.1003, "step": 187895 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017683175882447864, "loss": 2.2858, "step": 187900 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017683057577763016, "loss": 2.0699, "step": 187905 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.00017682939270453494, "loss": 1.9287, "step": 187910 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.0001768282096051934, "loss": 2.1863, "step": 187915 }, { "epoch": 0.44, "grad_norm": 1.875, "learning_rate": 0.000176827026479606, "loss": 1.867, "step": 187920 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017682584332777307, "loss": 2.2985, "step": 187925 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.000176824660149695, "loss": 2.2443, "step": 187930 }, { "epoch": 0.44, "grad_norm": 1.8125, "learning_rate": 0.00017682347694537227, "loss": 2.2163, "step": 187935 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017682229371480525, "loss": 2.0352, "step": 187940 }, { "epoch": 0.44, "grad_norm": 2.5, "learning_rate": 0.00017682111045799433, "loss": 2.0524, "step": 187945 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017681992717493992, "loss": 2.1582, "step": 187950 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017681874386564242, "loss": 2.2905, "step": 187955 }, { "epoch": 0.44, "grad_norm": 1.765625, "learning_rate": 0.0001768175605301023, "loss": 2.0367, "step": 187960 }, { "epoch": 0.44, "grad_norm": 2.609375, "learning_rate": 0.00017681637716831985, "loss": 2.0695, "step": 187965 }, { "epoch": 0.44, "grad_norm": 1.8515625, "learning_rate": 0.00017681519378029553, "loss": 2.1593, "step": 187970 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.0001768140103660298, "loss": 2.0643, "step": 187975 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017681282692552297, "loss": 2.0122, "step": 187980 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.0001768116434587755, "loss": 2.1346, "step": 187985 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017681045996578774, "loss": 2.145, "step": 187990 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.0001768092764465602, "loss": 2.2928, "step": 187995 }, { "epoch": 0.44, "grad_norm": 2.46875, "learning_rate": 0.00017680809290109318, "loss": 2.1442, "step": 188000 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.0001768069093293871, "loss": 2.1758, "step": 188005 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017680572573144243, "loss": 2.0686, "step": 188010 }, { "epoch": 0.44, "grad_norm": 1.9765625, "learning_rate": 0.00017680454210725955, "loss": 2.1633, "step": 188015 }, { "epoch": 0.44, "grad_norm": 2.4375, "learning_rate": 0.0001768033584568388, "loss": 2.0732, "step": 188020 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017680217478018064, "loss": 2.0853, "step": 188025 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.00017680099107728544, "loss": 1.8965, "step": 188030 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017679980734815366, "loss": 2.2227, "step": 188035 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017679862359278568, "loss": 2.1819, "step": 188040 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017679743981118188, "loss": 2.1533, "step": 188045 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017679625600334266, "loss": 2.1182, "step": 188050 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.0001767950721692685, "loss": 2.1114, "step": 188055 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017679388830895971, "loss": 2.1568, "step": 188060 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017679270442241674, "loss": 2.2577, "step": 188065 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017679152050964, "loss": 2.1961, "step": 188070 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001767903365706299, "loss": 1.9902, "step": 188075 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.0001767891526053868, "loss": 2.0072, "step": 188080 }, { "epoch": 0.44, "grad_norm": 2.71875, "learning_rate": 0.00017678796861391114, "loss": 2.1479, "step": 188085 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.0001767867845962033, "loss": 2.0932, "step": 188090 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017678560055226373, "loss": 2.0857, "step": 188095 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001767844164820928, "loss": 2.1749, "step": 188100 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017678323238569092, "loss": 2.0159, "step": 188105 }, { "epoch": 0.44, "grad_norm": 2.53125, "learning_rate": 0.00017678204826305848, "loss": 2.1582, "step": 188110 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.0001767808641141959, "loss": 2.0908, "step": 188115 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017677967993910362, "loss": 2.3701, "step": 188120 }, { "epoch": 0.44, "grad_norm": 1.8671875, "learning_rate": 0.000176778495737782, "loss": 1.9996, "step": 188125 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017677731151023146, "loss": 2.0803, "step": 188130 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017677612725645235, "loss": 2.1583, "step": 188135 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017677494297644515, "loss": 2.0478, "step": 188140 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017677375867021026, "loss": 2.026, "step": 188145 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017677257433774802, "loss": 2.0057, "step": 188150 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017677138997905888, "loss": 2.2738, "step": 188155 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017677020559414328, "loss": 2.3862, "step": 188160 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017676902118300155, "loss": 1.9282, "step": 188165 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017676783674563416, "loss": 2.0048, "step": 188170 }, { "epoch": 0.44, "grad_norm": 1.6171875, "learning_rate": 0.00017676665228204144, "loss": 2.1525, "step": 188175 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017676546779222388, "loss": 1.9549, "step": 188180 }, { "epoch": 0.44, "grad_norm": 1.5546875, "learning_rate": 0.00017676428327618183, "loss": 2.1733, "step": 188185 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.0001767630987339157, "loss": 1.9508, "step": 188190 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.00017676191416542592, "loss": 2.1886, "step": 188195 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017676072957071285, "loss": 2.1183, "step": 188200 }, { "epoch": 0.44, "grad_norm": 2.328125, "learning_rate": 0.00017675954494977695, "loss": 2.1023, "step": 188205 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001767583603026186, "loss": 2.2344, "step": 188210 }, { "epoch": 0.44, "grad_norm": 1.8984375, "learning_rate": 0.0001767571756292382, "loss": 2.0606, "step": 188215 }, { "epoch": 0.44, "grad_norm": 1.703125, "learning_rate": 0.00017675599092963613, "loss": 2.1642, "step": 188220 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017675480620381285, "loss": 2.2851, "step": 188225 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017675362145176871, "loss": 2.3015, "step": 188230 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017675243667350418, "loss": 2.1493, "step": 188235 }, { "epoch": 0.44, "grad_norm": 1.890625, "learning_rate": 0.0001767512518690196, "loss": 2.0997, "step": 188240 }, { "epoch": 0.44, "grad_norm": 2.4375, "learning_rate": 0.0001767500670383154, "loss": 2.113, "step": 188245 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.000176748882181392, "loss": 2.1219, "step": 188250 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017674769729824976, "loss": 1.9287, "step": 188255 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017674651238888914, "loss": 2.172, "step": 188260 }, { "epoch": 0.44, "grad_norm": 1.796875, "learning_rate": 0.0001767453274533105, "loss": 2.1134, "step": 188265 }, { "epoch": 0.44, "grad_norm": 1.765625, "learning_rate": 0.0001767441424915143, "loss": 2.2332, "step": 188270 }, { "epoch": 0.44, "grad_norm": 1.890625, "learning_rate": 0.00017674295750350088, "loss": 2.0744, "step": 188275 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.00017674177248927066, "loss": 2.172, "step": 188280 }, { "epoch": 0.44, "grad_norm": 1.765625, "learning_rate": 0.0001767405874488241, "loss": 1.9209, "step": 188285 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017673940238216153, "loss": 2.0274, "step": 188290 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.0001767382172892834, "loss": 2.242, "step": 188295 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.0001767370321701901, "loss": 2.0115, "step": 188300 }, { "epoch": 0.44, "grad_norm": 2.890625, "learning_rate": 0.00017673584702488206, "loss": 2.2477, "step": 188305 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017673466185335964, "loss": 2.2508, "step": 188310 }, { "epoch": 0.44, "grad_norm": 2.515625, "learning_rate": 0.00017673347665562327, "loss": 2.1025, "step": 188315 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017673229143167336, "loss": 2.0721, "step": 188320 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017673110618151027, "loss": 2.1683, "step": 188325 }, { "epoch": 0.44, "grad_norm": 2.578125, "learning_rate": 0.0001767299209051345, "loss": 2.0088, "step": 188330 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017672873560254637, "loss": 2.2853, "step": 188335 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.0001767275502737463, "loss": 1.9596, "step": 188340 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017672636491873473, "loss": 2.1429, "step": 188345 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017672517953751205, "loss": 2.0746, "step": 188350 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017672399413007863, "loss": 2.3296, "step": 188355 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.0001767228086964349, "loss": 2.0034, "step": 188360 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017672162323658128, "loss": 2.07, "step": 188365 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017672043775051818, "loss": 2.0139, "step": 188370 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017671925223824596, "loss": 2.101, "step": 188375 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017671806669976507, "loss": 2.1184, "step": 188380 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001767168811350759, "loss": 2.1074, "step": 188385 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017671569554417884, "loss": 2.2753, "step": 188390 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.0001767145099270743, "loss": 2.216, "step": 188395 }, { "epoch": 0.44, "grad_norm": 1.78125, "learning_rate": 0.0001767133242837627, "loss": 2.0442, "step": 188400 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017671213861424442, "loss": 2.1359, "step": 188405 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017671095291851992, "loss": 2.093, "step": 188410 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.00017670976719658956, "loss": 2.1479, "step": 188415 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017670858144845373, "loss": 2.1239, "step": 188420 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017670739567411285, "loss": 2.02, "step": 188425 }, { "epoch": 0.44, "grad_norm": 2.421875, "learning_rate": 0.00017670620987356736, "loss": 1.9876, "step": 188430 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017670502404681763, "loss": 2.161, "step": 188435 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017670383819386407, "loss": 2.2572, "step": 188440 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.0001767026523147071, "loss": 1.9054, "step": 188445 }, { "epoch": 0.44, "grad_norm": 2.46875, "learning_rate": 0.00017670146640934707, "loss": 2.1127, "step": 188450 }, { "epoch": 0.44, "grad_norm": 1.625, "learning_rate": 0.00017670028047778448, "loss": 2.1579, "step": 188455 }, { "epoch": 0.44, "grad_norm": 2.609375, "learning_rate": 0.00017669909452001968, "loss": 2.0415, "step": 188460 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017669790853605302, "loss": 2.0983, "step": 188465 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017669672252588502, "loss": 1.9936, "step": 188470 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017669553648951602, "loss": 1.9915, "step": 188475 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.0001766943504269464, "loss": 2.1401, "step": 188480 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017669316433817665, "loss": 2.1581, "step": 188485 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.0001766919782232071, "loss": 1.9979, "step": 188490 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017669079208203816, "loss": 2.2205, "step": 188495 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017668960591467027, "loss": 2.2536, "step": 188500 }, { "epoch": 0.44, "grad_norm": 3.171875, "learning_rate": 0.00017668841972110383, "loss": 2.0921, "step": 188505 }, { "epoch": 0.44, "grad_norm": 1.828125, "learning_rate": 0.00017668723350133925, "loss": 2.2054, "step": 188510 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.0001766860472553769, "loss": 2.1376, "step": 188515 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017668486098321718, "loss": 1.9122, "step": 188520 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017668367468486054, "loss": 1.8953, "step": 188525 }, { "epoch": 0.44, "grad_norm": 2.578125, "learning_rate": 0.00017668248836030738, "loss": 2.1583, "step": 188530 }, { "epoch": 0.44, "grad_norm": 2.453125, "learning_rate": 0.00017668130200955808, "loss": 2.0202, "step": 188535 }, { "epoch": 0.44, "grad_norm": 2.75, "learning_rate": 0.00017668011563261305, "loss": 2.2164, "step": 188540 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017667892922947274, "loss": 1.8973, "step": 188545 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017667774280013748, "loss": 2.0801, "step": 188550 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017667655634460769, "loss": 2.1051, "step": 188555 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017667536986288383, "loss": 2.1558, "step": 188560 }, { "epoch": 0.44, "grad_norm": 2.28125, "learning_rate": 0.00017667418335496628, "loss": 2.0418, "step": 188565 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017667299682085544, "loss": 2.3287, "step": 188570 }, { "epoch": 0.44, "grad_norm": 1.7890625, "learning_rate": 0.0001766718102605517, "loss": 2.2391, "step": 188575 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.00017667062367405546, "loss": 1.9402, "step": 188580 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017666943706136717, "loss": 2.1299, "step": 188585 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017666825042248721, "loss": 2.2805, "step": 188590 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017666706375741597, "loss": 2.003, "step": 188595 }, { "epoch": 0.44, "grad_norm": 1.8828125, "learning_rate": 0.00017666587706615392, "loss": 2.2341, "step": 188600 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.00017666469034870134, "loss": 2.0393, "step": 188605 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.00017666350360505878, "loss": 2.0073, "step": 188610 }, { "epoch": 0.44, "grad_norm": 1.9375, "learning_rate": 0.00017666231683522652, "loss": 2.0584, "step": 188615 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017666113003920507, "loss": 2.3044, "step": 188620 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017665994321699476, "loss": 2.2302, "step": 188625 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.000176658756368596, "loss": 2.0242, "step": 188630 }, { "epoch": 0.44, "grad_norm": 2.53125, "learning_rate": 0.00017665756949400926, "loss": 2.1001, "step": 188635 }, { "epoch": 0.44, "grad_norm": 1.8203125, "learning_rate": 0.0001766563825932349, "loss": 2.2617, "step": 188640 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017665519566627334, "loss": 2.0947, "step": 188645 }, { "epoch": 0.44, "grad_norm": 2.21875, "learning_rate": 0.00017665400871312494, "loss": 2.1682, "step": 188650 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017665282173379016, "loss": 1.9413, "step": 188655 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017665163472826935, "loss": 2.0858, "step": 188660 }, { "epoch": 0.44, "grad_norm": 2.65625, "learning_rate": 0.000176650447696563, "loss": 2.0337, "step": 188665 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017664926063867148, "loss": 2.1138, "step": 188670 }, { "epoch": 0.44, "grad_norm": 2.4375, "learning_rate": 0.00017664807355459515, "loss": 2.1201, "step": 188675 }, { "epoch": 0.44, "grad_norm": 2.34375, "learning_rate": 0.00017664688644433442, "loss": 2.163, "step": 188680 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017664569930788977, "loss": 2.1734, "step": 188685 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017664451214526155, "loss": 2.0712, "step": 188690 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017664332495645019, "loss": 2.1532, "step": 188695 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017664213774145604, "loss": 2.0384, "step": 188700 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017664095050027955, "loss": 2.023, "step": 188705 }, { "epoch": 0.44, "grad_norm": 2.578125, "learning_rate": 0.00017663976323292112, "loss": 1.9659, "step": 188710 }, { "epoch": 0.44, "grad_norm": 1.75, "learning_rate": 0.00017663857593938121, "loss": 2.1841, "step": 188715 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.0001766373886196601, "loss": 2.0758, "step": 188720 }, { "epoch": 0.44, "grad_norm": 2.078125, "learning_rate": 0.0001766362012737583, "loss": 2.0601, "step": 188725 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.0001766350139016762, "loss": 2.1887, "step": 188730 }, { "epoch": 0.44, "grad_norm": 2.3125, "learning_rate": 0.00017663382650341417, "loss": 2.0689, "step": 188735 }, { "epoch": 0.44, "grad_norm": 2.46875, "learning_rate": 0.00017663263907897263, "loss": 2.3693, "step": 188740 }, { "epoch": 0.44, "grad_norm": 1.9453125, "learning_rate": 0.00017663145162835202, "loss": 2.1759, "step": 188745 }, { "epoch": 0.44, "grad_norm": 2.484375, "learning_rate": 0.0001766302641515527, "loss": 2.1755, "step": 188750 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017662907664857508, "loss": 2.0836, "step": 188755 }, { "epoch": 0.44, "grad_norm": 2.171875, "learning_rate": 0.0001766278891194196, "loss": 2.2394, "step": 188760 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017662670156408662, "loss": 2.107, "step": 188765 }, { "epoch": 0.44, "grad_norm": 1.90625, "learning_rate": 0.00017662551398257658, "loss": 1.9879, "step": 188770 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017662432637488988, "loss": 2.0795, "step": 188775 }, { "epoch": 0.44, "grad_norm": 1.8828125, "learning_rate": 0.0001766231387410269, "loss": 2.1914, "step": 188780 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017662195108098812, "loss": 2.1025, "step": 188785 }, { "epoch": 0.44, "grad_norm": 1.8671875, "learning_rate": 0.00017662076339477383, "loss": 2.0065, "step": 188790 }, { "epoch": 0.44, "grad_norm": 2.25, "learning_rate": 0.00017661957568238453, "loss": 2.1412, "step": 188795 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.00017661838794382056, "loss": 2.1346, "step": 188800 }, { "epoch": 0.44, "grad_norm": 2.234375, "learning_rate": 0.0001766172001790824, "loss": 2.17, "step": 188805 }, { "epoch": 0.44, "grad_norm": 1.859375, "learning_rate": 0.00017661601238817037, "loss": 2.127, "step": 188810 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017661482457108498, "loss": 2.1847, "step": 188815 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017661363672782655, "loss": 2.1933, "step": 188820 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017661244885839553, "loss": 2.0729, "step": 188825 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017661126096279225, "loss": 2.1228, "step": 188830 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017661007304101722, "loss": 2.0955, "step": 188835 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017660888509307078, "loss": 1.8948, "step": 188840 }, { "epoch": 0.44, "grad_norm": 2.375, "learning_rate": 0.00017660769711895337, "loss": 1.976, "step": 188845 }, { "epoch": 0.44, "grad_norm": 2.390625, "learning_rate": 0.00017660650911866535, "loss": 2.1272, "step": 188850 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017660532109220722, "loss": 2.1359, "step": 188855 }, { "epoch": 0.44, "grad_norm": 2.046875, "learning_rate": 0.0001766041330395793, "loss": 2.1395, "step": 188860 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017660294496078197, "loss": 2.1617, "step": 188865 }, { "epoch": 0.44, "grad_norm": 2.40625, "learning_rate": 0.00017660175685581573, "loss": 2.1489, "step": 188870 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.0001766005687246809, "loss": 2.0221, "step": 188875 }, { "epoch": 0.44, "grad_norm": 2.59375, "learning_rate": 0.00017659938056737798, "loss": 2.1044, "step": 188880 }, { "epoch": 0.44, "grad_norm": 1.84375, "learning_rate": 0.0001765981923839073, "loss": 2.2273, "step": 188885 }, { "epoch": 0.44, "grad_norm": 2.0625, "learning_rate": 0.00017659700417426926, "loss": 2.1794, "step": 188890 }, { "epoch": 0.44, "grad_norm": 2.5625, "learning_rate": 0.00017659581593846438, "loss": 2.2882, "step": 188895 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.0001765946276764929, "loss": 2.2616, "step": 188900 }, { "epoch": 0.44, "grad_norm": 2.1875, "learning_rate": 0.00017659343938835533, "loss": 2.3809, "step": 188905 }, { "epoch": 0.44, "grad_norm": 2.0, "learning_rate": 0.00017659225107405203, "loss": 2.0749, "step": 188910 }, { "epoch": 0.44, "grad_norm": 2.265625, "learning_rate": 0.00017659106273358347, "loss": 2.1177, "step": 188915 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017658987436695, "loss": 2.1036, "step": 188920 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017658868597415203, "loss": 2.0378, "step": 188925 }, { "epoch": 0.44, "grad_norm": 2.703125, "learning_rate": 0.00017658749755518994, "loss": 2.1313, "step": 188930 }, { "epoch": 0.44, "grad_norm": 2.09375, "learning_rate": 0.00017658630911006424, "loss": 2.0866, "step": 188935 }, { "epoch": 0.44, "grad_norm": 1.890625, "learning_rate": 0.00017658512063877525, "loss": 2.323, "step": 188940 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017658393214132337, "loss": 1.9764, "step": 188945 }, { "epoch": 0.44, "grad_norm": 2.109375, "learning_rate": 0.00017658274361770905, "loss": 2.2205, "step": 188950 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017658155506793265, "loss": 2.0821, "step": 188955 }, { "epoch": 0.44, "grad_norm": 2.90625, "learning_rate": 0.00017658036649199462, "loss": 2.0552, "step": 188960 }, { "epoch": 0.44, "grad_norm": 2.5, "learning_rate": 0.00017657917788989536, "loss": 2.0075, "step": 188965 }, { "epoch": 0.44, "grad_norm": 1.9921875, "learning_rate": 0.00017657798926163523, "loss": 2.291, "step": 188970 }, { "epoch": 0.44, "grad_norm": 1.9296875, "learning_rate": 0.00017657680060721468, "loss": 2.1026, "step": 188975 }, { "epoch": 0.44, "grad_norm": 2.03125, "learning_rate": 0.00017657561192663414, "loss": 2.1417, "step": 188980 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017657442321989395, "loss": 2.0781, "step": 188985 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.00017657323448699456, "loss": 2.3442, "step": 188990 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.0001765720457279364, "loss": 1.9441, "step": 188995 }, { "epoch": 0.44, "grad_norm": 2.015625, "learning_rate": 0.00017657085694271975, "loss": 1.9691, "step": 189000 }, { "epoch": 0.44, "grad_norm": 2.125, "learning_rate": 0.00017656966813134515, "loss": 2.1366, "step": 189005 }, { "epoch": 0.44, "grad_norm": 1.6953125, "learning_rate": 0.000176568479293813, "loss": 2.1048, "step": 189010 }, { "epoch": 0.44, "grad_norm": 1.921875, "learning_rate": 0.00017656729043012364, "loss": 2.2472, "step": 189015 }, { "epoch": 0.44, "grad_norm": 1.953125, "learning_rate": 0.0001765661015402775, "loss": 2.0628, "step": 189020 }, { "epoch": 0.44, "grad_norm": 1.9609375, "learning_rate": 0.000176564912624275, "loss": 1.9935, "step": 189025 }, { "epoch": 0.44, "grad_norm": 1.984375, "learning_rate": 0.00017656372368211653, "loss": 2.1126, "step": 189030 }, { "epoch": 0.44, "grad_norm": 2.203125, "learning_rate": 0.00017656253471380251, "loss": 1.9943, "step": 189035 }, { "epoch": 0.44, "grad_norm": 1.65625, "learning_rate": 0.00017656134571933333, "loss": 2.1928, "step": 189040 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017656015669870943, "loss": 2.0073, "step": 189045 }, { "epoch": 0.44, "grad_norm": 1.7734375, "learning_rate": 0.00017655896765193116, "loss": 2.2045, "step": 189050 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017655777857899897, "loss": 2.1962, "step": 189055 }, { "epoch": 0.44, "grad_norm": 2.71875, "learning_rate": 0.00017655658947991328, "loss": 2.2482, "step": 189060 }, { "epoch": 0.44, "grad_norm": 2.15625, "learning_rate": 0.00017655540035467444, "loss": 2.0874, "step": 189065 }, { "epoch": 0.44, "grad_norm": 1.96875, "learning_rate": 0.00017655421120328288, "loss": 2.1891, "step": 189070 }, { "epoch": 0.44, "grad_norm": 2.140625, "learning_rate": 0.00017655302202573904, "loss": 2.1127, "step": 189075 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.0001765518328220433, "loss": 2.0707, "step": 189080 }, { "epoch": 0.44, "grad_norm": 2.296875, "learning_rate": 0.000176550643592196, "loss": 2.0643, "step": 189085 }, { "epoch": 0.44, "grad_norm": 2.4375, "learning_rate": 0.0001765494543361977, "loss": 2.2316, "step": 189090 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.0001765482650540487, "loss": 2.1644, "step": 189095 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017654707574574937, "loss": 2.2967, "step": 189100 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017654588641130022, "loss": 2.1046, "step": 189105 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001765446970507016, "loss": 2.1616, "step": 189110 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001765435076639539, "loss": 2.1477, "step": 189115 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017654231825105758, "loss": 2.1078, "step": 189120 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.000176541128812013, "loss": 2.0352, "step": 189125 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.0001765399393468206, "loss": 1.9727, "step": 189130 }, { "epoch": 0.45, "grad_norm": 3.09375, "learning_rate": 0.00017653874985548072, "loss": 2.1267, "step": 189135 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017653756033799388, "loss": 2.2122, "step": 189140 }, { "epoch": 0.45, "grad_norm": 1.8203125, "learning_rate": 0.0001765363707943604, "loss": 2.1289, "step": 189145 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.0001765351812245807, "loss": 2.1108, "step": 189150 }, { "epoch": 0.45, "grad_norm": 1.9921875, "learning_rate": 0.00017653399162865518, "loss": 2.018, "step": 189155 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017653280200658427, "loss": 2.1948, "step": 189160 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017653161235836837, "loss": 2.2855, "step": 189165 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017653042268400788, "loss": 2.0099, "step": 189170 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.0001765292329835032, "loss": 1.9419, "step": 189175 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.0001765280432568548, "loss": 2.0533, "step": 189180 }, { "epoch": 0.45, "grad_norm": 2.484375, "learning_rate": 0.00017652685350406295, "loss": 1.9643, "step": 189185 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.0001765256637251282, "loss": 2.1069, "step": 189190 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017652447392005086, "loss": 1.9474, "step": 189195 }, { "epoch": 0.45, "grad_norm": 2.765625, "learning_rate": 0.0001765232840888314, "loss": 2.2368, "step": 189200 }, { "epoch": 0.45, "grad_norm": 2.484375, "learning_rate": 0.00017652209423147018, "loss": 1.985, "step": 189205 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017652090434796763, "loss": 2.1926, "step": 189210 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017651971443832416, "loss": 2.1736, "step": 189215 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017651852450254016, "loss": 2.2089, "step": 189220 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017651733454061603, "loss": 1.9177, "step": 189225 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.0001765161445525522, "loss": 2.1324, "step": 189230 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.00017651495453834906, "loss": 2.0934, "step": 189235 }, { "epoch": 0.45, "grad_norm": 2.46875, "learning_rate": 0.00017651376449800705, "loss": 2.0358, "step": 189240 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017651257443152652, "loss": 2.1352, "step": 189245 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017651138433890794, "loss": 2.1052, "step": 189250 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017651019422015168, "loss": 2.2509, "step": 189255 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017650900407525813, "loss": 2.1211, "step": 189260 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017650781390422772, "loss": 2.0337, "step": 189265 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017650662370706085, "loss": 2.0089, "step": 189270 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017650543348375795, "loss": 2.1286, "step": 189275 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001765042432343194, "loss": 2.1697, "step": 189280 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001765030529587456, "loss": 2.176, "step": 189285 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017650186265703697, "loss": 2.2481, "step": 189290 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017650067232919393, "loss": 2.1672, "step": 189295 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017649948197521686, "loss": 2.1633, "step": 189300 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.0001764982915951062, "loss": 2.1186, "step": 189305 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017649710118886232, "loss": 2.2137, "step": 189310 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017649591075648562, "loss": 2.1055, "step": 189315 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017649472029797656, "loss": 2.1502, "step": 189320 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017649352981333552, "loss": 2.011, "step": 189325 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017649233930256287, "loss": 1.9952, "step": 189330 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017649114876565908, "loss": 2.0165, "step": 189335 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017648995820262453, "loss": 2.249, "step": 189340 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.0001764887676134596, "loss": 2.2011, "step": 189345 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017648757699816473, "loss": 1.9717, "step": 189350 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001764863863567403, "loss": 2.0934, "step": 189355 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017648519568918677, "loss": 1.9854, "step": 189360 }, { "epoch": 0.45, "grad_norm": 2.546875, "learning_rate": 0.00017648400499550447, "loss": 1.9659, "step": 189365 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017648281427569388, "loss": 2.0901, "step": 189370 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017648162352975537, "loss": 1.9654, "step": 189375 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017648043275768932, "loss": 2.3404, "step": 189380 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.0001764792419594962, "loss": 2.1654, "step": 189385 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017647805113517637, "loss": 2.0271, "step": 189390 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017647686028473026, "loss": 2.0956, "step": 189395 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017647566940815824, "loss": 2.1489, "step": 189400 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.0001764744785054608, "loss": 2.2064, "step": 189405 }, { "epoch": 0.45, "grad_norm": 1.75, "learning_rate": 0.00017647328757663825, "loss": 1.9636, "step": 189410 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017647209662169104, "loss": 2.4529, "step": 189415 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017647090564061957, "loss": 1.9861, "step": 189420 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017646971463342425, "loss": 2.1302, "step": 189425 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017646852360010554, "loss": 2.143, "step": 189430 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017646733254066374, "loss": 2.1464, "step": 189435 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017646614145509933, "loss": 2.2983, "step": 189440 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017646495034341267, "loss": 1.9516, "step": 189445 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017646375920560422, "loss": 2.206, "step": 189450 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.00017646256804167439, "loss": 1.9318, "step": 189455 }, { "epoch": 0.45, "grad_norm": 3.265625, "learning_rate": 0.00017646137685162355, "loss": 2.1203, "step": 189460 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017646018563545208, "loss": 1.9567, "step": 189465 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017645899439316045, "loss": 2.1482, "step": 189470 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017645780312474903, "loss": 2.0206, "step": 189475 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017645661183021824, "loss": 1.9907, "step": 189480 }, { "epoch": 0.45, "grad_norm": 1.8125, "learning_rate": 0.0001764554205095685, "loss": 2.0964, "step": 189485 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.0001764542291628002, "loss": 2.0306, "step": 189490 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017645303778991372, "loss": 2.146, "step": 189495 }, { "epoch": 0.45, "grad_norm": 1.8515625, "learning_rate": 0.00017645184639090956, "loss": 2.1209, "step": 189500 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.000176450654965788, "loss": 2.1248, "step": 189505 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017644946351454951, "loss": 2.011, "step": 189510 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017644827203719453, "loss": 1.9772, "step": 189515 }, { "epoch": 0.45, "grad_norm": 2.96875, "learning_rate": 0.00017644708053372342, "loss": 2.0633, "step": 189520 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.0001764458890041366, "loss": 2.2057, "step": 189525 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017644469744843448, "loss": 1.9334, "step": 189530 }, { "epoch": 0.45, "grad_norm": 2.71875, "learning_rate": 0.00017644350586661745, "loss": 2.2508, "step": 189535 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017644231425868596, "loss": 2.2632, "step": 189540 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017644112262464037, "loss": 2.4001, "step": 189545 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.0001764399309644811, "loss": 2.2021, "step": 189550 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017643873927820856, "loss": 2.1034, "step": 189555 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001764375475658232, "loss": 1.9326, "step": 189560 }, { "epoch": 0.45, "grad_norm": 1.734375, "learning_rate": 0.00017643635582732537, "loss": 2.0206, "step": 189565 }, { "epoch": 0.45, "grad_norm": 1.8671875, "learning_rate": 0.00017643516406271544, "loss": 2.1621, "step": 189570 }, { "epoch": 0.45, "grad_norm": 2.515625, "learning_rate": 0.00017643397227199396, "loss": 2.1662, "step": 189575 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017643278045516119, "loss": 2.0472, "step": 189580 }, { "epoch": 0.45, "grad_norm": 1.75, "learning_rate": 0.00017643158861221762, "loss": 1.9442, "step": 189585 }, { "epoch": 0.45, "grad_norm": 1.703125, "learning_rate": 0.00017643039674316362, "loss": 2.0184, "step": 189590 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017642920484799962, "loss": 2.0066, "step": 189595 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017642801292672603, "loss": 2.0315, "step": 189600 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001764268209793432, "loss": 2.1351, "step": 189605 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017642562900585163, "loss": 2.2386, "step": 189610 }, { "epoch": 0.45, "grad_norm": 1.8828125, "learning_rate": 0.00017642443700625164, "loss": 2.0646, "step": 189615 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017642324498054373, "loss": 2.1152, "step": 189620 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017642205292872819, "loss": 2.2669, "step": 189625 }, { "epoch": 0.45, "grad_norm": 2.5625, "learning_rate": 0.0001764208608508055, "loss": 2.1847, "step": 189630 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017641966874677612, "loss": 2.049, "step": 189635 }, { "epoch": 0.45, "grad_norm": 1.71875, "learning_rate": 0.00017641847661664035, "loss": 2.1401, "step": 189640 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017641728446039862, "loss": 2.1482, "step": 189645 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017641609227805137, "loss": 2.0107, "step": 189650 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017641490006959905, "loss": 2.0768, "step": 189655 }, { "epoch": 0.45, "grad_norm": 1.7421875, "learning_rate": 0.00017641370783504198, "loss": 2.1187, "step": 189660 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017641251557438058, "loss": 2.0682, "step": 189665 }, { "epoch": 0.45, "grad_norm": 1.734375, "learning_rate": 0.0001764113232876153, "loss": 2.1963, "step": 189670 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001764101309747465, "loss": 2.2005, "step": 189675 }, { "epoch": 0.45, "grad_norm": 2.484375, "learning_rate": 0.00017640893863577465, "loss": 2.0617, "step": 189680 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.0001764077462707001, "loss": 2.2343, "step": 189685 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.0001764065538795233, "loss": 1.983, "step": 189690 }, { "epoch": 0.45, "grad_norm": 2.640625, "learning_rate": 0.00017640536146224463, "loss": 2.036, "step": 189695 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001764041690188645, "loss": 2.2295, "step": 189700 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.0001764029765493833, "loss": 2.2307, "step": 189705 }, { "epoch": 0.45, "grad_norm": 2.90625, "learning_rate": 0.00017640178405380148, "loss": 2.0942, "step": 189710 }, { "epoch": 0.45, "grad_norm": 1.734375, "learning_rate": 0.0001764005915321194, "loss": 2.1208, "step": 189715 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.00017639939898433753, "loss": 2.1617, "step": 189720 }, { "epoch": 0.45, "grad_norm": 1.8125, "learning_rate": 0.00017639820641045622, "loss": 2.1806, "step": 189725 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.0001763970138104759, "loss": 2.1561, "step": 189730 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017639582118439697, "loss": 2.1787, "step": 189735 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017639462853221983, "loss": 2.4001, "step": 189740 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017639343585394493, "loss": 2.1889, "step": 189745 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017639224314957262, "loss": 1.9496, "step": 189750 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017639105041910335, "loss": 2.2057, "step": 189755 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.00017638985766253747, "loss": 2.0654, "step": 189760 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.0001763886648798755, "loss": 2.0459, "step": 189765 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017638747207111776, "loss": 2.2833, "step": 189770 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017638627923626462, "loss": 2.1249, "step": 189775 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017638508637531658, "loss": 2.2609, "step": 189780 }, { "epoch": 0.45, "grad_norm": 2.5, "learning_rate": 0.000176383893488274, "loss": 2.0752, "step": 189785 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017638270057513734, "loss": 2.0098, "step": 189790 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001763815076359069, "loss": 2.0771, "step": 189795 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.00017638031467058317, "loss": 1.955, "step": 189800 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.00017637912167916655, "loss": 2.1101, "step": 189805 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017637792866165744, "loss": 2.1113, "step": 189810 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017637673561805625, "loss": 2.2021, "step": 189815 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017637554254836335, "loss": 2.0798, "step": 189820 }, { "epoch": 0.45, "grad_norm": 1.8359375, "learning_rate": 0.00017637434945257923, "loss": 2.0204, "step": 189825 }, { "epoch": 0.45, "grad_norm": 1.78125, "learning_rate": 0.0001763731563307042, "loss": 2.0236, "step": 189830 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017637196318273874, "loss": 2.024, "step": 189835 }, { "epoch": 0.45, "grad_norm": 1.890625, "learning_rate": 0.00017637077000868322, "loss": 2.1904, "step": 189840 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017636957680853806, "loss": 2.0955, "step": 189845 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017636838358230369, "loss": 2.2055, "step": 189850 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017636719032998046, "loss": 2.2495, "step": 189855 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017636599705156885, "loss": 2.0552, "step": 189860 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017636480374706918, "loss": 2.24, "step": 189865 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017636361041648195, "loss": 2.1274, "step": 189870 }, { "epoch": 0.45, "grad_norm": 1.890625, "learning_rate": 0.0001763624170598075, "loss": 2.0251, "step": 189875 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.0001763612236770463, "loss": 2.1734, "step": 189880 }, { "epoch": 0.45, "grad_norm": 1.984375, "learning_rate": 0.0001763600302681987, "loss": 2.2007, "step": 189885 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001763588368332651, "loss": 1.9503, "step": 189890 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.00017635764337224597, "loss": 2.193, "step": 189895 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.0001763564498851417, "loss": 2.2322, "step": 189900 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017635525637195264, "loss": 1.9818, "step": 189905 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017635406283267928, "loss": 2.1896, "step": 189910 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017635286926732198, "loss": 2.0329, "step": 189915 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.00017635167567588112, "loss": 2.0193, "step": 189920 }, { "epoch": 0.45, "grad_norm": 1.84375, "learning_rate": 0.00017635048205835716, "loss": 2.0086, "step": 189925 }, { "epoch": 0.45, "grad_norm": 2.59375, "learning_rate": 0.00017634928841475049, "loss": 2.1006, "step": 189930 }, { "epoch": 0.45, "grad_norm": 2.65625, "learning_rate": 0.00017634809474506154, "loss": 2.1772, "step": 189935 }, { "epoch": 0.45, "grad_norm": 1.734375, "learning_rate": 0.00017634690104929068, "loss": 1.9825, "step": 189940 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017634570732743833, "loss": 2.1488, "step": 189945 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.0001763445135795049, "loss": 2.0107, "step": 189950 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017634331980549082, "loss": 2.1301, "step": 189955 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017634212600539642, "loss": 2.1554, "step": 189960 }, { "epoch": 0.45, "grad_norm": 1.734375, "learning_rate": 0.00017634093217922225, "loss": 2.0942, "step": 189965 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017633973832696857, "loss": 2.0358, "step": 189970 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.0001763385444486359, "loss": 2.1496, "step": 189975 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.00017633735054422455, "loss": 2.0125, "step": 189980 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.000176336156613735, "loss": 2.0157, "step": 189985 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017633496265716763, "loss": 1.9359, "step": 189990 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.00017633376867452286, "loss": 2.2755, "step": 189995 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017633257466580108, "loss": 2.0706, "step": 190000 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.0001763313806310027, "loss": 2.2415, "step": 190005 }, { "epoch": 0.45, "grad_norm": 1.9453125, "learning_rate": 0.00017633018657012818, "loss": 2.131, "step": 190010 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017632899248317782, "loss": 2.2398, "step": 190015 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017632779837015216, "loss": 2.1376, "step": 190020 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017632660423105148, "loss": 2.0862, "step": 190025 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.0001763254100658763, "loss": 2.017, "step": 190030 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017632421587462693, "loss": 2.1613, "step": 190035 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017632302165730384, "loss": 2.0118, "step": 190040 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017632182741390744, "loss": 2.0964, "step": 190045 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017632063314443807, "loss": 2.3086, "step": 190050 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017631943884889625, "loss": 2.1129, "step": 190055 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017631824452728228, "loss": 2.1558, "step": 190060 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017631705017959664, "loss": 2.1787, "step": 190065 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017631585580583968, "loss": 2.0085, "step": 190070 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017631466140601186, "loss": 2.086, "step": 190075 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017631346698011359, "loss": 2.1469, "step": 190080 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.00017631227252814524, "loss": 1.9513, "step": 190085 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017631107805010722, "loss": 2.0646, "step": 190090 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017630988354599994, "loss": 2.1984, "step": 190095 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017630868901582385, "loss": 2.1045, "step": 190100 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017630749445957932, "loss": 2.0805, "step": 190105 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017630629987726675, "loss": 1.9903, "step": 190110 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017630510526888657, "loss": 2.0572, "step": 190115 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017630391063443922, "loss": 2.0801, "step": 190120 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017630271597392504, "loss": 1.9933, "step": 190125 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017630152128734444, "loss": 2.0842, "step": 190130 }, { "epoch": 0.45, "grad_norm": 1.9453125, "learning_rate": 0.0001763003265746979, "loss": 1.9942, "step": 190135 }, { "epoch": 0.45, "grad_norm": 1.7890625, "learning_rate": 0.00017629913183598577, "loss": 2.1869, "step": 190140 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017629793707120848, "loss": 1.9924, "step": 190145 }, { "epoch": 0.45, "grad_norm": 1.8828125, "learning_rate": 0.0001762967422803664, "loss": 2.1945, "step": 190150 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017629554746346, "loss": 2.12, "step": 190155 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017629435262048965, "loss": 2.062, "step": 190160 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017629315775145575, "loss": 1.9412, "step": 190165 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017629196285635877, "loss": 2.0898, "step": 190170 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017629076793519903, "loss": 2.0658, "step": 190175 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.000176289572987977, "loss": 2.0429, "step": 190180 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.00017628837801469304, "loss": 2.2101, "step": 190185 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.0001762871830153476, "loss": 2.0177, "step": 190190 }, { "epoch": 0.45, "grad_norm": 1.9921875, "learning_rate": 0.0001762859879899411, "loss": 2.2055, "step": 190195 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017628479293847388, "loss": 2.239, "step": 190200 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.0001762835978609464, "loss": 2.3166, "step": 190205 }, { "epoch": 0.45, "grad_norm": 2.390625, "learning_rate": 0.0001762824027573591, "loss": 2.162, "step": 190210 }, { "epoch": 0.45, "grad_norm": 1.78125, "learning_rate": 0.0001762812076277123, "loss": 2.0407, "step": 190215 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017628001247200649, "loss": 2.1734, "step": 190220 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017627881729024206, "loss": 1.9631, "step": 190225 }, { "epoch": 0.45, "grad_norm": 2.859375, "learning_rate": 0.00017627762208241937, "loss": 1.9843, "step": 190230 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017627642684853883, "loss": 2.2055, "step": 190235 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017627523158860093, "loss": 2.2963, "step": 190240 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017627403630260605, "loss": 1.913, "step": 190245 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017627284099055454, "loss": 1.9618, "step": 190250 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017627164565244682, "loss": 1.9288, "step": 190255 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017627045028828335, "loss": 2.1394, "step": 190260 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.0001762692548980645, "loss": 2.2156, "step": 190265 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017626805948179068, "loss": 2.0415, "step": 190270 }, { "epoch": 0.45, "grad_norm": 3.328125, "learning_rate": 0.00017626686403946233, "loss": 2.1352, "step": 190275 }, { "epoch": 0.45, "grad_norm": 1.78125, "learning_rate": 0.0001762656685710798, "loss": 2.0179, "step": 190280 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017626447307664357, "loss": 2.0178, "step": 190285 }, { "epoch": 0.45, "grad_norm": 1.984375, "learning_rate": 0.00017626327755615405, "loss": 1.9763, "step": 190290 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017626208200961153, "loss": 2.3319, "step": 190295 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017626088643701656, "loss": 2.073, "step": 190300 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017625969083836946, "loss": 2.1872, "step": 190305 }, { "epoch": 0.45, "grad_norm": 2.765625, "learning_rate": 0.00017625849521367065, "loss": 2.1759, "step": 190310 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.0001762572995629206, "loss": 2.2057, "step": 190315 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017625610388611964, "loss": 2.1718, "step": 190320 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017625490818326824, "loss": 2.2648, "step": 190325 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017625371245436674, "loss": 2.2361, "step": 190330 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017625251669941563, "loss": 2.1293, "step": 190335 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017625132091841526, "loss": 2.0221, "step": 190340 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017625012511136604, "loss": 2.1525, "step": 190345 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017624892927826843, "loss": 2.2519, "step": 190350 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017624773341912278, "loss": 2.1298, "step": 190355 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001762465375339295, "loss": 2.0285, "step": 190360 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017624534162268907, "loss": 2.1082, "step": 190365 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017624414568540183, "loss": 2.1716, "step": 190370 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.00017624294972206817, "loss": 2.0679, "step": 190375 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.0001762417537326886, "loss": 2.07, "step": 190380 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.0001762405577172634, "loss": 2.1843, "step": 190385 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017623936167579307, "loss": 2.2941, "step": 190390 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.000176238165608278, "loss": 1.9685, "step": 190395 }, { "epoch": 0.45, "grad_norm": 1.6953125, "learning_rate": 0.0001762369695147186, "loss": 2.0539, "step": 190400 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017623577339511524, "loss": 2.2395, "step": 190405 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017623457724946838, "loss": 2.2333, "step": 190410 }, { "epoch": 0.45, "grad_norm": 2.5, "learning_rate": 0.00017623338107777836, "loss": 2.2366, "step": 190415 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017623218488004567, "loss": 2.2302, "step": 190420 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001762309886562707, "loss": 2.0298, "step": 190425 }, { "epoch": 0.45, "grad_norm": 1.8046875, "learning_rate": 0.00017622979240645382, "loss": 2.3006, "step": 190430 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017622859613059547, "loss": 2.2964, "step": 190435 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.00017622739982869602, "loss": 2.0455, "step": 190440 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017622620350075594, "loss": 1.9907, "step": 190445 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017622500714677562, "loss": 2.1982, "step": 190450 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.0001762238107667554, "loss": 1.9771, "step": 190455 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.0001762226143606958, "loss": 2.2181, "step": 190460 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017622141792859714, "loss": 2.1465, "step": 190465 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017622022147045988, "loss": 2.0135, "step": 190470 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.0001762190249862844, "loss": 2.1202, "step": 190475 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.0001762178284760711, "loss": 1.9737, "step": 190480 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017621663193982044, "loss": 2.0746, "step": 190485 }, { "epoch": 0.45, "grad_norm": 1.9453125, "learning_rate": 0.0001762154353775328, "loss": 1.9602, "step": 190490 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017621423878920854, "loss": 2.0337, "step": 190495 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017621304217484816, "loss": 2.0388, "step": 190500 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.000176211845534452, "loss": 2.0684, "step": 190505 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001762106488680205, "loss": 2.1868, "step": 190510 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017620945217555406, "loss": 1.9985, "step": 190515 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017620825545705307, "loss": 2.0727, "step": 190520 }, { "epoch": 0.45, "grad_norm": 1.78125, "learning_rate": 0.00017620705871251798, "loss": 2.0683, "step": 190525 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017620586194194918, "loss": 2.1135, "step": 190530 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017620466514534707, "loss": 2.1247, "step": 190535 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017620346832271207, "loss": 2.2012, "step": 190540 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017620227147404456, "loss": 2.1158, "step": 190545 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017620107459934497, "loss": 2.0723, "step": 190550 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017619987769861375, "loss": 2.033, "step": 190555 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017619868077185121, "loss": 2.0402, "step": 190560 }, { "epoch": 0.45, "grad_norm": 2.875, "learning_rate": 0.00017619748381905788, "loss": 2.1822, "step": 190565 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001761962868402341, "loss": 2.1564, "step": 190570 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.0001761950898353802, "loss": 2.1021, "step": 190575 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017619389280449678, "loss": 2.0949, "step": 190580 }, { "epoch": 0.45, "grad_norm": 1.8125, "learning_rate": 0.00017619269574758412, "loss": 1.9467, "step": 190585 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017619149866464264, "loss": 2.1776, "step": 190590 }, { "epoch": 0.45, "grad_norm": 2.390625, "learning_rate": 0.00017619030155567274, "loss": 2.116, "step": 190595 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.0001761891044206749, "loss": 2.1015, "step": 190600 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017618790725964945, "loss": 2.1003, "step": 190605 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017618671007259683, "loss": 2.0129, "step": 190610 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017618551285951745, "loss": 2.1498, "step": 190615 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017618431562041172, "loss": 2.1887, "step": 190620 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017618311835528006, "loss": 2.0884, "step": 190625 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017618192106412287, "loss": 2.1774, "step": 190630 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.0001761807237469405, "loss": 2.2832, "step": 190635 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.00017617952640373345, "loss": 2.1086, "step": 190640 }, { "epoch": 0.45, "grad_norm": 1.984375, "learning_rate": 0.0001761783290345021, "loss": 2.0533, "step": 190645 }, { "epoch": 0.45, "grad_norm": 1.7734375, "learning_rate": 0.00017617713163924683, "loss": 2.2209, "step": 190650 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017617593421796808, "loss": 2.1306, "step": 190655 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017617473677066628, "loss": 2.1689, "step": 190660 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017617353929734176, "loss": 2.0804, "step": 190665 }, { "epoch": 0.45, "grad_norm": 1.7890625, "learning_rate": 0.00017617234179799498, "loss": 2.0857, "step": 190670 }, { "epoch": 0.45, "grad_norm": 1.8515625, "learning_rate": 0.00017617114427262638, "loss": 1.9701, "step": 190675 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017616994672123632, "loss": 2.1786, "step": 190680 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017616874914382522, "loss": 2.2427, "step": 190685 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.0001761675515403935, "loss": 2.0299, "step": 190690 }, { "epoch": 0.45, "grad_norm": 1.8515625, "learning_rate": 0.00017616635391094155, "loss": 2.173, "step": 190695 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017616515625546982, "loss": 1.993, "step": 190700 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017616395857397867, "loss": 2.0471, "step": 190705 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017616276086646852, "loss": 2.127, "step": 190710 }, { "epoch": 0.45, "grad_norm": 2.53125, "learning_rate": 0.0001761615631329398, "loss": 2.2484, "step": 190715 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017616036537339294, "loss": 2.1705, "step": 190720 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017615916758782828, "loss": 1.9977, "step": 190725 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.0001761579697762463, "loss": 2.0713, "step": 190730 }, { "epoch": 0.45, "grad_norm": 2.859375, "learning_rate": 0.00017615677193864735, "loss": 2.2634, "step": 190735 }, { "epoch": 0.45, "grad_norm": 1.84375, "learning_rate": 0.00017615557407503184, "loss": 2.0312, "step": 190740 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017615437618540025, "loss": 2.0943, "step": 190745 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017615317826975293, "loss": 2.1592, "step": 190750 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001761519803280903, "loss": 2.0664, "step": 190755 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017615078236041278, "loss": 2.1724, "step": 190760 }, { "epoch": 0.45, "grad_norm": 1.6796875, "learning_rate": 0.00017614958436672077, "loss": 1.9881, "step": 190765 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017614838634701468, "loss": 2.1311, "step": 190770 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.0001761471883012949, "loss": 2.2781, "step": 190775 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001761459902295619, "loss": 2.2041, "step": 190780 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017614479213181602, "loss": 2.1288, "step": 190785 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001761435940080577, "loss": 2.0713, "step": 190790 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017614239585828735, "loss": 2.0738, "step": 190795 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017614119768250537, "loss": 2.1242, "step": 190800 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017613999948071219, "loss": 1.9485, "step": 190805 }, { "epoch": 0.45, "grad_norm": 8.4375, "learning_rate": 0.0001761388012529082, "loss": 2.1623, "step": 190810 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.0001761376029990938, "loss": 2.3289, "step": 190815 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017613640471926943, "loss": 2.2076, "step": 190820 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017613520641343548, "loss": 2.1595, "step": 190825 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017613400808159235, "loss": 1.9616, "step": 190830 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017613280972374052, "loss": 1.9317, "step": 190835 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017613161133988027, "loss": 2.2202, "step": 190840 }, { "epoch": 0.45, "grad_norm": 4.59375, "learning_rate": 0.0001761304129300121, "loss": 2.0873, "step": 190845 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017612921449413642, "loss": 2.2469, "step": 190850 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.0001761280160322536, "loss": 2.2194, "step": 190855 }, { "epoch": 0.45, "grad_norm": 1.8828125, "learning_rate": 0.00017612681754436408, "loss": 2.2468, "step": 190860 }, { "epoch": 0.45, "grad_norm": 1.8359375, "learning_rate": 0.00017612561903046828, "loss": 1.9193, "step": 190865 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017612442049056654, "loss": 2.1365, "step": 190870 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017612322192465935, "loss": 2.2917, "step": 190875 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017612202333274708, "loss": 2.1781, "step": 190880 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017612082471483013, "loss": 2.1882, "step": 190885 }, { "epoch": 0.45, "grad_norm": 2.546875, "learning_rate": 0.00017611962607090895, "loss": 2.3049, "step": 190890 }, { "epoch": 0.45, "grad_norm": 1.8515625, "learning_rate": 0.0001761184274009839, "loss": 1.954, "step": 190895 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017611722870505543, "loss": 2.0947, "step": 190900 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017611602998312393, "loss": 2.2476, "step": 190905 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.0001761148312351898, "loss": 2.1819, "step": 190910 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017611363246125352, "loss": 2.2457, "step": 190915 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001761124336613154, "loss": 2.2408, "step": 190920 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017611123483537587, "loss": 2.1924, "step": 190925 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.0001761100359834354, "loss": 2.1762, "step": 190930 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017610883710549434, "loss": 2.072, "step": 190935 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017610763820155313, "loss": 2.0831, "step": 190940 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017610643927161218, "loss": 2.0092, "step": 190945 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017610524031567187, "loss": 2.1751, "step": 190950 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.00017610404133373267, "loss": 2.0077, "step": 190955 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.0001761028423257949, "loss": 2.0376, "step": 190960 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.00017610164329185906, "loss": 2.2216, "step": 190965 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.0001761004442319255, "loss": 2.0866, "step": 190970 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.00017609924514599463, "loss": 2.2375, "step": 190975 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017609804603406688, "loss": 1.9378, "step": 190980 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017609684689614267, "loss": 1.9537, "step": 190985 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017609564773222238, "loss": 2.0025, "step": 190990 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017609444854230647, "loss": 2.1196, "step": 190995 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.0001760932493263953, "loss": 2.1496, "step": 191000 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001760920500844893, "loss": 2.189, "step": 191005 }, { "epoch": 0.45, "grad_norm": 2.53125, "learning_rate": 0.00017609085081658887, "loss": 2.1291, "step": 191010 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017608965152269442, "loss": 2.1602, "step": 191015 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.0001760884522028064, "loss": 2.1282, "step": 191020 }, { "epoch": 0.45, "grad_norm": 2.4375, "learning_rate": 0.00017608725285692513, "loss": 2.1435, "step": 191025 }, { "epoch": 0.45, "grad_norm": 2.53125, "learning_rate": 0.00017608605348505113, "loss": 2.2103, "step": 191030 }, { "epoch": 0.45, "grad_norm": 2.546875, "learning_rate": 0.0001760848540871847, "loss": 2.181, "step": 191035 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017608365466332634, "loss": 1.9911, "step": 191040 }, { "epoch": 0.45, "grad_norm": 1.796875, "learning_rate": 0.0001760824552134764, "loss": 2.0923, "step": 191045 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017608125573763533, "loss": 1.8524, "step": 191050 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017608005623580354, "loss": 2.062, "step": 191055 }, { "epoch": 0.45, "grad_norm": 1.9375, "learning_rate": 0.0001760788567079814, "loss": 2.2675, "step": 191060 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.00017607765715416934, "loss": 2.1018, "step": 191065 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017607645757436777, "loss": 2.175, "step": 191070 }, { "epoch": 0.45, "grad_norm": 1.984375, "learning_rate": 0.00017607525796857712, "loss": 2.0633, "step": 191075 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017607405833679776, "loss": 2.2025, "step": 191080 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017607285867903013, "loss": 2.1298, "step": 191085 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017607165899527464, "loss": 2.1515, "step": 191090 }, { "epoch": 0.45, "grad_norm": 1.8203125, "learning_rate": 0.00017607045928553168, "loss": 2.3109, "step": 191095 }, { "epoch": 0.45, "grad_norm": 1.7109375, "learning_rate": 0.00017606925954980168, "loss": 1.9773, "step": 191100 }, { "epoch": 0.45, "grad_norm": 1.984375, "learning_rate": 0.00017606805978808505, "loss": 2.209, "step": 191105 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017606686000038216, "loss": 2.0953, "step": 191110 }, { "epoch": 0.45, "grad_norm": 2.515625, "learning_rate": 0.0001760656601866935, "loss": 1.9885, "step": 191115 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001760644603470194, "loss": 2.1515, "step": 191120 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017606326048136032, "loss": 1.9503, "step": 191125 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017606206058971662, "loss": 2.0307, "step": 191130 }, { "epoch": 0.45, "grad_norm": 2.53125, "learning_rate": 0.00017606086067208877, "loss": 2.1977, "step": 191135 }, { "epoch": 0.45, "grad_norm": 1.8046875, "learning_rate": 0.00017605966072847711, "loss": 2.2044, "step": 191140 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017605846075888213, "loss": 2.209, "step": 191145 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.0001760572607633042, "loss": 2.1873, "step": 191150 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017605606074174373, "loss": 2.1629, "step": 191155 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017605486069420108, "loss": 2.3148, "step": 191160 }, { "epoch": 0.45, "grad_norm": 1.890625, "learning_rate": 0.00017605366062067675, "loss": 2.1783, "step": 191165 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.00017605246052117114, "loss": 2.0938, "step": 191170 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017605126039568457, "loss": 2.2071, "step": 191175 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017605006024421757, "loss": 2.2422, "step": 191180 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017604886006677045, "loss": 2.0178, "step": 191185 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017604765986334368, "loss": 2.3214, "step": 191190 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017604645963393764, "loss": 2.1499, "step": 191195 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017604525937855273, "loss": 2.1225, "step": 191200 }, { "epoch": 0.45, "grad_norm": 1.84375, "learning_rate": 0.00017604405909718942, "loss": 2.3645, "step": 191205 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017604285878984805, "loss": 2.2924, "step": 191210 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017604165845652906, "loss": 2.0168, "step": 191215 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001760404580972329, "loss": 1.9974, "step": 191220 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.0001760392577119599, "loss": 2.0898, "step": 191225 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017603805730071053, "loss": 2.1592, "step": 191230 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017603685686348516, "loss": 2.2126, "step": 191235 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.00017603565640028423, "loss": 2.1793, "step": 191240 }, { "epoch": 0.45, "grad_norm": 2.46875, "learning_rate": 0.00017603445591110814, "loss": 2.1219, "step": 191245 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.0001760332553959573, "loss": 2.1581, "step": 191250 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017603205485483217, "loss": 2.1544, "step": 191255 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017603085428773304, "loss": 2.0827, "step": 191260 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017602965369466043, "loss": 1.9837, "step": 191265 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.0001760284530756147, "loss": 2.0615, "step": 191270 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017602725243059627, "loss": 2.1256, "step": 191275 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017602605175960555, "loss": 2.2038, "step": 191280 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017602485106264296, "loss": 2.3574, "step": 191285 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001760236503397089, "loss": 2.2549, "step": 191290 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017602244959080378, "loss": 1.9985, "step": 191295 }, { "epoch": 0.45, "grad_norm": 1.8125, "learning_rate": 0.000176021248815928, "loss": 2.1515, "step": 191300 }, { "epoch": 0.45, "grad_norm": 2.546875, "learning_rate": 0.000176020048015082, "loss": 2.1371, "step": 191305 }, { "epoch": 0.45, "grad_norm": 1.75, "learning_rate": 0.0001760188471882662, "loss": 2.2527, "step": 191310 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017601764633548092, "loss": 1.9968, "step": 191315 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017601644545672666, "loss": 1.9855, "step": 191320 }, { "epoch": 0.45, "grad_norm": 2.53125, "learning_rate": 0.0001760152445520038, "loss": 2.1979, "step": 191325 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017601404362131278, "loss": 1.9805, "step": 191330 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017601284266465398, "loss": 2.0298, "step": 191335 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.00017601164168202778, "loss": 2.1494, "step": 191340 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017601044067343464, "loss": 2.019, "step": 191345 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017600923963887496, "loss": 2.1697, "step": 191350 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017600803857834916, "loss": 2.0588, "step": 191355 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.0001760068374918576, "loss": 1.998, "step": 191360 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017600563637940075, "loss": 2.1194, "step": 191365 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.000176004435240979, "loss": 2.038, "step": 191370 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017600323407659273, "loss": 1.9297, "step": 191375 }, { "epoch": 0.45, "grad_norm": 1.7421875, "learning_rate": 0.00017600203288624242, "loss": 2.1062, "step": 191380 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017600083166992842, "loss": 2.1443, "step": 191385 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017599963042765114, "loss": 2.1134, "step": 191390 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017599842915941102, "loss": 1.9964, "step": 191395 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.00017599722786520845, "loss": 2.0583, "step": 191400 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017599602654504385, "loss": 2.2404, "step": 191405 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017599482519891764, "loss": 2.1258, "step": 191410 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.00017599362382683022, "loss": 2.3079, "step": 191415 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.000175992422428782, "loss": 2.1781, "step": 191420 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001759912210047734, "loss": 2.2579, "step": 191425 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.0001759900195548048, "loss": 2.1202, "step": 191430 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017598881807887664, "loss": 2.0803, "step": 191435 }, { "epoch": 0.45, "grad_norm": 1.9921875, "learning_rate": 0.0001759876165769893, "loss": 2.062, "step": 191440 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017598641504914324, "loss": 2.2342, "step": 191445 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017598521349533884, "loss": 2.0592, "step": 191450 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.0001759840119155765, "loss": 2.1153, "step": 191455 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017598281030985663, "loss": 2.179, "step": 191460 }, { "epoch": 0.45, "grad_norm": 1.8203125, "learning_rate": 0.00017598160867817968, "loss": 2.1458, "step": 191465 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017598040702054604, "loss": 2.0822, "step": 191470 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.0001759792053369561, "loss": 2.1173, "step": 191475 }, { "epoch": 0.45, "grad_norm": 1.7421875, "learning_rate": 0.00017597800362741028, "loss": 2.0102, "step": 191480 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017597680189190902, "loss": 2.017, "step": 191485 }, { "epoch": 0.45, "grad_norm": 2.515625, "learning_rate": 0.00017597560013045267, "loss": 2.1277, "step": 191490 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001759743983430417, "loss": 2.1404, "step": 191495 }, { "epoch": 0.45, "grad_norm": 1.7734375, "learning_rate": 0.0001759731965296765, "loss": 2.166, "step": 191500 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017597199469035748, "loss": 2.2687, "step": 191505 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017597079282508505, "loss": 2.106, "step": 191510 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017596959093385962, "loss": 1.8844, "step": 191515 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017596838901668159, "loss": 2.0238, "step": 191520 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017596718707355136, "loss": 2.1507, "step": 191525 }, { "epoch": 0.45, "grad_norm": 1.8671875, "learning_rate": 0.0001759659851044694, "loss": 2.0023, "step": 191530 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017596478310943605, "loss": 2.0087, "step": 191535 }, { "epoch": 0.45, "grad_norm": 2.390625, "learning_rate": 0.00017596358108845178, "loss": 2.2088, "step": 191540 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017596237904151694, "loss": 2.1554, "step": 191545 }, { "epoch": 0.45, "grad_norm": 2.71875, "learning_rate": 0.000175961176968632, "loss": 2.2234, "step": 191550 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.00017595997486979733, "loss": 2.0739, "step": 191555 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.0001759587727450134, "loss": 2.0828, "step": 191560 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.0001759575705942805, "loss": 2.1686, "step": 191565 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017595636841759917, "loss": 2.1541, "step": 191570 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017595516621496976, "loss": 2.2478, "step": 191575 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017595396398639268, "loss": 2.0098, "step": 191580 }, { "epoch": 0.45, "grad_norm": 1.765625, "learning_rate": 0.00017595276173186836, "loss": 1.8513, "step": 191585 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.0001759515594513972, "loss": 2.0344, "step": 191590 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017595035714497957, "loss": 2.1327, "step": 191595 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.00017594915481261597, "loss": 2.091, "step": 191600 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001759479524543067, "loss": 1.9835, "step": 191605 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017594675007005228, "loss": 2.1233, "step": 191610 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017594554765985307, "loss": 2.0806, "step": 191615 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.0001759443452237095, "loss": 2.1633, "step": 191620 }, { "epoch": 0.45, "grad_norm": 1.4453125, "learning_rate": 0.0001759431427616219, "loss": 2.0321, "step": 191625 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.0001759419402735908, "loss": 2.0377, "step": 191630 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017594073775961655, "loss": 2.0646, "step": 191635 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017593953521969957, "loss": 2.0111, "step": 191640 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.0001759383326538403, "loss": 2.2791, "step": 191645 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017593713006203903, "loss": 2.1611, "step": 191650 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.0001759359274442963, "loss": 2.0587, "step": 191655 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001759347248006125, "loss": 2.0274, "step": 191660 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.000175933522130988, "loss": 2.0783, "step": 191665 }, { "epoch": 0.45, "grad_norm": 1.640625, "learning_rate": 0.00017593231943542324, "loss": 2.0853, "step": 191670 }, { "epoch": 0.45, "grad_norm": 2.5, "learning_rate": 0.0001759311167139186, "loss": 2.1917, "step": 191675 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017592991396647457, "loss": 2.2136, "step": 191680 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017592871119309145, "loss": 2.1494, "step": 191685 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017592750839376972, "loss": 2.2473, "step": 191690 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017592630556850978, "loss": 2.0, "step": 191695 }, { "epoch": 0.45, "grad_norm": 1.890625, "learning_rate": 0.00017592510271731204, "loss": 2.1682, "step": 191700 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.0001759238998401769, "loss": 2.1252, "step": 191705 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001759226969371048, "loss": 1.9992, "step": 191710 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.0001759214940080961, "loss": 2.0068, "step": 191715 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017592029105315126, "loss": 1.9648, "step": 191720 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.00017591908807227066, "loss": 2.1897, "step": 191725 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.00017591788506545473, "loss": 2.0358, "step": 191730 }, { "epoch": 0.45, "grad_norm": 1.9375, "learning_rate": 0.0001759166820327039, "loss": 2.0819, "step": 191735 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.0001759154789740185, "loss": 2.0314, "step": 191740 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017591427588939902, "loss": 2.2202, "step": 191745 }, { "epoch": 0.45, "grad_norm": 1.609375, "learning_rate": 0.00017591307277884584, "loss": 2.0386, "step": 191750 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.0001759118696423594, "loss": 2.2086, "step": 191755 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017591066647994005, "loss": 1.9703, "step": 191760 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.0001759094632915883, "loss": 1.9639, "step": 191765 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017590826007730446, "loss": 2.1159, "step": 191770 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017590705683708896, "loss": 2.1522, "step": 191775 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017590585357094227, "loss": 2.103, "step": 191780 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017590465027886475, "loss": 2.1172, "step": 191785 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017590344696085682, "loss": 2.0141, "step": 191790 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017590224361691893, "loss": 2.0932, "step": 191795 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017590104024705144, "loss": 2.0997, "step": 191800 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017589983685125475, "loss": 2.1244, "step": 191805 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017589863342952933, "loss": 2.0406, "step": 191810 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017589742998187554, "loss": 2.1083, "step": 191815 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017589622650829382, "loss": 2.202, "step": 191820 }, { "epoch": 0.45, "grad_norm": 1.765625, "learning_rate": 0.00017589502300878457, "loss": 2.2566, "step": 191825 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.0001758938194833482, "loss": 2.1082, "step": 191830 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017589261593198513, "loss": 2.0606, "step": 191835 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017589141235469579, "loss": 2.0976, "step": 191840 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001758902087514805, "loss": 2.061, "step": 191845 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017588900512233982, "loss": 2.186, "step": 191850 }, { "epoch": 0.45, "grad_norm": 1.8203125, "learning_rate": 0.00017588780146727404, "loss": 2.1215, "step": 191855 }, { "epoch": 0.45, "grad_norm": 2.609375, "learning_rate": 0.0001758865977862836, "loss": 1.9926, "step": 191860 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017588539407936894, "loss": 2.1185, "step": 191865 }, { "epoch": 0.45, "grad_norm": 1.7734375, "learning_rate": 0.00017588419034653046, "loss": 1.9472, "step": 191870 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017588298658776855, "loss": 2.1589, "step": 191875 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017588178280308366, "loss": 2.1532, "step": 191880 }, { "epoch": 0.45, "grad_norm": 2.484375, "learning_rate": 0.00017588057899247615, "loss": 2.0937, "step": 191885 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.00017587937515594644, "loss": 2.1733, "step": 191890 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017587817129349499, "loss": 2.0781, "step": 191895 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017587696740512215, "loss": 2.1241, "step": 191900 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.0001758757634908284, "loss": 1.9776, "step": 191905 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017587455955061409, "loss": 2.0655, "step": 191910 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017587335558447967, "loss": 2.272, "step": 191915 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017587215159242553, "loss": 2.0688, "step": 191920 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017587094757445207, "loss": 2.1691, "step": 191925 }, { "epoch": 0.45, "grad_norm": 1.6796875, "learning_rate": 0.00017586974353055973, "loss": 2.3077, "step": 191930 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017586853946074892, "loss": 2.0277, "step": 191935 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017586733536502003, "loss": 2.1441, "step": 191940 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.0001758661312433735, "loss": 2.0177, "step": 191945 }, { "epoch": 0.45, "grad_norm": 2.53125, "learning_rate": 0.0001758649270958097, "loss": 1.8745, "step": 191950 }, { "epoch": 0.45, "grad_norm": 1.9453125, "learning_rate": 0.00017586372292232908, "loss": 2.0856, "step": 191955 }, { "epoch": 0.45, "grad_norm": 2.4375, "learning_rate": 0.00017586251872293206, "loss": 2.2186, "step": 191960 }, { "epoch": 0.45, "grad_norm": 1.8828125, "learning_rate": 0.00017586131449761898, "loss": 2.1729, "step": 191965 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017586011024639032, "loss": 2.2759, "step": 191970 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017585890596924646, "loss": 2.1255, "step": 191975 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017585770166618785, "loss": 2.2225, "step": 191980 }, { "epoch": 0.45, "grad_norm": 2.390625, "learning_rate": 0.00017585649733721482, "loss": 2.0192, "step": 191985 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017585529298232787, "loss": 2.1456, "step": 191990 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001758540886015274, "loss": 2.1276, "step": 191995 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017585288419481377, "loss": 2.2757, "step": 192000 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.0001758516797621874, "loss": 2.1086, "step": 192005 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017585047530364875, "loss": 2.0943, "step": 192010 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.0001758492708191982, "loss": 2.0407, "step": 192015 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017584806630883618, "loss": 2.3083, "step": 192020 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017584686177256304, "loss": 2.1864, "step": 192025 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017584565721037927, "loss": 2.258, "step": 192030 }, { "epoch": 0.45, "grad_norm": 1.78125, "learning_rate": 0.00017584445262228523, "loss": 2.2027, "step": 192035 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017584324800828135, "loss": 2.1347, "step": 192040 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017584204336836803, "loss": 2.1033, "step": 192045 }, { "epoch": 0.45, "grad_norm": 2.6875, "learning_rate": 0.00017584083870254572, "loss": 2.0891, "step": 192050 }, { "epoch": 0.45, "grad_norm": 2.484375, "learning_rate": 0.0001758396340108148, "loss": 1.9407, "step": 192055 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017583842929317568, "loss": 2.3126, "step": 192060 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.0001758372245496288, "loss": 2.2398, "step": 192065 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.0001758360197801745, "loss": 2.0348, "step": 192070 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017583481498481326, "loss": 2.2355, "step": 192075 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.0001758336101635455, "loss": 2.0963, "step": 192080 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017583240531637156, "loss": 2.0866, "step": 192085 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.00017583120044329192, "loss": 2.1933, "step": 192090 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017582999554430697, "loss": 2.269, "step": 192095 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.0001758287906194171, "loss": 1.8462, "step": 192100 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017582758566862277, "loss": 2.1531, "step": 192105 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017582638069192435, "loss": 2.0641, "step": 192110 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017582517568932225, "loss": 1.9973, "step": 192115 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017582397066081691, "loss": 1.9572, "step": 192120 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017582276560640869, "loss": 2.2739, "step": 192125 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017582156052609808, "loss": 2.0848, "step": 192130 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017582035541988545, "loss": 2.0963, "step": 192135 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017581915028777118, "loss": 2.0071, "step": 192140 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017581794512975572, "loss": 2.2932, "step": 192145 }, { "epoch": 0.45, "grad_norm": 2.8125, "learning_rate": 0.0001758167399458395, "loss": 2.0368, "step": 192150 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.0001758155347360229, "loss": 2.2699, "step": 192155 }, { "epoch": 0.45, "grad_norm": 1.9921875, "learning_rate": 0.0001758143295003063, "loss": 1.8951, "step": 192160 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.0001758131242386902, "loss": 2.0593, "step": 192165 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017581191895117492, "loss": 1.9907, "step": 192170 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017581071363776096, "loss": 2.1041, "step": 192175 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017580950829844866, "loss": 2.1662, "step": 192180 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017580830293323845, "loss": 2.1226, "step": 192185 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.00017580709754213075, "loss": 2.2224, "step": 192190 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017580589212512599, "loss": 2.1668, "step": 192195 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017580468668222454, "loss": 2.0368, "step": 192200 }, { "epoch": 0.45, "grad_norm": 2.609375, "learning_rate": 0.00017580348121342684, "loss": 2.2595, "step": 192205 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.0001758022757187333, "loss": 2.0811, "step": 192210 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017580107019814434, "loss": 2.1938, "step": 192215 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017579986465166033, "loss": 2.1469, "step": 192220 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017579865907928172, "loss": 2.0591, "step": 192225 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.00017579745348100895, "loss": 2.1642, "step": 192230 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017579624785684237, "loss": 2.137, "step": 192235 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.00017579504220678241, "loss": 2.1256, "step": 192240 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017579383653082949, "loss": 2.2558, "step": 192245 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017579263082898402, "loss": 2.2171, "step": 192250 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.00017579142510124642, "loss": 2.0552, "step": 192255 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017579021934761708, "loss": 2.0195, "step": 192260 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017578901356809644, "loss": 2.1612, "step": 192265 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001757878077626849, "loss": 2.0885, "step": 192270 }, { "epoch": 0.45, "grad_norm": 2.4375, "learning_rate": 0.00017578660193138287, "loss": 2.1945, "step": 192275 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017578539607419075, "loss": 1.9633, "step": 192280 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.000175784190191109, "loss": 2.1669, "step": 192285 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017578298428213795, "loss": 1.9705, "step": 192290 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017578177834727806, "loss": 2.1361, "step": 192295 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017578057238652978, "loss": 2.1672, "step": 192300 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017577936639989346, "loss": 2.1488, "step": 192305 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017577816038736953, "loss": 2.0068, "step": 192310 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.0001757769543489584, "loss": 2.1744, "step": 192315 }, { "epoch": 0.45, "grad_norm": 2.921875, "learning_rate": 0.00017577574828466053, "loss": 2.002, "step": 192320 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017577454219447623, "loss": 2.0541, "step": 192325 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017577333607840602, "loss": 2.1082, "step": 192330 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017577212993645023, "loss": 2.0625, "step": 192335 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.00017577092376860933, "loss": 2.089, "step": 192340 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.0001757697175748837, "loss": 2.0092, "step": 192345 }, { "epoch": 0.45, "grad_norm": 1.671875, "learning_rate": 0.00017576851135527375, "loss": 2.0315, "step": 192350 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017576730510977993, "loss": 2.0178, "step": 192355 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017576609883840257, "loss": 2.0999, "step": 192360 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001757648925411422, "loss": 2.0666, "step": 192365 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017576368621799912, "loss": 2.2085, "step": 192370 }, { "epoch": 0.45, "grad_norm": 1.796875, "learning_rate": 0.00017576247986897384, "loss": 1.9667, "step": 192375 }, { "epoch": 0.45, "grad_norm": 1.7734375, "learning_rate": 0.0001757612734940667, "loss": 2.2495, "step": 192380 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.0001757600670932781, "loss": 2.167, "step": 192385 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017575886066660855, "loss": 1.9352, "step": 192390 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017575765421405835, "loss": 2.0005, "step": 192395 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017575644773562798, "loss": 2.1175, "step": 192400 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.00017575524123131784, "loss": 2.148, "step": 192405 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017575403470112834, "loss": 1.9417, "step": 192410 }, { "epoch": 0.45, "grad_norm": 2.59375, "learning_rate": 0.00017575282814505987, "loss": 1.9667, "step": 192415 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017575162156311283, "loss": 2.2631, "step": 192420 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017575041495528773, "loss": 2.0072, "step": 192425 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.00017574920832158486, "loss": 2.1339, "step": 192430 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017574800166200475, "loss": 2.0783, "step": 192435 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017574679497654768, "loss": 1.9625, "step": 192440 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.00017574558826521417, "loss": 2.0552, "step": 192445 }, { "epoch": 0.45, "grad_norm": 2.59375, "learning_rate": 0.00017574438152800456, "loss": 2.0776, "step": 192450 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017574317476491932, "loss": 2.2436, "step": 192455 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017574196797595884, "loss": 2.1719, "step": 192460 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017574076116112352, "loss": 2.0082, "step": 192465 }, { "epoch": 0.45, "grad_norm": 2.578125, "learning_rate": 0.0001757395543204138, "loss": 1.9919, "step": 192470 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017573834745383007, "loss": 1.9701, "step": 192475 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001757371405613727, "loss": 2.1751, "step": 192480 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017573593364304222, "loss": 2.1465, "step": 192485 }, { "epoch": 0.45, "grad_norm": 1.9765625, "learning_rate": 0.00017573472669883894, "loss": 2.2211, "step": 192490 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017573351972876327, "loss": 2.1815, "step": 192495 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017573231273281572, "loss": 2.2235, "step": 192500 }, { "epoch": 0.45, "grad_norm": 1.8125, "learning_rate": 0.0001757311057109966, "loss": 1.9788, "step": 192505 }, { "epoch": 0.45, "grad_norm": 2.234375, "learning_rate": 0.00017572989866330638, "loss": 2.227, "step": 192510 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017572869158974546, "loss": 2.1561, "step": 192515 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.0001757274844903142, "loss": 2.2256, "step": 192520 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001757262773650131, "loss": 2.147, "step": 192525 }, { "epoch": 0.45, "grad_norm": 1.984375, "learning_rate": 0.00017572507021384253, "loss": 2.1963, "step": 192530 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017572386303680286, "loss": 1.9823, "step": 192535 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.0001757226558338946, "loss": 1.9578, "step": 192540 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017572144860511805, "loss": 2.0677, "step": 192545 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001757202413504737, "loss": 2.2122, "step": 192550 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017571903406996198, "loss": 2.1094, "step": 192555 }, { "epoch": 0.45, "grad_norm": 2.359375, "learning_rate": 0.00017571782676358323, "loss": 2.0647, "step": 192560 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.0001757166194313379, "loss": 2.0766, "step": 192565 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.0001757154120732264, "loss": 2.1494, "step": 192570 }, { "epoch": 0.45, "grad_norm": 1.9140625, "learning_rate": 0.00017571420468924918, "loss": 1.9062, "step": 192575 }, { "epoch": 0.45, "grad_norm": 1.8671875, "learning_rate": 0.00017571299727940659, "loss": 1.9777, "step": 192580 }, { "epoch": 0.45, "grad_norm": 2.59375, "learning_rate": 0.00017571178984369902, "loss": 2.1419, "step": 192585 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.000175710582382127, "loss": 2.2216, "step": 192590 }, { "epoch": 0.45, "grad_norm": 1.8515625, "learning_rate": 0.00017570937489469083, "loss": 1.9879, "step": 192595 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017570816738139095, "loss": 2.0815, "step": 192600 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017570695984222784, "loss": 1.9543, "step": 192605 }, { "epoch": 0.45, "grad_norm": 1.8828125, "learning_rate": 0.00017570575227720183, "loss": 2.1798, "step": 192610 }, { "epoch": 0.45, "grad_norm": 1.890625, "learning_rate": 0.00017570454468631336, "loss": 2.2745, "step": 192615 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017570333706956283, "loss": 2.1376, "step": 192620 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.0001757021294269507, "loss": 2.1168, "step": 192625 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017570092175847734, "loss": 2.1798, "step": 192630 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017569971406414315, "loss": 1.9014, "step": 192635 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.0001756985063439486, "loss": 1.972, "step": 192640 }, { "epoch": 0.45, "grad_norm": 2.390625, "learning_rate": 0.00017569729859789405, "loss": 2.1344, "step": 192645 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017569609082597994, "loss": 2.1415, "step": 192650 }, { "epoch": 0.45, "grad_norm": 1.9921875, "learning_rate": 0.00017569488302820666, "loss": 2.0506, "step": 192655 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017569367520457464, "loss": 1.9985, "step": 192660 }, { "epoch": 0.45, "grad_norm": 1.84375, "learning_rate": 0.00017569246735508426, "loss": 2.1051, "step": 192665 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017569125947973597, "loss": 2.0574, "step": 192670 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.0001756900515785302, "loss": 2.2099, "step": 192675 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017568884365146733, "loss": 2.0743, "step": 192680 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017568763569854778, "loss": 2.0304, "step": 192685 }, { "epoch": 0.45, "grad_norm": 2.609375, "learning_rate": 0.00017568642771977194, "loss": 2.0006, "step": 192690 }, { "epoch": 0.45, "grad_norm": 1.8359375, "learning_rate": 0.00017568521971514028, "loss": 2.2679, "step": 192695 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.00017568401168465314, "loss": 2.3086, "step": 192700 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017568280362831098, "loss": 2.2089, "step": 192705 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017568159554611417, "loss": 2.138, "step": 192710 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001756803874380632, "loss": 2.0711, "step": 192715 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017567917930415842, "loss": 2.1793, "step": 192720 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017567797114440028, "loss": 2.0328, "step": 192725 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017567676295878915, "loss": 2.0677, "step": 192730 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.0001756755547473255, "loss": 1.9835, "step": 192735 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017567434651000966, "loss": 2.0223, "step": 192740 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.00017567313824684214, "loss": 2.1264, "step": 192745 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017567192995782325, "loss": 2.1028, "step": 192750 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001756707216429535, "loss": 2.2099, "step": 192755 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.00017566951330223323, "loss": 1.9491, "step": 192760 }, { "epoch": 0.45, "grad_norm": 1.9375, "learning_rate": 0.00017566830493566287, "loss": 2.2146, "step": 192765 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001756670965432429, "loss": 2.1335, "step": 192770 }, { "epoch": 0.45, "grad_norm": 2.421875, "learning_rate": 0.0001756658881249736, "loss": 2.2602, "step": 192775 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017566467968085553, "loss": 2.12, "step": 192780 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017566347121088902, "loss": 1.9388, "step": 192785 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017566226271507448, "loss": 2.089, "step": 192790 }, { "epoch": 0.45, "grad_norm": 1.9609375, "learning_rate": 0.00017566105419341234, "loss": 2.0016, "step": 192795 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017565984564590301, "loss": 2.2543, "step": 192800 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.0001756586370725469, "loss": 2.0926, "step": 192805 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.00017565742847334447, "loss": 2.1518, "step": 192810 }, { "epoch": 0.45, "grad_norm": 1.8203125, "learning_rate": 0.00017565621984829604, "loss": 2.0356, "step": 192815 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017565501119740212, "loss": 2.1475, "step": 192820 }, { "epoch": 0.45, "grad_norm": 1.859375, "learning_rate": 0.00017565380252066302, "loss": 1.8682, "step": 192825 }, { "epoch": 0.45, "grad_norm": 1.6640625, "learning_rate": 0.00017565259381807923, "loss": 2.0206, "step": 192830 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017565138508965114, "loss": 2.1101, "step": 192835 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001756501763353792, "loss": 2.0449, "step": 192840 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017564896755526376, "loss": 2.0259, "step": 192845 }, { "epoch": 0.45, "grad_norm": 2.3125, "learning_rate": 0.00017564775874930524, "loss": 2.0455, "step": 192850 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001756465499175041, "loss": 2.0951, "step": 192855 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.00017564534105986074, "loss": 1.9941, "step": 192860 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017564413217637553, "loss": 2.1131, "step": 192865 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.0001756429232670489, "loss": 2.1813, "step": 192870 }, { "epoch": 0.45, "grad_norm": 2.90625, "learning_rate": 0.00017564171433188133, "loss": 2.1522, "step": 192875 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.00017564050537087313, "loss": 2.0952, "step": 192880 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017563929638402478, "loss": 2.0722, "step": 192885 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017563808737133666, "loss": 1.9549, "step": 192890 }, { "epoch": 0.45, "grad_norm": 1.921875, "learning_rate": 0.00017563687833280922, "loss": 2.013, "step": 192895 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017563566926844282, "loss": 2.1087, "step": 192900 }, { "epoch": 0.45, "grad_norm": 2.546875, "learning_rate": 0.00017563446017823794, "loss": 2.0663, "step": 192905 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017563325106219495, "loss": 2.2208, "step": 192910 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017563204192031425, "loss": 2.1784, "step": 192915 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.0001756308327525963, "loss": 2.1569, "step": 192920 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017562962355904146, "loss": 2.1268, "step": 192925 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017562841433965018, "loss": 2.126, "step": 192930 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017562720509442284, "loss": 2.0855, "step": 192935 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017562599582335992, "loss": 2.141, "step": 192940 }, { "epoch": 0.45, "grad_norm": 1.9375, "learning_rate": 0.00017562478652646176, "loss": 2.0524, "step": 192945 }, { "epoch": 0.45, "grad_norm": 2.71875, "learning_rate": 0.00017562357720372882, "loss": 2.0719, "step": 192950 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017562236785516149, "loss": 1.8871, "step": 192955 }, { "epoch": 0.45, "grad_norm": 2.640625, "learning_rate": 0.00017562115848076016, "loss": 2.0524, "step": 192960 }, { "epoch": 0.45, "grad_norm": 2.6875, "learning_rate": 0.0001756199490805253, "loss": 2.2216, "step": 192965 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017561873965445728, "loss": 2.0686, "step": 192970 }, { "epoch": 0.45, "grad_norm": 1.6640625, "learning_rate": 0.00017561753020255654, "loss": 2.0672, "step": 192975 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017561632072482346, "loss": 2.1548, "step": 192980 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017561511122125847, "loss": 2.0582, "step": 192985 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.000175613901691862, "loss": 2.1595, "step": 192990 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017561269213663448, "loss": 2.3195, "step": 192995 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017561148255557625, "loss": 2.0227, "step": 193000 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017561027294868778, "loss": 2.1547, "step": 193005 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.00017560906331596946, "loss": 2.075, "step": 193010 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017560785365742173, "loss": 2.2473, "step": 193015 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017560664397304497, "loss": 1.9165, "step": 193020 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017560543426283962, "loss": 2.0584, "step": 193025 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.0001756042245268061, "loss": 2.0371, "step": 193030 }, { "epoch": 0.45, "grad_norm": 2.0, "learning_rate": 0.00017560301476494478, "loss": 2.1292, "step": 193035 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.00017560180497725608, "loss": 2.2481, "step": 193040 }, { "epoch": 0.45, "grad_norm": 2.65625, "learning_rate": 0.00017560059516374046, "loss": 2.1746, "step": 193045 }, { "epoch": 0.45, "grad_norm": 1.7109375, "learning_rate": 0.00017559938532439834, "loss": 1.9785, "step": 193050 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017559817545923003, "loss": 2.0431, "step": 193055 }, { "epoch": 0.45, "grad_norm": 1.84375, "learning_rate": 0.00017559696556823605, "loss": 2.3148, "step": 193060 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017559575565141678, "loss": 1.9286, "step": 193065 }, { "epoch": 0.45, "grad_norm": 1.828125, "learning_rate": 0.0001755945457087726, "loss": 2.1305, "step": 193070 }, { "epoch": 0.45, "grad_norm": 2.484375, "learning_rate": 0.00017559333574030398, "loss": 2.0246, "step": 193075 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.00017559212574601128, "loss": 2.2326, "step": 193080 }, { "epoch": 0.45, "grad_norm": 2.40625, "learning_rate": 0.00017559091572589495, "loss": 2.1674, "step": 193085 }, { "epoch": 0.45, "grad_norm": 1.8359375, "learning_rate": 0.00017558970567995542, "loss": 1.9829, "step": 193090 }, { "epoch": 0.45, "grad_norm": 2.28125, "learning_rate": 0.00017558849560819304, "loss": 2.1316, "step": 193095 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017558728551060826, "loss": 2.2826, "step": 193100 }, { "epoch": 0.45, "grad_norm": 2.25, "learning_rate": 0.0001755860753872015, "loss": 2.1144, "step": 193105 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.0001755848652379732, "loss": 2.296, "step": 193110 }, { "epoch": 0.45, "grad_norm": 1.90625, "learning_rate": 0.00017558365506292368, "loss": 2.1123, "step": 193115 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017558244486205347, "loss": 2.1396, "step": 193120 }, { "epoch": 0.45, "grad_norm": 2.078125, "learning_rate": 0.00017558123463536288, "loss": 2.0961, "step": 193125 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.0001755800243828524, "loss": 2.1018, "step": 193130 }, { "epoch": 0.45, "grad_norm": 2.171875, "learning_rate": 0.0001755788141045224, "loss": 2.0152, "step": 193135 }, { "epoch": 0.45, "grad_norm": 2.109375, "learning_rate": 0.0001755776038003733, "loss": 2.0817, "step": 193140 }, { "epoch": 0.45, "grad_norm": 2.265625, "learning_rate": 0.00017557639347040552, "loss": 2.226, "step": 193145 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.0001755751831146195, "loss": 2.0832, "step": 193150 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.00017557397273301558, "loss": 2.2723, "step": 193155 }, { "epoch": 0.45, "grad_norm": 1.7890625, "learning_rate": 0.00017557276232559426, "loss": 2.082, "step": 193160 }, { "epoch": 0.45, "grad_norm": 2.03125, "learning_rate": 0.0001755715518923559, "loss": 2.1243, "step": 193165 }, { "epoch": 0.45, "grad_norm": 1.40625, "learning_rate": 0.00017557034143330093, "loss": 1.8477, "step": 193170 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017556913094842974, "loss": 2.1534, "step": 193175 }, { "epoch": 0.45, "grad_norm": 1.8984375, "learning_rate": 0.0001755679204377428, "loss": 2.1536, "step": 193180 }, { "epoch": 0.45, "grad_norm": 2.1875, "learning_rate": 0.00017556670990124049, "loss": 2.0916, "step": 193185 }, { "epoch": 0.45, "grad_norm": 2.453125, "learning_rate": 0.00017556549933892316, "loss": 2.1152, "step": 193190 }, { "epoch": 0.45, "grad_norm": 2.375, "learning_rate": 0.00017556428875079133, "loss": 2.3473, "step": 193195 }, { "epoch": 0.45, "grad_norm": 1.84375, "learning_rate": 0.00017556307813684538, "loss": 2.1603, "step": 193200 }, { "epoch": 0.45, "grad_norm": 2.15625, "learning_rate": 0.00017556186749708572, "loss": 2.0843, "step": 193205 }, { "epoch": 0.45, "grad_norm": 1.9921875, "learning_rate": 0.00017556065683151272, "loss": 2.1012, "step": 193210 }, { "epoch": 0.45, "grad_norm": 2.015625, "learning_rate": 0.00017555944614012684, "loss": 2.0392, "step": 193215 }, { "epoch": 0.45, "grad_norm": 2.5625, "learning_rate": 0.0001755582354229285, "loss": 2.0329, "step": 193220 }, { "epoch": 0.45, "grad_norm": 2.5, "learning_rate": 0.0001755570246799181, "loss": 2.117, "step": 193225 }, { "epoch": 0.45, "grad_norm": 2.6875, "learning_rate": 0.000175555813911096, "loss": 2.2515, "step": 193230 }, { "epoch": 0.45, "grad_norm": 1.953125, "learning_rate": 0.0001755546031164627, "loss": 1.9918, "step": 193235 }, { "epoch": 0.45, "grad_norm": 2.125, "learning_rate": 0.00017555339229601856, "loss": 2.0977, "step": 193240 }, { "epoch": 0.45, "grad_norm": 2.203125, "learning_rate": 0.00017555218144976403, "loss": 2.1434, "step": 193245 }, { "epoch": 0.45, "grad_norm": 2.21875, "learning_rate": 0.00017555097057769952, "loss": 2.0494, "step": 193250 }, { "epoch": 0.45, "grad_norm": 1.9296875, "learning_rate": 0.0001755497596798254, "loss": 2.0028, "step": 193255 }, { "epoch": 0.45, "grad_norm": 2.390625, "learning_rate": 0.0001755485487561421, "loss": 2.1985, "step": 193260 }, { "epoch": 0.45, "grad_norm": 1.875, "learning_rate": 0.00017554733780665008, "loss": 2.1782, "step": 193265 }, { "epoch": 0.45, "grad_norm": 1.96875, "learning_rate": 0.0001755461268313497, "loss": 2.2132, "step": 193270 }, { "epoch": 0.45, "grad_norm": 2.296875, "learning_rate": 0.0001755449158302414, "loss": 2.3257, "step": 193275 }, { "epoch": 0.45, "grad_norm": 1.9453125, "learning_rate": 0.00017554370480332557, "loss": 2.059, "step": 193280 }, { "epoch": 0.45, "grad_norm": 1.9453125, "learning_rate": 0.00017554249375060268, "loss": 2.0149, "step": 193285 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.0001755412826720731, "loss": 1.9131, "step": 193290 }, { "epoch": 0.45, "grad_norm": 2.09375, "learning_rate": 0.00017554007156773719, "loss": 2.2212, "step": 193295 }, { "epoch": 0.45, "grad_norm": 1.8671875, "learning_rate": 0.00017553886043759545, "loss": 2.123, "step": 193300 }, { "epoch": 0.45, "grad_norm": 2.34375, "learning_rate": 0.0001755376492816483, "loss": 2.1728, "step": 193305 }, { "epoch": 0.45, "grad_norm": 1.890625, "learning_rate": 0.00017553643809989605, "loss": 2.1359, "step": 193310 }, { "epoch": 0.45, "grad_norm": 2.140625, "learning_rate": 0.00017553522689233925, "loss": 2.1946, "step": 193315 }, { "epoch": 0.45, "grad_norm": 2.0625, "learning_rate": 0.00017553401565897821, "loss": 2.179, "step": 193320 }, { "epoch": 0.45, "grad_norm": 2.328125, "learning_rate": 0.00017553280439981341, "loss": 2.0879, "step": 193325 }, { "epoch": 0.45, "grad_norm": 2.46875, "learning_rate": 0.00017553159311484523, "loss": 2.0648, "step": 193330 }, { "epoch": 0.45, "grad_norm": 2.84375, "learning_rate": 0.00017553038180407406, "loss": 2.2572, "step": 193335 }, { "epoch": 0.45, "grad_norm": 2.046875, "learning_rate": 0.00017552917046750034, "loss": 2.0478, "step": 193340 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.0001755279591051245, "loss": 2.1282, "step": 193345 }, { "epoch": 0.46, "grad_norm": 2.890625, "learning_rate": 0.00017552674771694694, "loss": 2.0708, "step": 193350 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.0001755255363029681, "loss": 2.0546, "step": 193355 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017552432486318833, "loss": 2.2679, "step": 193360 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017552311339760808, "loss": 1.9568, "step": 193365 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001755219019062278, "loss": 2.0788, "step": 193370 }, { "epoch": 0.46, "grad_norm": 1.734375, "learning_rate": 0.00017552069038904782, "loss": 1.9533, "step": 193375 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017551947884606863, "loss": 2.2103, "step": 193380 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017551826727729063, "loss": 2.1291, "step": 193385 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001755170556827142, "loss": 2.0802, "step": 193390 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.00017551584406233977, "loss": 2.111, "step": 193395 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017551463241616776, "loss": 2.208, "step": 193400 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.0001755134207441986, "loss": 2.1284, "step": 193405 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017551220904643268, "loss": 2.2548, "step": 193410 }, { "epoch": 0.46, "grad_norm": 1.921875, "learning_rate": 0.0001755109973228704, "loss": 1.9828, "step": 193415 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001755097855735122, "loss": 2.1405, "step": 193420 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017550857379835852, "loss": 2.1345, "step": 193425 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017550736199740968, "loss": 1.8753, "step": 193430 }, { "epoch": 0.46, "grad_norm": 2.5625, "learning_rate": 0.00017550615017066624, "loss": 2.1422, "step": 193435 }, { "epoch": 0.46, "grad_norm": 2.515625, "learning_rate": 0.00017550493831812846, "loss": 2.0985, "step": 193440 }, { "epoch": 0.46, "grad_norm": 1.6796875, "learning_rate": 0.00017550372643979686, "loss": 2.0687, "step": 193445 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017550251453567182, "loss": 2.1886, "step": 193450 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001755013026057537, "loss": 2.1414, "step": 193455 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017550009065004304, "loss": 2.2076, "step": 193460 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017549887866854014, "loss": 1.93, "step": 193465 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017549766666124543, "loss": 2.1053, "step": 193470 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017549645462815937, "loss": 2.1114, "step": 193475 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017549524256928235, "loss": 2.0947, "step": 193480 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001754940304846148, "loss": 2.1838, "step": 193485 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.00017549281837415712, "loss": 2.0209, "step": 193490 }, { "epoch": 0.46, "grad_norm": 1.875, "learning_rate": 0.00017549160623790973, "loss": 2.0591, "step": 193495 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017549039407587302, "loss": 2.2303, "step": 193500 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017548918188804743, "loss": 2.1165, "step": 193505 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.00017548796967443333, "loss": 2.0042, "step": 193510 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017548675743503123, "loss": 2.1265, "step": 193515 }, { "epoch": 0.46, "grad_norm": 1.8671875, "learning_rate": 0.00017548554516984144, "loss": 2.2085, "step": 193520 }, { "epoch": 0.46, "grad_norm": 1.7734375, "learning_rate": 0.00017548433287886445, "loss": 2.0846, "step": 193525 }, { "epoch": 0.46, "grad_norm": 1.828125, "learning_rate": 0.00017548312056210063, "loss": 2.0238, "step": 193530 }, { "epoch": 0.46, "grad_norm": 2.625, "learning_rate": 0.0001754819082195504, "loss": 2.1733, "step": 193535 }, { "epoch": 0.46, "grad_norm": 1.7109375, "learning_rate": 0.0001754806958512142, "loss": 2.0142, "step": 193540 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001754794834570924, "loss": 2.0959, "step": 193545 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017547827103718547, "loss": 2.1295, "step": 193550 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017547705859149377, "loss": 2.1027, "step": 193555 }, { "epoch": 0.46, "grad_norm": 1.7578125, "learning_rate": 0.00017547584612001774, "loss": 2.2269, "step": 193560 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017547463362275779, "loss": 1.9953, "step": 193565 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017547342109971433, "loss": 1.9757, "step": 193570 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001754722085508878, "loss": 1.9848, "step": 193575 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001754709959762786, "loss": 2.1979, "step": 193580 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017546978337588712, "loss": 2.0752, "step": 193585 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017546857074971382, "loss": 2.2206, "step": 193590 }, { "epoch": 0.46, "grad_norm": 2.703125, "learning_rate": 0.00017546735809775905, "loss": 2.1665, "step": 193595 }, { "epoch": 0.46, "grad_norm": 1.671875, "learning_rate": 0.0001754661454200233, "loss": 1.974, "step": 193600 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001754649327165069, "loss": 2.0457, "step": 193605 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017546371998721036, "loss": 2.0529, "step": 193610 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.000175462507232134, "loss": 2.2201, "step": 193615 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.0001754612944512783, "loss": 2.0692, "step": 193620 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017546008164464366, "loss": 1.9993, "step": 193625 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.0001754588688122305, "loss": 2.0421, "step": 193630 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017545765595403917, "loss": 2.2302, "step": 193635 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017545644307007018, "loss": 2.1432, "step": 193640 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017545523016032388, "loss": 2.172, "step": 193645 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017545401722480076, "loss": 2.0622, "step": 193650 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.0001754528042635011, "loss": 2.027, "step": 193655 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017545159127642544, "loss": 2.0415, "step": 193660 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017545037826357415, "loss": 2.2918, "step": 193665 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017544916522494762, "loss": 2.1201, "step": 193670 }, { "epoch": 0.46, "grad_norm": 1.84375, "learning_rate": 0.00017544795216054628, "loss": 2.2415, "step": 193675 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017544673907037058, "loss": 2.2013, "step": 193680 }, { "epoch": 0.46, "grad_norm": 2.59375, "learning_rate": 0.00017544552595442087, "loss": 2.076, "step": 193685 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017544431281269764, "loss": 2.2392, "step": 193690 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017544309964520122, "loss": 2.2203, "step": 193695 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.0001754418864519321, "loss": 2.0595, "step": 193700 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017544067323289067, "loss": 2.1878, "step": 193705 }, { "epoch": 0.46, "grad_norm": 1.7890625, "learning_rate": 0.0001754394599880773, "loss": 1.945, "step": 193710 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.0001754382467174925, "loss": 2.0648, "step": 193715 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001754370334211366, "loss": 2.0982, "step": 193720 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017543582009901, "loss": 2.017, "step": 193725 }, { "epoch": 0.46, "grad_norm": 1.8515625, "learning_rate": 0.00017543460675111322, "loss": 2.0195, "step": 193730 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017543339337744657, "loss": 2.1008, "step": 193735 }, { "epoch": 0.46, "grad_norm": 1.7890625, "learning_rate": 0.00017543217997801055, "loss": 2.156, "step": 193740 }, { "epoch": 0.46, "grad_norm": 2.609375, "learning_rate": 0.00017543096655280548, "loss": 2.1038, "step": 193745 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017542975310183182, "loss": 2.1051, "step": 193750 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017542853962509002, "loss": 2.1038, "step": 193755 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017542732612258046, "loss": 2.0574, "step": 193760 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017542611259430352, "loss": 2.0666, "step": 193765 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.0001754248990402597, "loss": 2.056, "step": 193770 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017542368546044935, "loss": 2.1243, "step": 193775 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017542247185487288, "loss": 2.0743, "step": 193780 }, { "epoch": 0.46, "grad_norm": 2.65625, "learning_rate": 0.00017542125822353076, "loss": 2.0637, "step": 193785 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017542004456642336, "loss": 2.1656, "step": 193790 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001754188308835511, "loss": 1.8436, "step": 193795 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001754176171749144, "loss": 2.1111, "step": 193800 }, { "epoch": 0.46, "grad_norm": 1.78125, "learning_rate": 0.00017541640344051368, "loss": 1.8923, "step": 193805 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017541518968034934, "loss": 1.9869, "step": 193810 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.0001754139758944218, "loss": 2.0384, "step": 193815 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001754127620827315, "loss": 2.1796, "step": 193820 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001754115482452788, "loss": 2.2261, "step": 193825 }, { "epoch": 0.46, "grad_norm": 2.609375, "learning_rate": 0.00017541033438206417, "loss": 2.026, "step": 193830 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.000175409120493088, "loss": 2.0503, "step": 193835 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001754079065783507, "loss": 2.1982, "step": 193840 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.00017540669263785267, "loss": 2.225, "step": 193845 }, { "epoch": 0.46, "grad_norm": 2.40625, "learning_rate": 0.0001754054786715944, "loss": 2.1047, "step": 193850 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017540426467957623, "loss": 2.0534, "step": 193855 }, { "epoch": 0.46, "grad_norm": 1.7421875, "learning_rate": 0.00017540305066179857, "loss": 2.116, "step": 193860 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.0001754018366182619, "loss": 1.9946, "step": 193865 }, { "epoch": 0.46, "grad_norm": 2.5, "learning_rate": 0.00017540062254896657, "loss": 1.9219, "step": 193870 }, { "epoch": 0.46, "grad_norm": 1.890625, "learning_rate": 0.00017539940845391302, "loss": 1.9964, "step": 193875 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017539819433310164, "loss": 2.2109, "step": 193880 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001753969801865329, "loss": 2.1115, "step": 193885 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001753957660142072, "loss": 1.9632, "step": 193890 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.0001753945518161249, "loss": 2.2777, "step": 193895 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.0001753933375922865, "loss": 1.9647, "step": 193900 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017539212334269233, "loss": 2.0683, "step": 193905 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017539090906734285, "loss": 1.8749, "step": 193910 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.0001753896947662385, "loss": 2.1992, "step": 193915 }, { "epoch": 0.46, "grad_norm": 1.9609375, "learning_rate": 0.00017538848043937964, "loss": 1.9517, "step": 193920 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017538726608676668, "loss": 2.131, "step": 193925 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.0001753860517084001, "loss": 2.0321, "step": 193930 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017538483730428024, "loss": 2.1237, "step": 193935 }, { "epoch": 0.46, "grad_norm": 2.609375, "learning_rate": 0.00017538362287440758, "loss": 2.1733, "step": 193940 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.0001753824084187825, "loss": 2.1947, "step": 193945 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017538119393740545, "loss": 2.2454, "step": 193950 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.0001753799794302768, "loss": 2.1165, "step": 193955 }, { "epoch": 0.46, "grad_norm": 2.640625, "learning_rate": 0.00017537876489739698, "loss": 2.1578, "step": 193960 }, { "epoch": 0.46, "grad_norm": 1.9609375, "learning_rate": 0.00017537755033876636, "loss": 2.1343, "step": 193965 }, { "epoch": 0.46, "grad_norm": 1.875, "learning_rate": 0.00017537633575438549, "loss": 2.2999, "step": 193970 }, { "epoch": 0.46, "grad_norm": 3.40625, "learning_rate": 0.00017537512114425462, "loss": 1.8749, "step": 193975 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001753739065083743, "loss": 2.2153, "step": 193980 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017537269184674482, "loss": 2.1283, "step": 193985 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017537147715936672, "loss": 2.0845, "step": 193990 }, { "epoch": 0.46, "grad_norm": 1.8671875, "learning_rate": 0.00017537026244624033, "loss": 2.0516, "step": 193995 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017536904770736614, "loss": 2.0451, "step": 194000 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017536783294274443, "loss": 2.0265, "step": 194005 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017536661815237576, "loss": 2.0945, "step": 194010 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.0001753654033362605, "loss": 2.2495, "step": 194015 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.000175364188494399, "loss": 2.1813, "step": 194020 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017536297362679174, "loss": 2.1638, "step": 194025 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017536175873343913, "loss": 2.0661, "step": 194030 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.0001753605438143416, "loss": 1.9381, "step": 194035 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017535932886949952, "loss": 2.1643, "step": 194040 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017535811389891333, "loss": 2.1672, "step": 194045 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.0001753568989025834, "loss": 2.1783, "step": 194050 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.00017535568388051025, "loss": 2.1055, "step": 194055 }, { "epoch": 0.46, "grad_norm": 2.53125, "learning_rate": 0.0001753544688326942, "loss": 2.0844, "step": 194060 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017535325375913572, "loss": 1.9446, "step": 194065 }, { "epoch": 0.46, "grad_norm": 2.5, "learning_rate": 0.0001753520386598352, "loss": 2.1026, "step": 194070 }, { "epoch": 0.46, "grad_norm": 1.9765625, "learning_rate": 0.00017535082353479302, "loss": 1.9754, "step": 194075 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.00017534960838400964, "loss": 2.1784, "step": 194080 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017534839320748548, "loss": 2.0727, "step": 194085 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017534717800522098, "loss": 2.1256, "step": 194090 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017534596277721647, "loss": 2.1131, "step": 194095 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.0001753447475234724, "loss": 1.9541, "step": 194100 }, { "epoch": 0.46, "grad_norm": 1.765625, "learning_rate": 0.00017534353224398923, "loss": 2.1566, "step": 194105 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017534231693876735, "loss": 2.1597, "step": 194110 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.00017534110160780716, "loss": 2.3207, "step": 194115 }, { "epoch": 0.46, "grad_norm": 1.703125, "learning_rate": 0.00017533988625110906, "loss": 2.0828, "step": 194120 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001753386708686735, "loss": 2.0934, "step": 194125 }, { "epoch": 0.46, "grad_norm": 1.84375, "learning_rate": 0.0001753374554605009, "loss": 2.092, "step": 194130 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017533624002659165, "loss": 2.1405, "step": 194135 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017533502456694618, "loss": 1.9258, "step": 194140 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.00017533380908156486, "loss": 2.1399, "step": 194145 }, { "epoch": 0.46, "grad_norm": 1.8984375, "learning_rate": 0.00017533259357044819, "loss": 2.1546, "step": 194150 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001753313780335965, "loss": 2.0894, "step": 194155 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.0001753301624710103, "loss": 2.0966, "step": 194160 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.00017532894688268992, "loss": 1.9211, "step": 194165 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017532773126863578, "loss": 2.0638, "step": 194170 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017532651562884838, "loss": 2.191, "step": 194175 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017532529996332804, "loss": 2.2505, "step": 194180 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.00017532408427207522, "loss": 1.9734, "step": 194185 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017532286855509032, "loss": 2.192, "step": 194190 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017532165281237375, "loss": 2.0731, "step": 194195 }, { "epoch": 0.46, "grad_norm": 1.9765625, "learning_rate": 0.00017532043704392595, "loss": 2.0033, "step": 194200 }, { "epoch": 0.46, "grad_norm": 2.640625, "learning_rate": 0.00017531922124974734, "loss": 2.2176, "step": 194205 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.0001753180054298383, "loss": 1.9554, "step": 194210 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017531678958419925, "loss": 2.0098, "step": 194215 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017531557371283066, "loss": 2.1741, "step": 194220 }, { "epoch": 0.46, "grad_norm": 1.8671875, "learning_rate": 0.00017531435781573286, "loss": 1.6902, "step": 194225 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017531314189290633, "loss": 1.83, "step": 194230 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017531192594435146, "loss": 2.0983, "step": 194235 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017531070997006868, "loss": 1.9132, "step": 194240 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.0001753094939700584, "loss": 2.1688, "step": 194245 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.000175308277944321, "loss": 2.0071, "step": 194250 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017530706189285695, "loss": 2.0783, "step": 194255 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001753058458156666, "loss": 2.2017, "step": 194260 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.0001753046297127505, "loss": 2.2558, "step": 194265 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.0001753034135841089, "loss": 2.2311, "step": 194270 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017530219742974232, "loss": 1.9722, "step": 194275 }, { "epoch": 0.46, "grad_norm": 1.7734375, "learning_rate": 0.00017530098124965112, "loss": 2.0722, "step": 194280 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017529976504383573, "loss": 1.963, "step": 194285 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017529854881229658, "loss": 2.2081, "step": 194290 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017529733255503414, "loss": 2.1153, "step": 194295 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.00017529611627204868, "loss": 2.1572, "step": 194300 }, { "epoch": 0.46, "grad_norm": 1.671875, "learning_rate": 0.00017529489996334077, "loss": 2.0776, "step": 194305 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017529368362891073, "loss": 2.2164, "step": 194310 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.000175292467268759, "loss": 2.0112, "step": 194315 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017529125088288597, "loss": 2.066, "step": 194320 }, { "epoch": 0.46, "grad_norm": 1.7578125, "learning_rate": 0.00017529003447129212, "loss": 2.0308, "step": 194325 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017528881803397783, "loss": 2.1376, "step": 194330 }, { "epoch": 0.46, "grad_norm": 1.7265625, "learning_rate": 0.0001752876015709435, "loss": 2.0747, "step": 194335 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.00017528638508218955, "loss": 2.1203, "step": 194340 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.0001752851685677164, "loss": 2.2791, "step": 194345 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001752839520275245, "loss": 2.3322, "step": 194350 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.0001752827354616142, "loss": 2.2892, "step": 194355 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017528151886998602, "loss": 2.1519, "step": 194360 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017528030225264028, "loss": 2.0506, "step": 194365 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.0001752790856095774, "loss": 2.1062, "step": 194370 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017527786894079784, "loss": 2.1747, "step": 194375 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017527665224630195, "loss": 1.9093, "step": 194380 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017527543552609025, "loss": 2.0373, "step": 194385 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017527421878016307, "loss": 2.3363, "step": 194390 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017527300200852083, "loss": 2.2735, "step": 194395 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.000175271785211164, "loss": 2.0127, "step": 194400 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017527056838809296, "loss": 2.0654, "step": 194405 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.0001752693515393081, "loss": 1.8273, "step": 194410 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017526813466480992, "loss": 2.0707, "step": 194415 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.00017526691776459873, "loss": 2.0797, "step": 194420 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017526570083867501, "loss": 2.1931, "step": 194425 }, { "epoch": 0.46, "grad_norm": 1.875, "learning_rate": 0.00017526448388703914, "loss": 2.0898, "step": 194430 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017526326690969158, "loss": 1.9972, "step": 194435 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017526204990663273, "loss": 2.1269, "step": 194440 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.000175260832877863, "loss": 1.9405, "step": 194445 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001752596158233828, "loss": 2.094, "step": 194450 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017525839874319254, "loss": 2.2546, "step": 194455 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017525718163729264, "loss": 1.8897, "step": 194460 }, { "epoch": 0.46, "grad_norm": 1.765625, "learning_rate": 0.00017525596450568355, "loss": 2.0711, "step": 194465 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017525474734836563, "loss": 1.9147, "step": 194470 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017525353016533933, "loss": 2.1654, "step": 194475 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017525231295660508, "loss": 2.2694, "step": 194480 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.00017525109572216326, "loss": 1.9056, "step": 194485 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001752498784620143, "loss": 2.1503, "step": 194490 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.0001752486611761586, "loss": 2.0671, "step": 194495 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017524744386459662, "loss": 2.1209, "step": 194500 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017524622652732874, "loss": 2.1632, "step": 194505 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017524500916435537, "loss": 2.0155, "step": 194510 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017524379177567698, "loss": 2.0494, "step": 194515 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017524257436129392, "loss": 2.2068, "step": 194520 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017524135692120662, "loss": 2.1636, "step": 194525 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017524013945541554, "loss": 1.9383, "step": 194530 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017523892196392104, "loss": 2.1045, "step": 194535 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017523770444672355, "loss": 2.2845, "step": 194540 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017523648690382352, "loss": 2.1446, "step": 194545 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017523526933522132, "loss": 2.3027, "step": 194550 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001752340517409174, "loss": 2.2342, "step": 194555 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017523283412091217, "loss": 2.0112, "step": 194560 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017523161647520604, "loss": 2.1917, "step": 194565 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.0001752303988037994, "loss": 2.1164, "step": 194570 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001752291811066927, "loss": 2.316, "step": 194575 }, { "epoch": 0.46, "grad_norm": 1.71875, "learning_rate": 0.00017522796338388636, "loss": 2.2447, "step": 194580 }, { "epoch": 0.46, "grad_norm": 1.921875, "learning_rate": 0.00017522674563538078, "loss": 2.076, "step": 194585 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.0001752255278611764, "loss": 2.05, "step": 194590 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.0001752243100612736, "loss": 1.8713, "step": 194595 }, { "epoch": 0.46, "grad_norm": 2.5, "learning_rate": 0.00017522309223567278, "loss": 2.1514, "step": 194600 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.0001752218743843744, "loss": 2.0285, "step": 194605 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.0001752206565073789, "loss": 2.0484, "step": 194610 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017521943860468665, "loss": 2.1522, "step": 194615 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.00017521822067629802, "loss": 2.1023, "step": 194620 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017521700272221354, "loss": 2.243, "step": 194625 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017521578474243357, "loss": 2.0242, "step": 194630 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.00017521456673695848, "loss": 2.2211, "step": 194635 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.00017521334870578876, "loss": 1.9593, "step": 194640 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.0001752121306489248, "loss": 2.1737, "step": 194645 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017521091256636698, "loss": 2.236, "step": 194650 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017520969445811578, "loss": 2.133, "step": 194655 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017520847632417156, "loss": 2.1506, "step": 194660 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017520725816453479, "loss": 2.1396, "step": 194665 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017520603997920583, "loss": 2.0552, "step": 194670 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017520482176818517, "loss": 2.1605, "step": 194675 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.0001752036035314731, "loss": 2.0553, "step": 194680 }, { "epoch": 0.46, "grad_norm": 1.75, "learning_rate": 0.00017520238526907018, "loss": 1.9869, "step": 194685 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017520116698097673, "loss": 2.1892, "step": 194690 }, { "epoch": 0.46, "grad_norm": 2.5625, "learning_rate": 0.00017519994866719324, "loss": 2.1975, "step": 194695 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017519873032772, "loss": 2.0784, "step": 194700 }, { "epoch": 0.46, "grad_norm": 1.8359375, "learning_rate": 0.0001751975119625576, "loss": 2.2043, "step": 194705 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017519629357170633, "loss": 1.9727, "step": 194710 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017519507515516663, "loss": 2.2696, "step": 194715 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017519385671293893, "loss": 2.2419, "step": 194720 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017519263824502366, "loss": 2.126, "step": 194725 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.0001751914197514212, "loss": 1.9259, "step": 194730 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.000175190201232132, "loss": 2.0927, "step": 194735 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.00017518898268715648, "loss": 1.952, "step": 194740 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017518776411649503, "loss": 2.1223, "step": 194745 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017518654552014807, "loss": 2.1078, "step": 194750 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.000175185326898116, "loss": 2.0989, "step": 194755 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.0001751841082503993, "loss": 2.0506, "step": 194760 }, { "epoch": 0.46, "grad_norm": 1.6796875, "learning_rate": 0.00017518288957699834, "loss": 2.1938, "step": 194765 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017518167087791353, "loss": 2.1733, "step": 194770 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.00017518045215314526, "loss": 1.9968, "step": 194775 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017517923340269403, "loss": 2.1056, "step": 194780 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.0001751780146265602, "loss": 2.0219, "step": 194785 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001751767958247442, "loss": 2.2655, "step": 194790 }, { "epoch": 0.46, "grad_norm": 1.875, "learning_rate": 0.00017517557699724644, "loss": 2.1243, "step": 194795 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.00017517435814406735, "loss": 2.0553, "step": 194800 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.00017517313926520734, "loss": 2.3037, "step": 194805 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.0001751719203606668, "loss": 2.1956, "step": 194810 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.0001751707014304462, "loss": 2.2306, "step": 194815 }, { "epoch": 0.46, "grad_norm": 2.609375, "learning_rate": 0.0001751694824745459, "loss": 2.0137, "step": 194820 }, { "epoch": 0.46, "grad_norm": 1.8515625, "learning_rate": 0.00017516826349296634, "loss": 2.0857, "step": 194825 }, { "epoch": 0.46, "grad_norm": 1.59375, "learning_rate": 0.00017516704448570792, "loss": 2.0526, "step": 194830 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.0001751658254527711, "loss": 2.1907, "step": 194835 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017516460639415628, "loss": 2.0253, "step": 194840 }, { "epoch": 0.46, "grad_norm": 1.7890625, "learning_rate": 0.00017516338730986383, "loss": 2.1223, "step": 194845 }, { "epoch": 0.46, "grad_norm": 1.65625, "learning_rate": 0.00017516216819989423, "loss": 2.3491, "step": 194850 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.0001751609490642479, "loss": 2.1981, "step": 194855 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017515972990292516, "loss": 2.1999, "step": 194860 }, { "epoch": 0.46, "grad_norm": 2.609375, "learning_rate": 0.00017515851071592655, "loss": 2.2059, "step": 194865 }, { "epoch": 0.46, "grad_norm": 1.828125, "learning_rate": 0.0001751572915032524, "loss": 2.1357, "step": 194870 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017515607226490317, "loss": 2.0373, "step": 194875 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017515485300087925, "loss": 2.1621, "step": 194880 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017515363371118107, "loss": 2.224, "step": 194885 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017515241439580907, "loss": 2.0406, "step": 194890 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017515119505476362, "loss": 2.1472, "step": 194895 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017514997568804515, "loss": 2.0723, "step": 194900 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.0001751487562956541, "loss": 2.1075, "step": 194905 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017514753687759087, "loss": 2.0783, "step": 194910 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.0001751463174338559, "loss": 2.124, "step": 194915 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017514509796444955, "loss": 1.9728, "step": 194920 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017514387846937228, "loss": 2.3586, "step": 194925 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017514265894862452, "loss": 2.201, "step": 194930 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017514143940220665, "loss": 2.0922, "step": 194935 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017514021983011907, "loss": 2.124, "step": 194940 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001751390002323623, "loss": 2.1048, "step": 194945 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.0001751377806089366, "loss": 2.3467, "step": 194950 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017513656095984254, "loss": 2.3284, "step": 194955 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017513534128508042, "loss": 2.2166, "step": 194960 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017513412158465073, "loss": 2.14, "step": 194965 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017513290185855386, "loss": 2.2198, "step": 194970 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017513168210679024, "loss": 1.9251, "step": 194975 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017513046232936027, "loss": 1.9569, "step": 194980 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.00017512924252626437, "loss": 2.1857, "step": 194985 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017512802269750296, "loss": 2.0532, "step": 194990 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017512680284307642, "loss": 2.1911, "step": 194995 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017512558296298525, "loss": 2.1489, "step": 195000 }, { "epoch": 0.46, "grad_norm": 1.7109375, "learning_rate": 0.00017512436305722976, "loss": 2.1049, "step": 195005 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017512314312581048, "loss": 2.1676, "step": 195010 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017512192316872775, "loss": 2.0393, "step": 195015 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.000175120703185982, "loss": 1.956, "step": 195020 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.00017511948317757366, "loss": 1.8724, "step": 195025 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001751182631435032, "loss": 2.2394, "step": 195030 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.0001751170430837709, "loss": 2.1428, "step": 195035 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017511582299837729, "loss": 1.9943, "step": 195040 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017511460288732275, "loss": 2.0799, "step": 195045 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001751133827506077, "loss": 2.4232, "step": 195050 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017511216258823256, "loss": 2.2383, "step": 195055 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017511094240019772, "loss": 2.1671, "step": 195060 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017510972218650364, "loss": 2.1427, "step": 195065 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017510850194715072, "loss": 2.1988, "step": 195070 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017510728168213934, "loss": 2.1426, "step": 195075 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017510606139147, "loss": 2.1762, "step": 195080 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017510484107514305, "loss": 1.9603, "step": 195085 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017510362073315888, "loss": 2.3303, "step": 195090 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.000175102400365518, "loss": 2.29, "step": 195095 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017510117997222076, "loss": 2.1566, "step": 195100 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017509995955326764, "loss": 2.1227, "step": 195105 }, { "epoch": 0.46, "grad_norm": 1.6875, "learning_rate": 0.00017509873910865898, "loss": 2.0655, "step": 195110 }, { "epoch": 0.46, "grad_norm": 2.40625, "learning_rate": 0.00017509751863839518, "loss": 1.9176, "step": 195115 }, { "epoch": 0.46, "grad_norm": 2.671875, "learning_rate": 0.00017509629814247676, "loss": 2.0023, "step": 195120 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.00017509507762090407, "loss": 2.1796, "step": 195125 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017509385707367754, "loss": 2.1701, "step": 195130 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.0001750926365007976, "loss": 2.0104, "step": 195135 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017509141590226466, "loss": 2.133, "step": 195140 }, { "epoch": 0.46, "grad_norm": 1.828125, "learning_rate": 0.0001750901952780791, "loss": 1.9958, "step": 195145 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017508897462824137, "loss": 2.254, "step": 195150 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.0001750877539527519, "loss": 2.2081, "step": 195155 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017508653325161108, "loss": 2.1644, "step": 195160 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017508531252481935, "loss": 2.1621, "step": 195165 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001750840917723771, "loss": 2.0206, "step": 195170 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017508287099428474, "loss": 2.2028, "step": 195175 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017508165019054276, "loss": 2.1408, "step": 195180 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017508042936115152, "loss": 2.2164, "step": 195185 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001750792085061114, "loss": 2.119, "step": 195190 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017507798762542292, "loss": 2.1381, "step": 195195 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.0001750767667190864, "loss": 2.0166, "step": 195200 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017507554578710232, "loss": 2.2187, "step": 195205 }, { "epoch": 0.46, "grad_norm": 2.796875, "learning_rate": 0.00017507432482947104, "loss": 2.2116, "step": 195210 }, { "epoch": 0.46, "grad_norm": 2.5, "learning_rate": 0.00017507310384619302, "loss": 2.0513, "step": 195215 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017507188283726868, "loss": 2.2577, "step": 195220 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001750706618026984, "loss": 2.0549, "step": 195225 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017506944074248266, "loss": 2.0758, "step": 195230 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.0001750682196566218, "loss": 2.1059, "step": 195235 }, { "epoch": 0.46, "grad_norm": 1.8515625, "learning_rate": 0.00017506699854511628, "loss": 2.1606, "step": 195240 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017506577740796652, "loss": 2.1642, "step": 195245 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017506455624517292, "loss": 2.0968, "step": 195250 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017506333505673592, "loss": 2.234, "step": 195255 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017506211384265595, "loss": 2.1522, "step": 195260 }, { "epoch": 0.46, "grad_norm": 1.8515625, "learning_rate": 0.00017506089260293335, "loss": 2.1219, "step": 195265 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.0001750596713375686, "loss": 2.0946, "step": 195270 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001750584500465621, "loss": 2.2368, "step": 195275 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017505722872991427, "loss": 2.1073, "step": 195280 }, { "epoch": 0.46, "grad_norm": 1.8671875, "learning_rate": 0.00017505600738762557, "loss": 2.1149, "step": 195285 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017505478601969636, "loss": 2.0616, "step": 195290 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017505356462612704, "loss": 2.0254, "step": 195295 }, { "epoch": 0.46, "grad_norm": 1.9765625, "learning_rate": 0.0001750523432069181, "loss": 2.0695, "step": 195300 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.0001750511217620699, "loss": 2.3714, "step": 195305 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017504990029158289, "loss": 2.1408, "step": 195310 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017504867879545745, "loss": 2.066, "step": 195315 }, { "epoch": 0.46, "grad_norm": 1.875, "learning_rate": 0.00017504745727369401, "loss": 2.1281, "step": 195320 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017504623572629305, "loss": 2.2245, "step": 195325 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.0001750450141532549, "loss": 2.0647, "step": 195330 }, { "epoch": 0.46, "grad_norm": 2.828125, "learning_rate": 0.00017504379255458001, "loss": 2.1142, "step": 195335 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.0001750425709302688, "loss": 2.0265, "step": 195340 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001750413492803217, "loss": 2.1166, "step": 195345 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017504012760473912, "loss": 2.1658, "step": 195350 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017503890590352147, "loss": 2.156, "step": 195355 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017503768417666913, "loss": 2.1689, "step": 195360 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.0001750364624241826, "loss": 2.0584, "step": 195365 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001750352406460622, "loss": 2.1229, "step": 195370 }, { "epoch": 0.46, "grad_norm": 1.890625, "learning_rate": 0.00017503401884230848, "loss": 2.0428, "step": 195375 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.0001750327970129217, "loss": 2.1062, "step": 195380 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.0001750315751579024, "loss": 2.1543, "step": 195385 }, { "epoch": 0.46, "grad_norm": 2.40625, "learning_rate": 0.00017503035327725094, "loss": 2.1061, "step": 195390 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017502913137096778, "loss": 1.9714, "step": 195395 }, { "epoch": 0.46, "grad_norm": 1.6796875, "learning_rate": 0.00017502790943905327, "loss": 2.0902, "step": 195400 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017502668748150787, "loss": 2.1838, "step": 195405 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.000175025465498332, "loss": 1.8679, "step": 195410 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001750242434895261, "loss": 2.0228, "step": 195415 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.00017502302145509053, "loss": 2.1727, "step": 195420 }, { "epoch": 0.46, "grad_norm": 2.671875, "learning_rate": 0.0001750217993950257, "loss": 2.0879, "step": 195425 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017502057730933214, "loss": 2.1672, "step": 195430 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017501935519801014, "loss": 1.8833, "step": 195435 }, { "epoch": 0.46, "grad_norm": 2.578125, "learning_rate": 0.00017501813306106017, "loss": 2.2859, "step": 195440 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017501691089848265, "loss": 2.0376, "step": 195445 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017501568871027798, "loss": 2.1819, "step": 195450 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017501446649644662, "loss": 2.3197, "step": 195455 }, { "epoch": 0.46, "grad_norm": 1.7734375, "learning_rate": 0.00017501324425698894, "loss": 2.1122, "step": 195460 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017501202199190536, "loss": 2.0108, "step": 195465 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.00017501079970119637, "loss": 2.1377, "step": 195470 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017500957738486228, "loss": 2.2143, "step": 195475 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017500835504290354, "loss": 2.1677, "step": 195480 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.00017500713267532063, "loss": 1.9554, "step": 195485 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.0001750059102821139, "loss": 2.0997, "step": 195490 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017500468786328378, "loss": 2.0623, "step": 195495 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017500346541883075, "loss": 2.0927, "step": 195500 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017500224294875514, "loss": 2.124, "step": 195505 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001750010204530574, "loss": 2.1239, "step": 195510 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017499979793173797, "loss": 2.073, "step": 195515 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017499857538479723, "loss": 2.087, "step": 195520 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017499735281223562, "loss": 1.9427, "step": 195525 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017499613021405356, "loss": 1.9672, "step": 195530 }, { "epoch": 0.46, "grad_norm": 1.7578125, "learning_rate": 0.00017499490759025145, "loss": 2.1761, "step": 195535 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017499368494082972, "loss": 2.1067, "step": 195540 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001749924622657888, "loss": 2.1279, "step": 195545 }, { "epoch": 0.46, "grad_norm": 1.78125, "learning_rate": 0.0001749912395651291, "loss": 1.9632, "step": 195550 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.000174990016838851, "loss": 1.9933, "step": 195555 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.000174988794086955, "loss": 2.1133, "step": 195560 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017498757130944144, "loss": 2.3082, "step": 195565 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017498634850631075, "loss": 2.1662, "step": 195570 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.0001749851256775634, "loss": 1.8714, "step": 195575 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017498390282319975, "loss": 2.0587, "step": 195580 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.00017498267994322023, "loss": 1.9925, "step": 195585 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017498145703762527, "loss": 2.0469, "step": 195590 }, { "epoch": 0.46, "grad_norm": 1.625, "learning_rate": 0.00017498023410641528, "loss": 2.0487, "step": 195595 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001749790111495907, "loss": 2.1002, "step": 195600 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017497778816715195, "loss": 1.9663, "step": 195605 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017497656515909937, "loss": 1.9056, "step": 195610 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017497534212543346, "loss": 1.9537, "step": 195615 }, { "epoch": 0.46, "grad_norm": 1.734375, "learning_rate": 0.00017497411906615464, "loss": 1.9399, "step": 195620 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017497289598126327, "loss": 2.0126, "step": 195625 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.00017497167287075982, "loss": 2.1361, "step": 195630 }, { "epoch": 0.46, "grad_norm": 2.578125, "learning_rate": 0.00017497044973464467, "loss": 2.1931, "step": 195635 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.00017496922657291828, "loss": 2.3392, "step": 195640 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017496800338558102, "loss": 2.1419, "step": 195645 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.00017496678017263332, "loss": 2.1551, "step": 195650 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017496555693407562, "loss": 1.8757, "step": 195655 }, { "epoch": 0.46, "grad_norm": 1.9609375, "learning_rate": 0.00017496433366990833, "loss": 2.1649, "step": 195660 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017496311038013186, "loss": 2.2979, "step": 195665 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.00017496188706474664, "loss": 2.2126, "step": 195670 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.0001749606637237531, "loss": 2.0828, "step": 195675 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001749594403571516, "loss": 1.999, "step": 195680 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017495821696494258, "loss": 2.1715, "step": 195685 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017495699354712653, "loss": 2.1897, "step": 195690 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017495577010370376, "loss": 2.0142, "step": 195695 }, { "epoch": 0.46, "grad_norm": 1.8671875, "learning_rate": 0.0001749545466346748, "loss": 2.1264, "step": 195700 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017495332314003994, "loss": 2.0651, "step": 195705 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.00017495209961979972, "loss": 1.9734, "step": 195710 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017495087607395445, "loss": 2.0923, "step": 195715 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.00017494965250250465, "loss": 1.9432, "step": 195720 }, { "epoch": 0.46, "grad_norm": 1.875, "learning_rate": 0.00017494842890545067, "loss": 2.1258, "step": 195725 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017494720528279297, "loss": 2.2281, "step": 195730 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017494598163453188, "loss": 2.1509, "step": 195735 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017494475796066793, "loss": 2.0344, "step": 195740 }, { "epoch": 0.46, "grad_norm": 1.828125, "learning_rate": 0.0001749435342612015, "loss": 2.0993, "step": 195745 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.000174942310536133, "loss": 2.0743, "step": 195750 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017494108678546284, "loss": 2.1188, "step": 195755 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.0001749398630091914, "loss": 1.9494, "step": 195760 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.0001749386392073192, "loss": 2.041, "step": 195765 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.0001749374153798466, "loss": 2.2487, "step": 195770 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017493619152677398, "loss": 2.2044, "step": 195775 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017493496764810185, "loss": 2.0446, "step": 195780 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.00017493374374383055, "loss": 2.1254, "step": 195785 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.0001749325198139605, "loss": 1.8694, "step": 195790 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001749312958584922, "loss": 2.2675, "step": 195795 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017493007187742595, "loss": 2.0906, "step": 195800 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017492884787076226, "loss": 2.0573, "step": 195805 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017492762383850151, "loss": 2.3026, "step": 195810 }, { "epoch": 0.46, "grad_norm": 2.640625, "learning_rate": 0.00017492639978064413, "loss": 2.305, "step": 195815 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017492517569719053, "loss": 2.1282, "step": 195820 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.0001749239515881411, "loss": 2.037, "step": 195825 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017492272745349636, "loss": 2.1893, "step": 195830 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001749215032932566, "loss": 2.0182, "step": 195835 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.00017492027910742233, "loss": 2.2705, "step": 195840 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017491905489599392, "loss": 2.2873, "step": 195845 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.0001749178306589718, "loss": 2.1623, "step": 195850 }, { "epoch": 0.46, "grad_norm": 1.7421875, "learning_rate": 0.0001749166063963564, "loss": 2.0936, "step": 195855 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017491538210814812, "loss": 2.0951, "step": 195860 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.00017491415779434736, "loss": 2.0483, "step": 195865 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017491293345495458, "loss": 2.1298, "step": 195870 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.0001749117090899702, "loss": 2.2337, "step": 195875 }, { "epoch": 0.46, "grad_norm": 1.6484375, "learning_rate": 0.00017491048469939462, "loss": 2.2745, "step": 195880 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.00017490926028322824, "loss": 1.9877, "step": 195885 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017490803584147154, "loss": 2.085, "step": 195890 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017490681137412488, "loss": 2.0196, "step": 195895 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.0001749055868811887, "loss": 2.1174, "step": 195900 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017490436236266337, "loss": 2.2053, "step": 195905 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017490313781854937, "loss": 2.05, "step": 195910 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.0001749019132488471, "loss": 2.067, "step": 195915 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.000174900688653557, "loss": 2.1868, "step": 195920 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017489946403267945, "loss": 1.9071, "step": 195925 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.0001748982393862149, "loss": 2.0748, "step": 195930 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.00017489701471416373, "loss": 2.0962, "step": 195935 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.0001748957900165264, "loss": 2.2224, "step": 195940 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.0001748945652933033, "loss": 1.8719, "step": 195945 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017489334054449483, "loss": 1.9593, "step": 195950 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.0001748921157701015, "loss": 2.2858, "step": 195955 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017489089097012364, "loss": 2.2147, "step": 195960 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017488966614456167, "loss": 2.0009, "step": 195965 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017488844129341606, "loss": 2.1086, "step": 195970 }, { "epoch": 0.46, "grad_norm": 1.75, "learning_rate": 0.00017488721641668718, "loss": 1.8411, "step": 195975 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017488599151437546, "loss": 2.0046, "step": 195980 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017488476658648137, "loss": 2.0015, "step": 195985 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017488354163300524, "loss": 2.107, "step": 195990 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017488231665394754, "loss": 2.0378, "step": 195995 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017488109164930872, "loss": 1.9934, "step": 196000 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017487986661908912, "loss": 2.0336, "step": 196005 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.0001748786415632892, "loss": 2.0419, "step": 196010 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001748774164819094, "loss": 2.0785, "step": 196015 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017487619137495012, "loss": 2.2038, "step": 196020 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017487496624241176, "loss": 2.0562, "step": 196025 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017487374108429477, "loss": 2.1224, "step": 196030 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017487251590059955, "loss": 2.1016, "step": 196035 }, { "epoch": 0.46, "grad_norm": 3.265625, "learning_rate": 0.00017487129069132648, "loss": 2.2287, "step": 196040 }, { "epoch": 0.46, "grad_norm": 1.7421875, "learning_rate": 0.00017487006545647605, "loss": 1.9231, "step": 196045 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017486884019604867, "loss": 2.0732, "step": 196050 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017486761491004472, "loss": 2.0687, "step": 196055 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017486638959846463, "loss": 2.3037, "step": 196060 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017486516426130883, "loss": 2.2327, "step": 196065 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.00017486393889857774, "loss": 2.042, "step": 196070 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.00017486271351027175, "loss": 2.2961, "step": 196075 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.0001748614880963913, "loss": 2.1181, "step": 196080 }, { "epoch": 0.46, "grad_norm": 2.75, "learning_rate": 0.0001748602626569368, "loss": 2.1486, "step": 196085 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001748590371919087, "loss": 2.0352, "step": 196090 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017485781170130737, "loss": 2.1994, "step": 196095 }, { "epoch": 0.46, "grad_norm": 2.65625, "learning_rate": 0.00017485658618513326, "loss": 2.0749, "step": 196100 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017485536064338684, "loss": 2.0976, "step": 196105 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017485413507606842, "loss": 1.9393, "step": 196110 }, { "epoch": 0.46, "grad_norm": 1.640625, "learning_rate": 0.00017485290948317846, "loss": 1.9601, "step": 196115 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.0001748516838647174, "loss": 2.0151, "step": 196120 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017485045822068567, "loss": 2.1885, "step": 196125 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017484923255108362, "loss": 2.0041, "step": 196130 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.00017484800685591174, "loss": 2.2848, "step": 196135 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017484678113517042, "loss": 2.2443, "step": 196140 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.0001748455553888601, "loss": 2.1661, "step": 196145 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017484432961698115, "loss": 2.2017, "step": 196150 }, { "epoch": 0.46, "grad_norm": 2.671875, "learning_rate": 0.00017484310381953404, "loss": 2.15, "step": 196155 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017484187799651917, "loss": 2.1596, "step": 196160 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017484065214793694, "loss": 2.1884, "step": 196165 }, { "epoch": 0.46, "grad_norm": 2.40625, "learning_rate": 0.0001748394262737878, "loss": 2.148, "step": 196170 }, { "epoch": 0.46, "grad_norm": 3.078125, "learning_rate": 0.00017483820037407215, "loss": 2.021, "step": 196175 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.00017483697444879045, "loss": 2.001, "step": 196180 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017483574849794304, "loss": 2.1196, "step": 196185 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.0001748345225215304, "loss": 2.1797, "step": 196190 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.0001748332965195529, "loss": 2.2242, "step": 196195 }, { "epoch": 0.46, "grad_norm": 3.796875, "learning_rate": 0.000174832070492011, "loss": 2.0592, "step": 196200 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017483084443890512, "loss": 2.1007, "step": 196205 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.00017482961836023567, "loss": 2.2352, "step": 196210 }, { "epoch": 0.46, "grad_norm": 2.515625, "learning_rate": 0.00017482839225600304, "loss": 2.1136, "step": 196215 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.00017482716612620772, "loss": 2.084, "step": 196220 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017482593997085005, "loss": 2.1654, "step": 196225 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017482471378993047, "loss": 2.0758, "step": 196230 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017482348758344945, "loss": 2.0284, "step": 196235 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.0001748222613514073, "loss": 1.9934, "step": 196240 }, { "epoch": 0.46, "grad_norm": 1.7890625, "learning_rate": 0.00017482103509380457, "loss": 2.1859, "step": 196245 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.0001748198088106416, "loss": 2.001, "step": 196250 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017481858250191886, "loss": 2.2231, "step": 196255 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001748173561676367, "loss": 1.9216, "step": 196260 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.0001748161298077956, "loss": 2.1728, "step": 196265 }, { "epoch": 0.46, "grad_norm": 2.578125, "learning_rate": 0.0001748149034223959, "loss": 2.1285, "step": 196270 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.0001748136770114381, "loss": 2.1913, "step": 196275 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017481245057492258, "loss": 2.0631, "step": 196280 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.0001748112241128498, "loss": 2.0029, "step": 196285 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.00017480999762522012, "loss": 2.0132, "step": 196290 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017480877111203402, "loss": 2.1339, "step": 196295 }, { "epoch": 0.46, "grad_norm": 1.78125, "learning_rate": 0.00017480754457329186, "loss": 2.1278, "step": 196300 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.0001748063180089941, "loss": 2.1686, "step": 196305 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017480509141914116, "loss": 2.106, "step": 196310 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.0001748038648037334, "loss": 2.0752, "step": 196315 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.0001748026381627713, "loss": 1.9244, "step": 196320 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.0001748014114962553, "loss": 2.0897, "step": 196325 }, { "epoch": 0.46, "grad_norm": 1.84375, "learning_rate": 0.00017480018480418573, "loss": 2.3355, "step": 196330 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.0001747989580865631, "loss": 2.1465, "step": 196335 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017479773134338776, "loss": 2.1688, "step": 196340 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017479650457466017, "loss": 2.0026, "step": 196345 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017479527778038075, "loss": 2.0028, "step": 196350 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017479405096054986, "loss": 2.2735, "step": 196355 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.000174792824115168, "loss": 2.0118, "step": 196360 }, { "epoch": 0.46, "grad_norm": 1.9765625, "learning_rate": 0.00017479159724423558, "loss": 1.9879, "step": 196365 }, { "epoch": 0.46, "grad_norm": 1.890625, "learning_rate": 0.00017479037034775295, "loss": 2.028, "step": 196370 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.00017478914342572058, "loss": 2.1336, "step": 196375 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017478791647813894, "loss": 2.2335, "step": 196380 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001747866895050083, "loss": 2.1796, "step": 196385 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017478546250632922, "loss": 2.3876, "step": 196390 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017478423548210206, "loss": 1.9678, "step": 196395 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017478300843232726, "loss": 2.255, "step": 196400 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.0001747817813570052, "loss": 2.0022, "step": 196405 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.0001747805542561364, "loss": 2.1889, "step": 196410 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017477932712972114, "loss": 2.2037, "step": 196415 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.0001747780999777599, "loss": 1.9799, "step": 196420 }, { "epoch": 0.46, "grad_norm": 1.7890625, "learning_rate": 0.00017477687280025315, "loss": 2.2353, "step": 196425 }, { "epoch": 0.46, "grad_norm": 1.7734375, "learning_rate": 0.00017477564559720122, "loss": 2.1491, "step": 196430 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.0001747744183686046, "loss": 1.9994, "step": 196435 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.0001747731911144637, "loss": 2.0898, "step": 196440 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.00017477196383477889, "loss": 2.0628, "step": 196445 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001747707365295506, "loss": 2.1348, "step": 196450 }, { "epoch": 0.46, "grad_norm": 1.890625, "learning_rate": 0.00017476950919877933, "loss": 2.1645, "step": 196455 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.0001747682818424654, "loss": 2.2154, "step": 196460 }, { "epoch": 0.46, "grad_norm": 1.859375, "learning_rate": 0.00017476705446060927, "loss": 1.9623, "step": 196465 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017476582705321136, "loss": 2.1713, "step": 196470 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.0001747645996202721, "loss": 2.0738, "step": 196475 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017476337216179187, "loss": 2.1506, "step": 196480 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017476214467777117, "loss": 2.0678, "step": 196485 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017476091716821032, "loss": 2.1596, "step": 196490 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.0001747596896331098, "loss": 2.1655, "step": 196495 }, { "epoch": 0.46, "grad_norm": 2.234375, "learning_rate": 0.00017475846207247, "loss": 2.1158, "step": 196500 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017475723448629138, "loss": 1.9253, "step": 196505 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017475600687457432, "loss": 2.0358, "step": 196510 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017475477923731926, "loss": 2.3587, "step": 196515 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001747535515745266, "loss": 2.1472, "step": 196520 }, { "epoch": 0.46, "grad_norm": 2.4375, "learning_rate": 0.00017475232388619675, "loss": 2.1463, "step": 196525 }, { "epoch": 0.46, "grad_norm": 1.6171875, "learning_rate": 0.0001747510961723302, "loss": 1.9424, "step": 196530 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.00017474986843292727, "loss": 2.081, "step": 196535 }, { "epoch": 0.46, "grad_norm": 2.65625, "learning_rate": 0.00017474864066798843, "loss": 2.1519, "step": 196540 }, { "epoch": 0.46, "grad_norm": 2.5, "learning_rate": 0.00017474741287751416, "loss": 2.1137, "step": 196545 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.0001747461850615048, "loss": 1.9736, "step": 196550 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017474495721996075, "loss": 2.0029, "step": 196555 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017474372935288247, "loss": 2.1791, "step": 196560 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.0001747425014602704, "loss": 2.1606, "step": 196565 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017474127354212492, "loss": 2.155, "step": 196570 }, { "epoch": 0.46, "grad_norm": 1.8984375, "learning_rate": 0.00017474004559844648, "loss": 2.0645, "step": 196575 }, { "epoch": 0.46, "grad_norm": 1.7578125, "learning_rate": 0.00017473881762923544, "loss": 2.159, "step": 196580 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001747375896344923, "loss": 2.0588, "step": 196585 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017473636161421748, "loss": 2.1003, "step": 196590 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017473513356841132, "loss": 1.9706, "step": 196595 }, { "epoch": 0.46, "grad_norm": 1.9609375, "learning_rate": 0.0001747339054970743, "loss": 2.0384, "step": 196600 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.0001747326774002068, "loss": 2.0172, "step": 196605 }, { "epoch": 0.46, "grad_norm": 2.59375, "learning_rate": 0.00017473144927780928, "loss": 2.1451, "step": 196610 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017473022112988212, "loss": 2.1616, "step": 196615 }, { "epoch": 0.46, "grad_norm": 1.8359375, "learning_rate": 0.0001747289929564258, "loss": 2.0749, "step": 196620 }, { "epoch": 0.46, "grad_norm": 1.78125, "learning_rate": 0.00017472776475744065, "loss": 2.0569, "step": 196625 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017472653653292718, "loss": 2.2349, "step": 196630 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017472530828288577, "loss": 2.2036, "step": 196635 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.00017472408000731684, "loss": 1.9753, "step": 196640 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.0001747228517062208, "loss": 1.9769, "step": 196645 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.00017472162337959807, "loss": 2.2116, "step": 196650 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.00017472039502744907, "loss": 2.282, "step": 196655 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017471916664977427, "loss": 2.3974, "step": 196660 }, { "epoch": 0.46, "grad_norm": 2.40625, "learning_rate": 0.000174717938246574, "loss": 2.0501, "step": 196665 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017471670981784876, "loss": 2.1068, "step": 196670 }, { "epoch": 0.46, "grad_norm": 1.7265625, "learning_rate": 0.0001747154813635989, "loss": 2.1041, "step": 196675 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017471425288382493, "loss": 1.9679, "step": 196680 }, { "epoch": 0.46, "grad_norm": 1.90625, "learning_rate": 0.00017471302437852717, "loss": 1.9255, "step": 196685 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017471179584770612, "loss": 2.1632, "step": 196690 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017471056729136212, "loss": 1.9669, "step": 196695 }, { "epoch": 0.46, "grad_norm": 1.9140625, "learning_rate": 0.0001747093387094957, "loss": 1.9974, "step": 196700 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017470811010210718, "loss": 2.0183, "step": 196705 }, { "epoch": 0.46, "grad_norm": 1.703125, "learning_rate": 0.000174706881469197, "loss": 2.1804, "step": 196710 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001747056528107656, "loss": 2.0189, "step": 196715 }, { "epoch": 0.46, "grad_norm": 1.546875, "learning_rate": 0.0001747044241268134, "loss": 2.0325, "step": 196720 }, { "epoch": 0.46, "grad_norm": 1.8359375, "learning_rate": 0.00017470319541734082, "loss": 2.0618, "step": 196725 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017470196668234827, "loss": 1.9459, "step": 196730 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017470073792183618, "loss": 2.1785, "step": 196735 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.00017469950913580494, "loss": 2.0968, "step": 196740 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.000174698280324255, "loss": 2.003, "step": 196745 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.0001746970514871868, "loss": 2.1008, "step": 196750 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.0001746958226246007, "loss": 1.9017, "step": 196755 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.0001746945937364972, "loss": 2.0898, "step": 196760 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017469336482287662, "loss": 2.0802, "step": 196765 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.00017469213588373946, "loss": 2.1323, "step": 196770 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.0001746909069190861, "loss": 2.1745, "step": 196775 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017468967792891698, "loss": 2.1288, "step": 196780 }, { "epoch": 0.46, "grad_norm": 1.75, "learning_rate": 0.0001746884489132325, "loss": 2.0568, "step": 196785 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.0001746872198720331, "loss": 2.2399, "step": 196790 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.0001746859908053192, "loss": 2.1243, "step": 196795 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017468476171309119, "loss": 2.2126, "step": 196800 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017468353259534952, "loss": 2.3118, "step": 196805 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017468230345209457, "loss": 2.1043, "step": 196810 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017468107428332685, "loss": 1.9493, "step": 196815 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001746798450890467, "loss": 2.1025, "step": 196820 }, { "epoch": 0.46, "grad_norm": 2.328125, "learning_rate": 0.00017467861586925457, "loss": 2.1805, "step": 196825 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017467738662395083, "loss": 2.0032, "step": 196830 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017467615735313597, "loss": 2.0784, "step": 196835 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017467492805681037, "loss": 2.0076, "step": 196840 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017467369873497445, "loss": 2.0519, "step": 196845 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017467246938762866, "loss": 2.0693, "step": 196850 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.0001746712400147734, "loss": 1.9887, "step": 196855 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.0001746700106164091, "loss": 2.1003, "step": 196860 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017466878119253614, "loss": 2.1867, "step": 196865 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.000174667551743155, "loss": 2.2033, "step": 196870 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017466632226826603, "loss": 2.0893, "step": 196875 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017466509276786973, "loss": 2.0256, "step": 196880 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017466386324196647, "loss": 2.1607, "step": 196885 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017466263369055667, "loss": 2.0936, "step": 196890 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017466140411364077, "loss": 2.1579, "step": 196895 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.00017466017451121915, "loss": 2.246, "step": 196900 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.0001746589448832923, "loss": 2.359, "step": 196905 }, { "epoch": 0.46, "grad_norm": 1.890625, "learning_rate": 0.0001746577152298606, "loss": 2.068, "step": 196910 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.00017465648555092448, "loss": 2.2509, "step": 196915 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.0001746552558464843, "loss": 2.0579, "step": 196920 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017465402611654054, "loss": 2.1187, "step": 196925 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017465279636109363, "loss": 2.0285, "step": 196930 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.000174651566580144, "loss": 2.18, "step": 196935 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017465033677369199, "loss": 2.1193, "step": 196940 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001746491069417381, "loss": 2.0924, "step": 196945 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017464787708428272, "loss": 1.9823, "step": 196950 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017464664720132626, "loss": 1.9154, "step": 196955 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017464541729286914, "loss": 1.989, "step": 196960 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017464418735891184, "loss": 2.0564, "step": 196965 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.00017464295739945466, "loss": 2.1399, "step": 196970 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017464172741449817, "loss": 2.1729, "step": 196975 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017464049740404267, "loss": 1.9703, "step": 196980 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.0001746392673680886, "loss": 2.013, "step": 196985 }, { "epoch": 0.46, "grad_norm": 1.8359375, "learning_rate": 0.00017463803730663645, "loss": 2.0661, "step": 196990 }, { "epoch": 0.46, "grad_norm": 1.953125, "learning_rate": 0.0001746368072196866, "loss": 2.0871, "step": 196995 }, { "epoch": 0.46, "grad_norm": 2.578125, "learning_rate": 0.0001746355771072394, "loss": 2.3024, "step": 197000 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017463434696929537, "loss": 2.0976, "step": 197005 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.0001746331168058549, "loss": 2.282, "step": 197010 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.0001746318866169184, "loss": 2.0083, "step": 197015 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.0001746306564024863, "loss": 1.9242, "step": 197020 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.000174629426162559, "loss": 1.9064, "step": 197025 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017462819589713691, "loss": 2.0594, "step": 197030 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017462696560622052, "loss": 2.1131, "step": 197035 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.0001746257352898102, "loss": 2.2535, "step": 197040 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.00017462450494790636, "loss": 2.1768, "step": 197045 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017462327458050942, "loss": 2.3347, "step": 197050 }, { "epoch": 0.46, "grad_norm": 2.546875, "learning_rate": 0.00017462204418761985, "loss": 2.101, "step": 197055 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.000174620813769238, "loss": 2.1218, "step": 197060 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017461958332536437, "loss": 2.1008, "step": 197065 }, { "epoch": 0.46, "grad_norm": 1.765625, "learning_rate": 0.0001746183528559993, "loss": 1.9892, "step": 197070 }, { "epoch": 0.46, "grad_norm": 1.8203125, "learning_rate": 0.00017461712236114328, "loss": 2.2002, "step": 197075 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001746158918407967, "loss": 2.1111, "step": 197080 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017461466129495995, "loss": 2.0724, "step": 197085 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017461343072363347, "loss": 2.1606, "step": 197090 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017461220012681772, "loss": 2.0524, "step": 197095 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017461096950451306, "loss": 2.2704, "step": 197100 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017460973885671997, "loss": 1.8786, "step": 197105 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.0001746085081834388, "loss": 2.0678, "step": 197110 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017460727748467005, "loss": 2.1859, "step": 197115 }, { "epoch": 0.46, "grad_norm": 1.921875, "learning_rate": 0.0001746060467604141, "loss": 2.1258, "step": 197120 }, { "epoch": 0.46, "grad_norm": 3.9375, "learning_rate": 0.00017460481601067133, "loss": 2.0963, "step": 197125 }, { "epoch": 0.46, "grad_norm": 1.9921875, "learning_rate": 0.00017460358523544226, "loss": 1.9111, "step": 197130 }, { "epoch": 0.46, "grad_norm": 2.3125, "learning_rate": 0.00017460235443472722, "loss": 2.0065, "step": 197135 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017460112360852666, "loss": 2.0396, "step": 197140 }, { "epoch": 0.46, "grad_norm": 2.453125, "learning_rate": 0.000174599892756841, "loss": 2.0551, "step": 197145 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017459866187967066, "loss": 2.1571, "step": 197150 }, { "epoch": 0.46, "grad_norm": 2.734375, "learning_rate": 0.00017459743097701611, "loss": 2.1622, "step": 197155 }, { "epoch": 0.46, "grad_norm": 1.8671875, "learning_rate": 0.00017459620004887767, "loss": 2.0834, "step": 197160 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017459496909525584, "loss": 2.1065, "step": 197165 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.000174593738116151, "loss": 2.2209, "step": 197170 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017459250711156362, "loss": 2.1314, "step": 197175 }, { "epoch": 0.46, "grad_norm": 2.625, "learning_rate": 0.00017459127608149405, "loss": 2.2342, "step": 197180 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.0001745900450259428, "loss": 2.08, "step": 197185 }, { "epoch": 0.46, "grad_norm": 1.5703125, "learning_rate": 0.00017458881394491019, "loss": 2.1786, "step": 197190 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.00017458758283839666, "loss": 2.1662, "step": 197195 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017458635170640273, "loss": 2.1916, "step": 197200 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.0001745851205489287, "loss": 2.0528, "step": 197205 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017458388936597506, "loss": 1.891, "step": 197210 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017458265815754222, "loss": 1.9806, "step": 197215 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017458142692363055, "loss": 2.0508, "step": 197220 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017458019566424056, "loss": 2.1184, "step": 197225 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017457896437937263, "loss": 2.1063, "step": 197230 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017457773306902712, "loss": 2.1812, "step": 197235 }, { "epoch": 0.46, "grad_norm": 1.9765625, "learning_rate": 0.00017457650173320453, "loss": 2.196, "step": 197240 }, { "epoch": 0.46, "grad_norm": 2.5625, "learning_rate": 0.00017457527037190526, "loss": 2.2402, "step": 197245 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017457403898512973, "loss": 2.2845, "step": 197250 }, { "epoch": 0.46, "grad_norm": 2.078125, "learning_rate": 0.00017457280757287834, "loss": 2.1123, "step": 197255 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017457157613515154, "loss": 2.096, "step": 197260 }, { "epoch": 0.46, "grad_norm": 1.9609375, "learning_rate": 0.00017457034467194973, "loss": 2.1431, "step": 197265 }, { "epoch": 0.46, "grad_norm": 1.9375, "learning_rate": 0.00017456911318327334, "loss": 1.9461, "step": 197270 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.00017456788166912276, "loss": 1.976, "step": 197275 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001745666501294985, "loss": 2.1394, "step": 197280 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017456541856440086, "loss": 2.0378, "step": 197285 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017456418697383037, "loss": 1.9892, "step": 197290 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017456295535778736, "loss": 2.1818, "step": 197295 }, { "epoch": 0.46, "grad_norm": 2.78125, "learning_rate": 0.00017456172371627232, "loss": 2.0042, "step": 197300 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.00017456049204928563, "loss": 2.1839, "step": 197305 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017455926035682772, "loss": 2.1604, "step": 197310 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017455802863889904, "loss": 1.9261, "step": 197315 }, { "epoch": 0.46, "grad_norm": 2.109375, "learning_rate": 0.00017455679689549995, "loss": 2.001, "step": 197320 }, { "epoch": 0.46, "grad_norm": 2.28125, "learning_rate": 0.00017455556512663091, "loss": 1.9996, "step": 197325 }, { "epoch": 0.46, "grad_norm": 2.34375, "learning_rate": 0.00017455433333229237, "loss": 2.0283, "step": 197330 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.0001745531015124847, "loss": 2.1362, "step": 197335 }, { "epoch": 0.46, "grad_norm": 2.46875, "learning_rate": 0.00017455186966720834, "loss": 2.0523, "step": 197340 }, { "epoch": 0.46, "grad_norm": 1.8828125, "learning_rate": 0.0001745506377964637, "loss": 2.1142, "step": 197345 }, { "epoch": 0.46, "grad_norm": 2.734375, "learning_rate": 0.0001745494059002512, "loss": 2.2982, "step": 197350 }, { "epoch": 0.46, "grad_norm": 2.125, "learning_rate": 0.00017454817397857128, "loss": 2.2581, "step": 197355 }, { "epoch": 0.46, "grad_norm": 1.9609375, "learning_rate": 0.00017454694203142436, "loss": 2.1751, "step": 197360 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.00017454571005881087, "loss": 2.1181, "step": 197365 }, { "epoch": 0.46, "grad_norm": 2.03125, "learning_rate": 0.00017454447806073117, "loss": 2.0371, "step": 197370 }, { "epoch": 0.46, "grad_norm": 2.40625, "learning_rate": 0.00017454324603718578, "loss": 2.1675, "step": 197375 }, { "epoch": 0.46, "grad_norm": 2.390625, "learning_rate": 0.000174542013988175, "loss": 2.1451, "step": 197380 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017454078191369936, "loss": 2.0418, "step": 197385 }, { "epoch": 0.46, "grad_norm": 2.296875, "learning_rate": 0.00017453954981375922, "loss": 2.2111, "step": 197390 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017453831768835502, "loss": 2.0629, "step": 197395 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017453708553748717, "loss": 2.1783, "step": 197400 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.00017453585336115612, "loss": 2.0531, "step": 197405 }, { "epoch": 0.46, "grad_norm": 1.96875, "learning_rate": 0.0001745346211593623, "loss": 1.9998, "step": 197410 }, { "epoch": 0.46, "grad_norm": 1.78125, "learning_rate": 0.00017453338893210605, "loss": 2.1826, "step": 197415 }, { "epoch": 0.46, "grad_norm": 2.625, "learning_rate": 0.00017453215667938784, "loss": 2.099, "step": 197420 }, { "epoch": 0.46, "grad_norm": 2.53125, "learning_rate": 0.0001745309244012081, "loss": 1.9406, "step": 197425 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017452969209756728, "loss": 2.1195, "step": 197430 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.00017452845976846572, "loss": 2.314, "step": 197435 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.00017452722741390393, "loss": 2.0132, "step": 197440 }, { "epoch": 0.46, "grad_norm": 1.984375, "learning_rate": 0.00017452599503388225, "loss": 2.0269, "step": 197445 }, { "epoch": 0.46, "grad_norm": 1.890625, "learning_rate": 0.00017452476262840117, "loss": 2.0855, "step": 197450 }, { "epoch": 0.46, "grad_norm": 2.1875, "learning_rate": 0.00017452353019746107, "loss": 2.1518, "step": 197455 }, { "epoch": 0.46, "grad_norm": 2.09375, "learning_rate": 0.00017452229774106236, "loss": 2.1983, "step": 197460 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.00017452106525920547, "loss": 2.1068, "step": 197465 }, { "epoch": 0.46, "grad_norm": 2.0625, "learning_rate": 0.00017451983275189087, "loss": 2.2713, "step": 197470 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017451860021911895, "loss": 2.1103, "step": 197475 }, { "epoch": 0.46, "grad_norm": 3.015625, "learning_rate": 0.00017451736766089008, "loss": 2.0054, "step": 197480 }, { "epoch": 0.46, "grad_norm": 2.015625, "learning_rate": 0.00017451613507720476, "loss": 2.2756, "step": 197485 }, { "epoch": 0.46, "grad_norm": 1.9296875, "learning_rate": 0.00017451490246806338, "loss": 2.0018, "step": 197490 }, { "epoch": 0.46, "grad_norm": 1.9453125, "learning_rate": 0.00017451366983346636, "loss": 2.2259, "step": 197495 }, { "epoch": 0.46, "grad_norm": 2.359375, "learning_rate": 0.0001745124371734141, "loss": 2.2114, "step": 197500 }, { "epoch": 0.46, "grad_norm": 2.5625, "learning_rate": 0.00017451120448790705, "loss": 2.1485, "step": 197505 }, { "epoch": 0.46, "grad_norm": 2.265625, "learning_rate": 0.0001745099717769456, "loss": 2.1372, "step": 197510 }, { "epoch": 0.46, "grad_norm": 2.140625, "learning_rate": 0.00017450873904053022, "loss": 2.212, "step": 197515 }, { "epoch": 0.46, "grad_norm": 2.171875, "learning_rate": 0.0001745075062786613, "loss": 2.1541, "step": 197520 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017450627349133922, "loss": 2.0975, "step": 197525 }, { "epoch": 0.46, "grad_norm": 2.421875, "learning_rate": 0.00017450504067856453, "loss": 2.2105, "step": 197530 }, { "epoch": 0.46, "grad_norm": 2.0, "learning_rate": 0.00017450380784033752, "loss": 2.0584, "step": 197535 }, { "epoch": 0.46, "grad_norm": 2.375, "learning_rate": 0.00017450257497665861, "loss": 2.151, "step": 197540 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.00017450134208752834, "loss": 2.176, "step": 197545 }, { "epoch": 0.46, "grad_norm": 1.8046875, "learning_rate": 0.00017450010917294706, "loss": 2.0781, "step": 197550 }, { "epoch": 0.46, "grad_norm": 1.8125, "learning_rate": 0.00017449887623291517, "loss": 2.2975, "step": 197555 }, { "epoch": 0.46, "grad_norm": 2.21875, "learning_rate": 0.0001744976432674331, "loss": 2.1066, "step": 197560 }, { "epoch": 0.46, "grad_norm": 2.15625, "learning_rate": 0.0001744964102765013, "loss": 2.1421, "step": 197565 }, { "epoch": 0.46, "grad_norm": 2.25, "learning_rate": 0.00017449517726012017, "loss": 2.1191, "step": 197570 }, { "epoch": 0.46, "grad_norm": 2.203125, "learning_rate": 0.00017449394421829013, "loss": 2.2856, "step": 197575 }, { "epoch": 0.46, "grad_norm": 2.046875, "learning_rate": 0.0001744927111510116, "loss": 2.0346, "step": 197580 }, { "epoch": 0.46, "grad_norm": 2.484375, "learning_rate": 0.00017449147805828505, "loss": 2.2744, "step": 197585 }, { "epoch": 0.46, "grad_norm": 2.59375, "learning_rate": 0.0001744902449401108, "loss": 2.003, "step": 197590 }, { "epoch": 0.47, "grad_norm": 2.71875, "learning_rate": 0.00017448901179648938, "loss": 1.9342, "step": 197595 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017448777862742116, "loss": 2.1, "step": 197600 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017448654543290653, "loss": 2.0117, "step": 197605 }, { "epoch": 0.47, "grad_norm": 2.640625, "learning_rate": 0.000174485312212946, "loss": 2.2781, "step": 197610 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.0001744840789675399, "loss": 1.8198, "step": 197615 }, { "epoch": 0.47, "grad_norm": 1.9296875, "learning_rate": 0.00017448284569668868, "loss": 1.9528, "step": 197620 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017448161240039278, "loss": 2.1891, "step": 197625 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017448037907865263, "loss": 1.8968, "step": 197630 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.0001744791457314686, "loss": 2.0463, "step": 197635 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017447791235884115, "loss": 2.2325, "step": 197640 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.0001744766789607707, "loss": 2.1545, "step": 197645 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017447544553725767, "loss": 2.1695, "step": 197650 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017447421208830247, "loss": 1.9852, "step": 197655 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017447297861390552, "loss": 2.0389, "step": 197660 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017447174511406728, "loss": 1.9618, "step": 197665 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.00017447051158878813, "loss": 2.2775, "step": 197670 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017446927803806847, "loss": 2.1829, "step": 197675 }, { "epoch": 0.47, "grad_norm": 2.46875, "learning_rate": 0.00017446804446190882, "loss": 2.2489, "step": 197680 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017446681086030947, "loss": 1.9867, "step": 197685 }, { "epoch": 0.47, "grad_norm": 2.375, "learning_rate": 0.00017446557723327092, "loss": 2.2247, "step": 197690 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.0001744643435807936, "loss": 2.0029, "step": 197695 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.0001744631099028779, "loss": 2.126, "step": 197700 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017446187619952425, "loss": 1.8995, "step": 197705 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001744606424707331, "loss": 2.0917, "step": 197710 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017445940871650478, "loss": 2.1142, "step": 197715 }, { "epoch": 0.47, "grad_norm": 1.828125, "learning_rate": 0.00017445817493683984, "loss": 2.1201, "step": 197720 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001744569411317386, "loss": 2.043, "step": 197725 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.0001744557073012015, "loss": 2.0841, "step": 197730 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017445447344522906, "loss": 2.1074, "step": 197735 }, { "epoch": 0.47, "grad_norm": 2.578125, "learning_rate": 0.00017445323956382156, "loss": 2.047, "step": 197740 }, { "epoch": 0.47, "grad_norm": 1.9296875, "learning_rate": 0.00017445200565697949, "loss": 2.1527, "step": 197745 }, { "epoch": 0.47, "grad_norm": 1.671875, "learning_rate": 0.00017445077172470325, "loss": 2.0952, "step": 197750 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001744495377669933, "loss": 2.1278, "step": 197755 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017444830378385005, "loss": 2.1187, "step": 197760 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017444706977527388, "loss": 2.0619, "step": 197765 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017444583574126524, "loss": 2.0946, "step": 197770 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017444460168182456, "loss": 2.1198, "step": 197775 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001744433675969523, "loss": 2.0383, "step": 197780 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017444213348664876, "loss": 2.0417, "step": 197785 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017444089935091448, "loss": 2.0725, "step": 197790 }, { "epoch": 0.47, "grad_norm": 1.9140625, "learning_rate": 0.00017443966518974983, "loss": 1.9866, "step": 197795 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017443843100315524, "loss": 2.0632, "step": 197800 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017443719679113113, "loss": 1.8624, "step": 197805 }, { "epoch": 0.47, "grad_norm": 3.359375, "learning_rate": 0.00017443596255367792, "loss": 2.1196, "step": 197810 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.00017443472829079606, "loss": 2.0093, "step": 197815 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017443349400248593, "loss": 2.204, "step": 197820 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017443225968874794, "loss": 2.061, "step": 197825 }, { "epoch": 0.47, "grad_norm": 2.40625, "learning_rate": 0.00017443102534958258, "loss": 2.0692, "step": 197830 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017442979098499021, "loss": 2.0991, "step": 197835 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001744285565949713, "loss": 2.1518, "step": 197840 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017442732217952622, "loss": 2.1095, "step": 197845 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017442608773865544, "loss": 2.061, "step": 197850 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.0001744248532723593, "loss": 2.3389, "step": 197855 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017442361878063835, "loss": 2.1585, "step": 197860 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001744223842634929, "loss": 1.9279, "step": 197865 }, { "epoch": 0.47, "grad_norm": 1.8125, "learning_rate": 0.00017442114972092345, "loss": 2.1603, "step": 197870 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017441991515293034, "loss": 2.0559, "step": 197875 }, { "epoch": 0.47, "grad_norm": 2.453125, "learning_rate": 0.0001744186805595141, "loss": 2.26, "step": 197880 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017441744594067505, "loss": 2.1549, "step": 197885 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017441621129641363, "loss": 2.3525, "step": 197890 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.00017441497662673032, "loss": 2.3025, "step": 197895 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017441374193162548, "loss": 2.1692, "step": 197900 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017441250721109957, "loss": 2.1362, "step": 197905 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017441127246515297, "loss": 2.0822, "step": 197910 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017441003769378617, "loss": 1.9173, "step": 197915 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017440880289699954, "loss": 2.03, "step": 197920 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017440756807479352, "loss": 2.1535, "step": 197925 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.0001744063332271685, "loss": 2.3161, "step": 197930 }, { "epoch": 0.47, "grad_norm": 1.875, "learning_rate": 0.00017440509835412493, "loss": 2.0248, "step": 197935 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017440386345566323, "loss": 2.098, "step": 197940 }, { "epoch": 0.47, "grad_norm": 2.59375, "learning_rate": 0.00017440262853178384, "loss": 2.1199, "step": 197945 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017440139358248718, "loss": 2.2429, "step": 197950 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.0001744001586077736, "loss": 2.2992, "step": 197955 }, { "epoch": 0.47, "grad_norm": 1.8359375, "learning_rate": 0.00017439892360764363, "loss": 2.1218, "step": 197960 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.0001743976885820976, "loss": 2.0765, "step": 197965 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.000174396453531136, "loss": 2.1899, "step": 197970 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017439521845475918, "loss": 1.959, "step": 197975 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001743939833529676, "loss": 2.1341, "step": 197980 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017439274822576175, "loss": 2.1362, "step": 197985 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017439151307314195, "loss": 2.0218, "step": 197990 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017439027789510866, "loss": 2.1299, "step": 197995 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.0001743890426916623, "loss": 2.0379, "step": 198000 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017438780746280332, "loss": 2.0333, "step": 198005 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017438657220853208, "loss": 2.1587, "step": 198010 }, { "epoch": 0.47, "grad_norm": 2.4375, "learning_rate": 0.00017438533692884907, "loss": 2.0127, "step": 198015 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017438410162375463, "loss": 1.9773, "step": 198020 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017438286629324926, "loss": 2.2742, "step": 198025 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001743816309373334, "loss": 2.2601, "step": 198030 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017438039555600735, "loss": 1.9621, "step": 198035 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017437916014927164, "loss": 2.0015, "step": 198040 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017437792471712668, "loss": 1.9377, "step": 198045 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017437668925957285, "loss": 2.0237, "step": 198050 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001743754537766106, "loss": 2.1102, "step": 198055 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017437421826824035, "loss": 1.9824, "step": 198060 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.0001743729827344625, "loss": 2.1819, "step": 198065 }, { "epoch": 0.47, "grad_norm": 2.75, "learning_rate": 0.0001743717471752775, "loss": 2.04, "step": 198070 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017437051159068577, "loss": 2.0352, "step": 198075 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017436927598068772, "loss": 2.1872, "step": 198080 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017436804034528378, "loss": 2.0906, "step": 198085 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017436680468447437, "loss": 2.1046, "step": 198090 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017436556899825991, "loss": 1.9995, "step": 198095 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.0001743643332866408, "loss": 2.1738, "step": 198100 }, { "epoch": 0.47, "grad_norm": 1.8515625, "learning_rate": 0.0001743630975496175, "loss": 2.0625, "step": 198105 }, { "epoch": 0.47, "grad_norm": 4.375, "learning_rate": 0.00017436186178719046, "loss": 2.0565, "step": 198110 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017436062599936, "loss": 1.8711, "step": 198115 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017435939018612664, "loss": 2.0852, "step": 198120 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.0001743581543474907, "loss": 2.1371, "step": 198125 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017435691848345273, "loss": 2.082, "step": 198130 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017435568259401309, "loss": 1.9837, "step": 198135 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.00017435444667917218, "loss": 2.0122, "step": 198140 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017435321073893043, "loss": 2.078, "step": 198145 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.0001743519747732883, "loss": 2.0621, "step": 198150 }, { "epoch": 0.47, "grad_norm": 1.9375, "learning_rate": 0.00017435073878224614, "loss": 1.9231, "step": 198155 }, { "epoch": 0.47, "grad_norm": 2.40625, "learning_rate": 0.00017434950276580446, "loss": 2.0267, "step": 198160 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017434826672396364, "loss": 2.1044, "step": 198165 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.0001743470306567241, "loss": 2.3451, "step": 198170 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017434579456408628, "loss": 2.0411, "step": 198175 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017434455844605058, "loss": 2.2274, "step": 198180 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001743433223026174, "loss": 2.2632, "step": 198185 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017434208613378721, "loss": 2.2809, "step": 198190 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017434084993956042, "loss": 2.1449, "step": 198195 }, { "epoch": 0.47, "grad_norm": 1.6953125, "learning_rate": 0.00017433961371993747, "loss": 1.9859, "step": 198200 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017433837747491874, "loss": 2.0641, "step": 198205 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017433714120450465, "loss": 2.2229, "step": 198210 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017433590490869568, "loss": 1.9918, "step": 198215 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.0001743346685874922, "loss": 2.1149, "step": 198220 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017433343224089465, "loss": 2.1596, "step": 198225 }, { "epoch": 0.47, "grad_norm": 2.578125, "learning_rate": 0.00017433219586890342, "loss": 2.0055, "step": 198230 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017433095947151902, "loss": 2.0908, "step": 198235 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.0001743297230487418, "loss": 2.2754, "step": 198240 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.0001743284866005722, "loss": 2.0109, "step": 198245 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.0001743272501270106, "loss": 2.2846, "step": 198250 }, { "epoch": 0.47, "grad_norm": 2.375, "learning_rate": 0.00017432601362805752, "loss": 2.3452, "step": 198255 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.0001743247771037133, "loss": 2.3132, "step": 198260 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.0001743235405539784, "loss": 2.2211, "step": 198265 }, { "epoch": 0.47, "grad_norm": 2.546875, "learning_rate": 0.0001743223039788532, "loss": 2.1131, "step": 198270 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017432106737833817, "loss": 2.3153, "step": 198275 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017431983075243373, "loss": 2.0745, "step": 198280 }, { "epoch": 0.47, "grad_norm": 1.9296875, "learning_rate": 0.00017431859410114027, "loss": 1.9682, "step": 198285 }, { "epoch": 0.47, "grad_norm": 1.859375, "learning_rate": 0.00017431735742445823, "loss": 2.1357, "step": 198290 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017431612072238805, "loss": 2.0123, "step": 198295 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017431488399493014, "loss": 2.1608, "step": 198300 }, { "epoch": 0.47, "grad_norm": 2.4375, "learning_rate": 0.0001743136472420849, "loss": 2.0278, "step": 198305 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017431241046385275, "loss": 2.1318, "step": 198310 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017431117366023416, "loss": 2.1007, "step": 198315 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017430993683122955, "loss": 2.1189, "step": 198320 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017430869997683927, "loss": 2.0165, "step": 198325 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.0001743074630970638, "loss": 1.9543, "step": 198330 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017430622619190355, "loss": 2.0205, "step": 198335 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.000174304989261359, "loss": 2.0921, "step": 198340 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.00017430375230543046, "loss": 2.1249, "step": 198345 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001743025153241184, "loss": 2.1253, "step": 198350 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017430127831742328, "loss": 1.9879, "step": 198355 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001743000412853455, "loss": 2.0741, "step": 198360 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.0001742988042278855, "loss": 2.2179, "step": 198365 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017429756714504364, "loss": 2.1282, "step": 198370 }, { "epoch": 0.47, "grad_norm": 1.9609375, "learning_rate": 0.0001742963300368204, "loss": 2.1697, "step": 198375 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.0001742950929032162, "loss": 2.1995, "step": 198380 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017429385574423142, "loss": 2.1865, "step": 198385 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.0001742926185598665, "loss": 2.0772, "step": 198390 }, { "epoch": 0.47, "grad_norm": 2.71875, "learning_rate": 0.00017429138135012192, "loss": 1.9843, "step": 198395 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017429014411499805, "loss": 2.1383, "step": 198400 }, { "epoch": 0.47, "grad_norm": 2.578125, "learning_rate": 0.00017428890685449531, "loss": 2.0, "step": 198405 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017428766956861412, "loss": 2.0276, "step": 198410 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.00017428643225735491, "loss": 2.2749, "step": 198415 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017428519492071812, "loss": 2.0597, "step": 198420 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017428395755870416, "loss": 2.024, "step": 198425 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017428272017131345, "loss": 1.933, "step": 198430 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017428148275854644, "loss": 2.2445, "step": 198435 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.0001742802453204035, "loss": 2.1849, "step": 198440 }, { "epoch": 0.47, "grad_norm": 2.609375, "learning_rate": 0.0001742790078568851, "loss": 2.0863, "step": 198445 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.0001742777703679916, "loss": 2.1441, "step": 198450 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017427653285372353, "loss": 2.0635, "step": 198455 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.0001742752953140812, "loss": 2.0184, "step": 198460 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017427405774906508, "loss": 2.0018, "step": 198465 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017427282015867562, "loss": 2.0821, "step": 198470 }, { "epoch": 0.47, "grad_norm": 2.53125, "learning_rate": 0.00017427158254291323, "loss": 2.2407, "step": 198475 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001742703449017783, "loss": 1.9299, "step": 198480 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001742691072352713, "loss": 2.1834, "step": 198485 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001742678695433926, "loss": 2.0834, "step": 198490 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017426663182614264, "loss": 2.1045, "step": 198495 }, { "epoch": 0.47, "grad_norm": 1.8203125, "learning_rate": 0.00017426539408352184, "loss": 1.9384, "step": 198500 }, { "epoch": 0.47, "grad_norm": 1.8125, "learning_rate": 0.00017426415631553068, "loss": 2.038, "step": 198505 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001742629185221695, "loss": 2.2069, "step": 198510 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017426168070343878, "loss": 2.0518, "step": 198515 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.0001742604428593389, "loss": 2.1126, "step": 198520 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017425920498987033, "loss": 2.1344, "step": 198525 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017425796709503346, "loss": 2.096, "step": 198530 }, { "epoch": 0.47, "grad_norm": 2.59375, "learning_rate": 0.00017425672917482875, "loss": 1.9815, "step": 198535 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017425549122925658, "loss": 1.9373, "step": 198540 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017425425325831736, "loss": 2.1038, "step": 198545 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017425301526201157, "loss": 2.2014, "step": 198550 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017425177724033958, "loss": 2.0979, "step": 198555 }, { "epoch": 0.47, "grad_norm": 5.34375, "learning_rate": 0.00017425053919330183, "loss": 1.9808, "step": 198560 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017424930112089877, "loss": 2.1376, "step": 198565 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001742480630231308, "loss": 2.2857, "step": 198570 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017424682489999835, "loss": 1.9832, "step": 198575 }, { "epoch": 0.47, "grad_norm": 1.7734375, "learning_rate": 0.00017424558675150183, "loss": 2.0381, "step": 198580 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017424434857764168, "loss": 2.2377, "step": 198585 }, { "epoch": 0.47, "grad_norm": 3.3125, "learning_rate": 0.0001742431103784183, "loss": 1.9172, "step": 198590 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017424187215383215, "loss": 2.3634, "step": 198595 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017424063390388358, "loss": 2.1563, "step": 198600 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001742393956285731, "loss": 2.0777, "step": 198605 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.0001742381573279011, "loss": 2.1401, "step": 198610 }, { "epoch": 0.47, "grad_norm": 1.875, "learning_rate": 0.000174236919001868, "loss": 2.216, "step": 198615 }, { "epoch": 0.47, "grad_norm": 1.9375, "learning_rate": 0.0001742356806504742, "loss": 2.2338, "step": 198620 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017423444227372014, "loss": 2.0271, "step": 198625 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.0001742332038716063, "loss": 1.9936, "step": 198630 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.000174231965444133, "loss": 2.1901, "step": 198635 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017423072699130075, "loss": 2.087, "step": 198640 }, { "epoch": 0.47, "grad_norm": 2.65625, "learning_rate": 0.0001742294885131099, "loss": 2.0571, "step": 198645 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017422825000956095, "loss": 2.2319, "step": 198650 }, { "epoch": 0.47, "grad_norm": 4.59375, "learning_rate": 0.00017422701148065426, "loss": 2.2099, "step": 198655 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017422577292639026, "loss": 2.1084, "step": 198660 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.0001742245343467694, "loss": 1.9516, "step": 198665 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.0001742232957417921, "loss": 2.057, "step": 198670 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.0001742220571114588, "loss": 2.2141, "step": 198675 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017422081845576987, "loss": 2.0192, "step": 198680 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.0001742195797747258, "loss": 2.1372, "step": 198685 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017421834106832692, "loss": 2.205, "step": 198690 }, { "epoch": 0.47, "grad_norm": 1.8125, "learning_rate": 0.00017421710233657373, "loss": 2.2526, "step": 198695 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017421586357946664, "loss": 2.127, "step": 198700 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017421462479700605, "loss": 2.3779, "step": 198705 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017421338598919243, "loss": 2.0553, "step": 198710 }, { "epoch": 0.47, "grad_norm": 1.78125, "learning_rate": 0.00017421214715602617, "loss": 1.9963, "step": 198715 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017421090829750763, "loss": 2.1764, "step": 198720 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017420966941363735, "loss": 2.1949, "step": 198725 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017420843050441572, "loss": 2.0981, "step": 198730 }, { "epoch": 0.47, "grad_norm": 1.6640625, "learning_rate": 0.0001742071915698431, "loss": 2.0664, "step": 198735 }, { "epoch": 0.47, "grad_norm": 2.4375, "learning_rate": 0.00017420595260991999, "loss": 2.0824, "step": 198740 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017420471362464678, "loss": 2.1239, "step": 198745 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017420347461402386, "loss": 2.094, "step": 198750 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.0001742022355780517, "loss": 2.0025, "step": 198755 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017420099651673074, "loss": 1.9062, "step": 198760 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017419975743006134, "loss": 2.1002, "step": 198765 }, { "epoch": 0.47, "grad_norm": 1.859375, "learning_rate": 0.000174198518318044, "loss": 2.1357, "step": 198770 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017419727918067905, "loss": 2.331, "step": 198775 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.000174196040017967, "loss": 2.153, "step": 198780 }, { "epoch": 0.47, "grad_norm": 1.9609375, "learning_rate": 0.0001741948008299082, "loss": 2.263, "step": 198785 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017419356161650314, "loss": 2.1691, "step": 198790 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.0001741923223777522, "loss": 2.0557, "step": 198795 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017419108311365582, "loss": 2.0592, "step": 198800 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017418984382421444, "loss": 2.1394, "step": 198805 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017418860450942844, "loss": 2.1333, "step": 198810 }, { "epoch": 0.47, "grad_norm": 4.03125, "learning_rate": 0.00017418736516929827, "loss": 2.2014, "step": 198815 }, { "epoch": 0.47, "grad_norm": 2.546875, "learning_rate": 0.00017418612580382434, "loss": 2.183, "step": 198820 }, { "epoch": 0.47, "grad_norm": 1.7578125, "learning_rate": 0.00017418488641300712, "loss": 2.0224, "step": 198825 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.00017418364699684698, "loss": 2.0453, "step": 198830 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017418240755534434, "loss": 2.3168, "step": 198835 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017418116808849967, "loss": 2.2384, "step": 198840 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017417992859631337, "loss": 2.1584, "step": 198845 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017417868907878585, "loss": 2.2565, "step": 198850 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017417744953591755, "loss": 1.981, "step": 198855 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017417620996770886, "loss": 2.1204, "step": 198860 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017417497037416026, "loss": 2.0483, "step": 198865 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.00017417373075527214, "loss": 2.1602, "step": 198870 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017417249111104495, "loss": 2.0827, "step": 198875 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017417125144147906, "loss": 2.1207, "step": 198880 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017417001174657493, "loss": 2.0316, "step": 198885 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.00017416877202633296, "loss": 2.1518, "step": 198890 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017416753228075363, "loss": 2.2723, "step": 198895 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001741662925098373, "loss": 2.2962, "step": 198900 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.00017416505271358444, "loss": 2.1985, "step": 198905 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.00017416381289199544, "loss": 1.9713, "step": 198910 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017416257304507076, "loss": 1.8735, "step": 198915 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017416133317281078, "loss": 2.2253, "step": 198920 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017416009327521595, "loss": 2.2215, "step": 198925 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017415885335228667, "loss": 2.0835, "step": 198930 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017415761340402338, "loss": 2.178, "step": 198935 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017415637343042652, "loss": 2.2174, "step": 198940 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017415513343149649, "loss": 2.183, "step": 198945 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017415389340723374, "loss": 2.0891, "step": 198950 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017415265335763867, "loss": 1.9742, "step": 198955 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.0001741514132827117, "loss": 2.1446, "step": 198960 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017415017318245325, "loss": 2.0295, "step": 198965 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017414893305686376, "loss": 2.1852, "step": 198970 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017414769290594365, "loss": 2.237, "step": 198975 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.00017414645272969338, "loss": 1.893, "step": 198980 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017414521252811328, "loss": 2.1117, "step": 198985 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017414397230120388, "loss": 1.9541, "step": 198990 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017414273204896554, "loss": 2.2752, "step": 198995 }, { "epoch": 0.47, "grad_norm": 1.8046875, "learning_rate": 0.0001741414917713987, "loss": 2.0378, "step": 199000 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017414025146850377, "loss": 2.1971, "step": 199005 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017413901114028118, "loss": 2.2735, "step": 199010 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017413777078673136, "loss": 2.1441, "step": 199015 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017413653040785474, "loss": 2.0793, "step": 199020 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017413529000365175, "loss": 2.1472, "step": 199025 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.00017413404957412277, "loss": 2.0282, "step": 199030 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017413280911926827, "loss": 2.0407, "step": 199035 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017413156863908867, "loss": 2.0426, "step": 199040 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.0001741303281335844, "loss": 1.8606, "step": 199045 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.0001741290876027558, "loss": 2.0947, "step": 199050 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001741278470466034, "loss": 2.1005, "step": 199055 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.0001741266064651276, "loss": 2.2758, "step": 199060 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017412536585832878, "loss": 2.099, "step": 199065 }, { "epoch": 0.47, "grad_norm": 2.671875, "learning_rate": 0.0001741241252262074, "loss": 2.1384, "step": 199070 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017412288456876386, "loss": 2.3604, "step": 199075 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017412164388599862, "loss": 2.0355, "step": 199080 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017412040317791208, "loss": 2.008, "step": 199085 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017411916244450466, "loss": 2.0774, "step": 199090 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017411792168577677, "loss": 2.0321, "step": 199095 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001741166809017289, "loss": 2.1174, "step": 199100 }, { "epoch": 0.47, "grad_norm": 1.7265625, "learning_rate": 0.0001741154400923614, "loss": 2.095, "step": 199105 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017411419925767468, "loss": 2.0857, "step": 199110 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017411295839766928, "loss": 2.0017, "step": 199115 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.0001741117175123455, "loss": 2.008, "step": 199120 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017411047660170382, "loss": 2.0996, "step": 199125 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.0001741092356657447, "loss": 2.1205, "step": 199130 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017410799470446846, "loss": 2.1901, "step": 199135 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017410675371787565, "loss": 2.2556, "step": 199140 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.00017410551270596658, "loss": 2.2058, "step": 199145 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017410427166874171, "loss": 2.2412, "step": 199150 }, { "epoch": 0.47, "grad_norm": 3.125, "learning_rate": 0.00017410303060620152, "loss": 2.0421, "step": 199155 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.00017410178951834634, "loss": 2.1973, "step": 199160 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.0001741005484051767, "loss": 2.0548, "step": 199165 }, { "epoch": 0.47, "grad_norm": 2.78125, "learning_rate": 0.00017409930726669295, "loss": 1.9151, "step": 199170 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.0001740980661028955, "loss": 2.175, "step": 199175 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017409682491378484, "loss": 2.0798, "step": 199180 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017409558369936137, "loss": 2.1763, "step": 199185 }, { "epoch": 0.47, "grad_norm": 1.7578125, "learning_rate": 0.00017409434245962548, "loss": 2.0447, "step": 199190 }, { "epoch": 0.47, "grad_norm": 2.40625, "learning_rate": 0.00017409310119457762, "loss": 1.9623, "step": 199195 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017409185990421822, "loss": 2.0741, "step": 199200 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017409061858854772, "loss": 2.2202, "step": 199205 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.0001740893772475665, "loss": 2.1722, "step": 199210 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.000174088135881275, "loss": 2.004, "step": 199215 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017408689448967366, "loss": 1.9334, "step": 199220 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.0001740856530727629, "loss": 2.0196, "step": 199225 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017408441163054312, "loss": 1.7073, "step": 199230 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017408317016301476, "loss": 2.0345, "step": 199235 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017408192867017825, "loss": 1.966, "step": 199240 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017408068715203402, "loss": 2.27, "step": 199245 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017407944560858247, "loss": 2.202, "step": 199250 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017407820403982405, "loss": 2.1089, "step": 199255 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017407696244575916, "loss": 2.0592, "step": 199260 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017407572082638826, "loss": 2.1459, "step": 199265 }, { "epoch": 0.47, "grad_norm": 2.828125, "learning_rate": 0.00017407447918171173, "loss": 2.1177, "step": 199270 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017407323751173, "loss": 2.2728, "step": 199275 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.0001740719958164435, "loss": 1.9509, "step": 199280 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001740707540958527, "loss": 1.9873, "step": 199285 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017406951234995798, "loss": 2.0259, "step": 199290 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.00017406827057875977, "loss": 2.0513, "step": 199295 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017406702878225848, "loss": 2.234, "step": 199300 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.00017406578696045456, "loss": 2.0514, "step": 199305 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017406454511334843, "loss": 2.2315, "step": 199310 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001740633032409405, "loss": 2.1545, "step": 199315 }, { "epoch": 0.47, "grad_norm": 2.953125, "learning_rate": 0.0001740620613432312, "loss": 2.1185, "step": 199320 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.000174060819420221, "loss": 2.0084, "step": 199325 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017405957747191022, "loss": 2.1634, "step": 199330 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017405833549829938, "loss": 2.225, "step": 199335 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017405709349938885, "loss": 2.1032, "step": 199340 }, { "epoch": 0.47, "grad_norm": 2.40625, "learning_rate": 0.00017405585147517907, "loss": 2.1169, "step": 199345 }, { "epoch": 0.47, "grad_norm": 2.828125, "learning_rate": 0.0001740546094256705, "loss": 2.2068, "step": 199350 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.0001740533673508635, "loss": 2.1278, "step": 199355 }, { "epoch": 0.47, "grad_norm": 1.8046875, "learning_rate": 0.00017405212525075853, "loss": 2.0401, "step": 199360 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017405088312535602, "loss": 2.0287, "step": 199365 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001740496409746564, "loss": 2.0787, "step": 199370 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017404839879866003, "loss": 2.1317, "step": 199375 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017404715659736746, "loss": 2.0392, "step": 199380 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017404591437077897, "loss": 2.0941, "step": 199385 }, { "epoch": 0.47, "grad_norm": 2.375, "learning_rate": 0.0001740446721188951, "loss": 2.2176, "step": 199390 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001740434298417162, "loss": 2.1907, "step": 199395 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.00017404218753924272, "loss": 2.0686, "step": 199400 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017404094521147512, "loss": 2.1707, "step": 199405 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.00017403970285841377, "loss": 2.2171, "step": 199410 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017403846048005906, "loss": 2.1344, "step": 199415 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017403721807641154, "loss": 2.0956, "step": 199420 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017403597564747157, "loss": 2.1557, "step": 199425 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017403473319323954, "loss": 2.3003, "step": 199430 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.0001740334907137159, "loss": 2.0712, "step": 199435 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017403224820890104, "loss": 2.018, "step": 199440 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.0001740310056787955, "loss": 1.986, "step": 199445 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.0001740297631233996, "loss": 2.17, "step": 199450 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017402852054271373, "loss": 2.1769, "step": 199455 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017402727793673842, "loss": 2.0092, "step": 199460 }, { "epoch": 0.47, "grad_norm": 3.1875, "learning_rate": 0.00017402603530547407, "loss": 2.1317, "step": 199465 }, { "epoch": 0.47, "grad_norm": 1.6640625, "learning_rate": 0.00017402479264892105, "loss": 2.0716, "step": 199470 }, { "epoch": 0.47, "grad_norm": 2.6875, "learning_rate": 0.00017402354996707984, "loss": 2.1676, "step": 199475 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017402230725995085, "loss": 2.2302, "step": 199480 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017402106452753447, "loss": 2.2165, "step": 199485 }, { "epoch": 0.47, "grad_norm": 2.375, "learning_rate": 0.0001740198217698312, "loss": 1.9401, "step": 199490 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017401857898684134, "loss": 2.0342, "step": 199495 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017401733617856542, "loss": 2.2088, "step": 199500 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017401609334500385, "loss": 2.0076, "step": 199505 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017401485048615704, "loss": 2.1337, "step": 199510 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017401360760202543, "loss": 2.172, "step": 199515 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017401236469260942, "loss": 2.0241, "step": 199520 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017401112175790943, "loss": 2.0597, "step": 199525 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.0001740098787979259, "loss": 2.0778, "step": 199530 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017400863581265927, "loss": 2.0617, "step": 199535 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017400739280210993, "loss": 2.0363, "step": 199540 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017400614976627833, "loss": 1.988, "step": 199545 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017400490670516487, "loss": 2.1298, "step": 199550 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017400366361877004, "loss": 2.1356, "step": 199555 }, { "epoch": 0.47, "grad_norm": 1.828125, "learning_rate": 0.00017400242050709416, "loss": 2.1669, "step": 199560 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017400117737013775, "loss": 2.1559, "step": 199565 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017399993420790118, "loss": 2.3257, "step": 199570 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.0001739986910203849, "loss": 2.0415, "step": 199575 }, { "epoch": 0.47, "grad_norm": 2.546875, "learning_rate": 0.00017399744780758928, "loss": 2.2226, "step": 199580 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017399620456951483, "loss": 2.0865, "step": 199585 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017399496130616194, "loss": 2.118, "step": 199590 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.000173993718017531, "loss": 2.2713, "step": 199595 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.00017399247470362253, "loss": 1.9985, "step": 199600 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017399123136443683, "loss": 2.0321, "step": 199605 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017398998799997439, "loss": 2.0371, "step": 199610 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017398874461023564, "loss": 2.1486, "step": 199615 }, { "epoch": 0.47, "grad_norm": 1.828125, "learning_rate": 0.00017398750119522097, "loss": 1.9083, "step": 199620 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017398625775493084, "loss": 2.1083, "step": 199625 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017398501428936568, "loss": 1.9144, "step": 199630 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.0001739837707985259, "loss": 2.2089, "step": 199635 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017398252728241192, "loss": 2.0518, "step": 199640 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017398128374102414, "loss": 2.1642, "step": 199645 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017398004017436303, "loss": 2.1838, "step": 199650 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017397879658242896, "loss": 1.9952, "step": 199655 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017397755296522243, "loss": 2.1494, "step": 199660 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017397630932274383, "loss": 2.262, "step": 199665 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017397506565499358, "loss": 2.075, "step": 199670 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017397382196197207, "loss": 1.9999, "step": 199675 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017397257824367982, "loss": 2.1828, "step": 199680 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017397133450011714, "loss": 2.1597, "step": 199685 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017397009073128454, "loss": 2.2366, "step": 199690 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.0001739688469371824, "loss": 1.7833, "step": 199695 }, { "epoch": 0.47, "grad_norm": 2.6875, "learning_rate": 0.0001739676031178112, "loss": 2.0978, "step": 199700 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017396635927317128, "loss": 2.048, "step": 199705 }, { "epoch": 0.47, "grad_norm": 1.7109375, "learning_rate": 0.00017396511540326314, "loss": 2.2502, "step": 199710 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017396387150808712, "loss": 2.0375, "step": 199715 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017396262758764377, "loss": 2.1054, "step": 199720 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017396138364193342, "loss": 2.0292, "step": 199725 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.0001739601396709565, "loss": 1.8803, "step": 199730 }, { "epoch": 0.47, "grad_norm": 3.9375, "learning_rate": 0.00017395889567471348, "loss": 1.9964, "step": 199735 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017395765165320473, "loss": 2.0724, "step": 199740 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017395640760643074, "loss": 2.0336, "step": 199745 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017395516353439186, "loss": 2.2308, "step": 199750 }, { "epoch": 0.47, "grad_norm": 2.625, "learning_rate": 0.0001739539194370886, "loss": 2.164, "step": 199755 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017395267531452132, "loss": 1.939, "step": 199760 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017395143116669045, "loss": 1.8967, "step": 199765 }, { "epoch": 0.47, "grad_norm": 2.6875, "learning_rate": 0.00017395018699359647, "loss": 2.1124, "step": 199770 }, { "epoch": 0.47, "grad_norm": 1.78125, "learning_rate": 0.00017394894279523974, "loss": 2.1795, "step": 199775 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.0001739476985716207, "loss": 2.2153, "step": 199780 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.0001739464543227398, "loss": 2.217, "step": 199785 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017394521004859744, "loss": 2.195, "step": 199790 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017394396574919405, "loss": 1.9496, "step": 199795 }, { "epoch": 0.47, "grad_norm": 3.078125, "learning_rate": 0.00017394272142453007, "loss": 2.288, "step": 199800 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017394147707460594, "loss": 2.2593, "step": 199805 }, { "epoch": 0.47, "grad_norm": 2.40625, "learning_rate": 0.00017394023269942205, "loss": 2.3266, "step": 199810 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.00017393898829897882, "loss": 1.8738, "step": 199815 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017393774387327668, "loss": 2.1334, "step": 199820 }, { "epoch": 0.47, "grad_norm": 1.984375, "learning_rate": 0.00017393649942231608, "loss": 2.0688, "step": 199825 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017393525494609742, "loss": 2.0667, "step": 199830 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017393401044462115, "loss": 2.1066, "step": 199835 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017393276591788767, "loss": 1.9931, "step": 199840 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017393152136589742, "loss": 2.1346, "step": 199845 }, { "epoch": 0.47, "grad_norm": 1.796875, "learning_rate": 0.00017393027678865082, "loss": 2.1051, "step": 199850 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.0001739290321861483, "loss": 2.1339, "step": 199855 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017392778755839026, "loss": 2.0104, "step": 199860 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001739265429053772, "loss": 2.0641, "step": 199865 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017392529822710947, "loss": 2.152, "step": 199870 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001739240535235875, "loss": 2.2178, "step": 199875 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017392280879481174, "loss": 2.2206, "step": 199880 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017392156404078262, "loss": 1.9562, "step": 199885 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017392031926150053, "loss": 2.218, "step": 199890 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017391907445696593, "loss": 1.9931, "step": 199895 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.00017391782962717923, "loss": 2.2414, "step": 199900 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017391658477214087, "loss": 2.0344, "step": 199905 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017391533989185125, "loss": 2.1611, "step": 199910 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.0001739140949863108, "loss": 1.969, "step": 199915 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017391285005552, "loss": 2.1026, "step": 199920 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.0001739116050994792, "loss": 2.0649, "step": 199925 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017391036011818884, "loss": 2.1624, "step": 199930 }, { "epoch": 0.47, "grad_norm": 1.8515625, "learning_rate": 0.0001739091151116494, "loss": 2.0161, "step": 199935 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017390787007986121, "loss": 2.1982, "step": 199940 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017390662502282482, "loss": 2.2315, "step": 199945 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017390537994054053, "loss": 2.058, "step": 199950 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017390413483300886, "loss": 2.2522, "step": 199955 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017390288970023016, "loss": 2.1649, "step": 199960 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017390164454220491, "loss": 2.1155, "step": 199965 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017390039935893354, "loss": 2.0871, "step": 199970 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017389915415041643, "loss": 2.0447, "step": 199975 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017389790891665402, "loss": 1.917, "step": 199980 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017389666365764676, "loss": 2.3183, "step": 199985 }, { "epoch": 0.47, "grad_norm": 2.609375, "learning_rate": 0.00017389541837339507, "loss": 2.2458, "step": 199990 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017389417306389935, "loss": 2.0813, "step": 199995 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017389292772916005, "loss": 2.1623, "step": 200000 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017389168236917757, "loss": 2.2644, "step": 200005 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017389043698395236, "loss": 2.0645, "step": 200010 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017388919157348482, "loss": 2.1139, "step": 200015 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.00017388794613777542, "loss": 2.1266, "step": 200020 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017388670067682455, "loss": 1.9951, "step": 200025 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017388545519063264, "loss": 2.0603, "step": 200030 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017388420967920013, "loss": 2.1105, "step": 200035 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017388296414252738, "loss": 2.1618, "step": 200040 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017388171858061493, "loss": 2.1856, "step": 200045 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017388047299346312, "loss": 2.2015, "step": 200050 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001738792273810724, "loss": 2.1023, "step": 200055 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001738779817434432, "loss": 2.031, "step": 200060 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017387673608057594, "loss": 1.9876, "step": 200065 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017387549039247105, "loss": 2.2981, "step": 200070 }, { "epoch": 0.47, "grad_norm": 1.9609375, "learning_rate": 0.00017387424467912895, "loss": 2.0383, "step": 200075 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.0001738729989405501, "loss": 2.088, "step": 200080 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017387175317673482, "loss": 2.0633, "step": 200085 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.00017387050738768367, "loss": 2.1779, "step": 200090 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.000173869261573397, "loss": 2.0497, "step": 200095 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017386801573387526, "loss": 2.2815, "step": 200100 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.00017386676986911885, "loss": 2.0487, "step": 200105 }, { "epoch": 0.47, "grad_norm": 1.7734375, "learning_rate": 0.00017386552397912822, "loss": 2.017, "step": 200110 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.0001738642780639038, "loss": 2.1214, "step": 200115 }, { "epoch": 0.47, "grad_norm": 1.984375, "learning_rate": 0.00017386303212344598, "loss": 2.1698, "step": 200120 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017386178615775522, "loss": 1.7524, "step": 200125 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017386054016683194, "loss": 1.9678, "step": 200130 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017385929415067653, "loss": 2.1053, "step": 200135 }, { "epoch": 0.47, "grad_norm": 1.734375, "learning_rate": 0.00017385804810928946, "loss": 2.2093, "step": 200140 }, { "epoch": 0.47, "grad_norm": 3.0, "learning_rate": 0.00017385680204267117, "loss": 2.1062, "step": 200145 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017385555595082205, "loss": 2.0426, "step": 200150 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.0001738543098337425, "loss": 2.1318, "step": 200155 }, { "epoch": 0.47, "grad_norm": 2.90625, "learning_rate": 0.00017385306369143298, "loss": 2.162, "step": 200160 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.00017385181752389394, "loss": 2.0817, "step": 200165 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017385057133112577, "loss": 2.2019, "step": 200170 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.0001738493251131289, "loss": 2.0713, "step": 200175 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017384807886990376, "loss": 1.9863, "step": 200180 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.0001738468326014508, "loss": 2.1893, "step": 200185 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017384558630777043, "loss": 2.2699, "step": 200190 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.000173844339988863, "loss": 2.083, "step": 200195 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017384309364472907, "loss": 2.1547, "step": 200200 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.000173841847275369, "loss": 2.2888, "step": 200205 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017384060088078318, "loss": 2.1423, "step": 200210 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001738393544609721, "loss": 2.1953, "step": 200215 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.0001738381080159361, "loss": 2.1006, "step": 200220 }, { "epoch": 0.47, "grad_norm": 1.9375, "learning_rate": 0.00017383686154567572, "loss": 2.0322, "step": 200225 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017383561505019134, "loss": 2.0323, "step": 200230 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017383436852948334, "loss": 2.2278, "step": 200235 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001738331219835522, "loss": 2.0867, "step": 200240 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.0001738318754123983, "loss": 2.1285, "step": 200245 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.0001738306288160221, "loss": 2.1425, "step": 200250 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017382938219442405, "loss": 2.1623, "step": 200255 }, { "epoch": 0.47, "grad_norm": 2.734375, "learning_rate": 0.00017382813554760448, "loss": 2.1086, "step": 200260 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.00017382688887556395, "loss": 2.0101, "step": 200265 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017382564217830279, "loss": 1.9262, "step": 200270 }, { "epoch": 0.47, "grad_norm": 1.78125, "learning_rate": 0.00017382439545582146, "loss": 2.2784, "step": 200275 }, { "epoch": 0.47, "grad_norm": 1.8515625, "learning_rate": 0.00017382314870812034, "loss": 2.038, "step": 200280 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017382190193519992, "loss": 2.1772, "step": 200285 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.0001738206551370606, "loss": 2.188, "step": 200290 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.0001738194083137028, "loss": 2.0972, "step": 200295 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017381816146512698, "loss": 2.2474, "step": 200300 }, { "epoch": 0.47, "grad_norm": 1.7265625, "learning_rate": 0.0001738169145913335, "loss": 2.1322, "step": 200305 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017381566769232286, "loss": 2.1071, "step": 200310 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001738144207680954, "loss": 2.1612, "step": 200315 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017381317381865165, "loss": 2.13, "step": 200320 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017381192684399194, "loss": 2.1379, "step": 200325 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017381067984411676, "loss": 2.2668, "step": 200330 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001738094328190265, "loss": 2.1218, "step": 200335 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.00017380818576872157, "loss": 1.99, "step": 200340 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017380693869320247, "loss": 2.0822, "step": 200345 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017380569159246957, "loss": 2.2093, "step": 200350 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.0001738044444665233, "loss": 2.2809, "step": 200355 }, { "epoch": 0.47, "grad_norm": 1.59375, "learning_rate": 0.00017380319731536412, "loss": 1.9602, "step": 200360 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001738019501389924, "loss": 2.0725, "step": 200365 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.0001738007029374086, "loss": 2.2379, "step": 200370 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017379945571061316, "loss": 2.1599, "step": 200375 }, { "epoch": 0.47, "grad_norm": 2.453125, "learning_rate": 0.00017379820845860646, "loss": 2.1057, "step": 200380 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017379696118138894, "loss": 2.0978, "step": 200385 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.0001737957138789611, "loss": 1.9783, "step": 200390 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017379446655132327, "loss": 2.0615, "step": 200395 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001737932191984759, "loss": 2.0667, "step": 200400 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017379197182041946, "loss": 2.1286, "step": 200405 }, { "epoch": 0.47, "grad_norm": 1.78125, "learning_rate": 0.00017379072441715433, "loss": 2.0965, "step": 200410 }, { "epoch": 0.47, "grad_norm": 1.875, "learning_rate": 0.00017378947698868093, "loss": 2.0425, "step": 200415 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017378822953499973, "loss": 1.9827, "step": 200420 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.00017378698205611112, "loss": 2.4124, "step": 200425 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017378573455201554, "loss": 2.1293, "step": 200430 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017378448702271343, "loss": 2.0496, "step": 200435 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001737832394682052, "loss": 2.148, "step": 200440 }, { "epoch": 0.47, "grad_norm": 1.859375, "learning_rate": 0.00017378199188849126, "loss": 2.3326, "step": 200445 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017378074428357206, "loss": 2.0978, "step": 200450 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.000173779496653448, "loss": 2.0338, "step": 200455 }, { "epoch": 0.47, "grad_norm": 2.59375, "learning_rate": 0.00017377824899811954, "loss": 2.1876, "step": 200460 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001737770013175871, "loss": 2.1182, "step": 200465 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017377575361185113, "loss": 2.2961, "step": 200470 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017377450588091197, "loss": 2.0735, "step": 200475 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017377325812477015, "loss": 2.0664, "step": 200480 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.00017377201034342597, "loss": 2.0542, "step": 200485 }, { "epoch": 0.47, "grad_norm": 1.984375, "learning_rate": 0.00017377076253688003, "loss": 2.1703, "step": 200490 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.0001737695147051326, "loss": 2.0585, "step": 200495 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017376826684818418, "loss": 2.2344, "step": 200500 }, { "epoch": 0.47, "grad_norm": 3.328125, "learning_rate": 0.00017376701896603517, "loss": 2.0464, "step": 200505 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.00017376577105868602, "loss": 2.2318, "step": 200510 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017376452312613713, "loss": 2.079, "step": 200515 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017376327516838895, "loss": 2.1018, "step": 200520 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017376202718544193, "loss": 2.0668, "step": 200525 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017376077917729643, "loss": 2.0198, "step": 200530 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.0001737595311439529, "loss": 2.0421, "step": 200535 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.0001737582830854118, "loss": 2.2837, "step": 200540 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.0001737570350016735, "loss": 1.9824, "step": 200545 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017375578689273852, "loss": 2.1325, "step": 200550 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017375453875860718, "loss": 2.0421, "step": 200555 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017375329059927995, "loss": 2.0102, "step": 200560 }, { "epoch": 0.47, "grad_norm": 3.171875, "learning_rate": 0.00017375204241475728, "loss": 2.0641, "step": 200565 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017375079420503957, "loss": 2.194, "step": 200570 }, { "epoch": 0.47, "grad_norm": 1.9609375, "learning_rate": 0.00017374954597012724, "loss": 2.1262, "step": 200575 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017374829771002074, "loss": 2.074, "step": 200580 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017374704942472048, "loss": 2.2336, "step": 200585 }, { "epoch": 0.47, "grad_norm": 1.734375, "learning_rate": 0.00017374580111422687, "loss": 1.9263, "step": 200590 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017374455277854037, "loss": 2.0718, "step": 200595 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017374330441766143, "loss": 2.3529, "step": 200600 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.0001737420560315904, "loss": 1.8663, "step": 200605 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017374080762032774, "loss": 2.1385, "step": 200610 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001737395591838739, "loss": 2.0768, "step": 200615 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017373831072222927, "loss": 2.2607, "step": 200620 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017373706223539434, "loss": 2.0803, "step": 200625 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017373581372336948, "loss": 2.136, "step": 200630 }, { "epoch": 0.47, "grad_norm": 1.8203125, "learning_rate": 0.0001737345651861551, "loss": 2.1002, "step": 200635 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017373331662375168, "loss": 2.0808, "step": 200640 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.0001737320680361596, "loss": 2.2287, "step": 200645 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.0001737308194233793, "loss": 2.3068, "step": 200650 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017372957078541122, "loss": 2.1436, "step": 200655 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017372832212225582, "loss": 1.9232, "step": 200660 }, { "epoch": 0.47, "grad_norm": 2.671875, "learning_rate": 0.00017372707343391345, "loss": 2.2177, "step": 200665 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001737258247203846, "loss": 2.2778, "step": 200670 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017372457598166966, "loss": 2.0487, "step": 200675 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017372332721776905, "loss": 2.1523, "step": 200680 }, { "epoch": 0.47, "grad_norm": 1.8359375, "learning_rate": 0.00017372207842868324, "loss": 1.9753, "step": 200685 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017372082961441263, "loss": 2.0727, "step": 200690 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017371958077495763, "loss": 2.1563, "step": 200695 }, { "epoch": 0.47, "grad_norm": 2.578125, "learning_rate": 0.0001737183319103187, "loss": 2.0481, "step": 200700 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017371708302049626, "loss": 2.1552, "step": 200705 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017371583410549072, "loss": 2.2748, "step": 200710 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017371458516530247, "loss": 2.1007, "step": 200715 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017371333619993205, "loss": 2.1494, "step": 200720 }, { "epoch": 0.47, "grad_norm": 1.9609375, "learning_rate": 0.00017371208720937977, "loss": 2.0975, "step": 200725 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.0001737108381936461, "loss": 2.134, "step": 200730 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001737095891527315, "loss": 2.0233, "step": 200735 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017370834008663635, "loss": 2.2228, "step": 200740 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017370709099536112, "loss": 2.2741, "step": 200745 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.0001737058418789062, "loss": 2.0366, "step": 200750 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.000173704592737272, "loss": 2.0007, "step": 200755 }, { "epoch": 0.47, "grad_norm": 1.828125, "learning_rate": 0.00017370334357045902, "loss": 2.0221, "step": 200760 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.0001737020943784676, "loss": 2.0275, "step": 200765 }, { "epoch": 0.47, "grad_norm": 1.796875, "learning_rate": 0.0001737008451612982, "loss": 2.1868, "step": 200770 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017369959591895132, "loss": 2.1902, "step": 200775 }, { "epoch": 0.47, "grad_norm": 1.9921875, "learning_rate": 0.00017369834665142725, "loss": 2.1654, "step": 200780 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017369709735872652, "loss": 2.2645, "step": 200785 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017369584804084954, "loss": 2.1712, "step": 200790 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.0001736945986977967, "loss": 2.2334, "step": 200795 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017369334932956844, "loss": 2.2671, "step": 200800 }, { "epoch": 0.47, "grad_norm": 1.7734375, "learning_rate": 0.00017369209993616522, "loss": 2.2732, "step": 200805 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017369085051758743, "loss": 2.017, "step": 200810 }, { "epoch": 0.47, "grad_norm": 1.8515625, "learning_rate": 0.0001736896010738355, "loss": 2.114, "step": 200815 }, { "epoch": 0.47, "grad_norm": 2.53125, "learning_rate": 0.0001736883516049099, "loss": 2.1001, "step": 200820 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.000173687102110811, "loss": 2.0656, "step": 200825 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017368585259153925, "loss": 2.1768, "step": 200830 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017368460304709505, "loss": 2.0096, "step": 200835 }, { "epoch": 0.47, "grad_norm": 3.640625, "learning_rate": 0.0001736833534774789, "loss": 2.143, "step": 200840 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.00017368210388269114, "loss": 2.0591, "step": 200845 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017368085426273225, "loss": 2.0758, "step": 200850 }, { "epoch": 0.47, "grad_norm": 1.7890625, "learning_rate": 0.00017367960461760264, "loss": 2.208, "step": 200855 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017367835494730275, "loss": 2.0733, "step": 200860 }, { "epoch": 0.47, "grad_norm": 2.453125, "learning_rate": 0.000173677105251833, "loss": 2.0795, "step": 200865 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.0001736758555311938, "loss": 2.2002, "step": 200870 }, { "epoch": 0.47, "grad_norm": 1.9296875, "learning_rate": 0.0001736746057853856, "loss": 2.0205, "step": 200875 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017367335601440885, "loss": 2.1255, "step": 200880 }, { "epoch": 0.47, "grad_norm": 2.59375, "learning_rate": 0.0001736721062182639, "loss": 2.0552, "step": 200885 }, { "epoch": 0.47, "grad_norm": 1.7890625, "learning_rate": 0.00017367085639695125, "loss": 2.1049, "step": 200890 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017366960655047127, "loss": 2.2848, "step": 200895 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017366835667882447, "loss": 1.9718, "step": 200900 }, { "epoch": 0.47, "grad_norm": 2.4375, "learning_rate": 0.00017366710678201117, "loss": 2.0477, "step": 200905 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017366585686003186, "loss": 2.1008, "step": 200910 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017366460691288697, "loss": 2.19, "step": 200915 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.0001736633569405769, "loss": 2.1596, "step": 200920 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017366210694310212, "loss": 2.1985, "step": 200925 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017366085692046302, "loss": 1.9911, "step": 200930 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.00017365960687266, "loss": 2.0504, "step": 200935 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017365835679969356, "loss": 2.0761, "step": 200940 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.00017365710670156408, "loss": 2.2703, "step": 200945 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017365585657827198, "loss": 2.1987, "step": 200950 }, { "epoch": 0.47, "grad_norm": 2.734375, "learning_rate": 0.0001736546064298177, "loss": 2.0733, "step": 200955 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.00017365335625620172, "loss": 2.0361, "step": 200960 }, { "epoch": 0.47, "grad_norm": 1.65625, "learning_rate": 0.00017365210605742437, "loss": 2.083, "step": 200965 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017365085583348616, "loss": 2.2105, "step": 200970 }, { "epoch": 0.47, "grad_norm": 2.421875, "learning_rate": 0.00017364960558438746, "loss": 2.1211, "step": 200975 }, { "epoch": 0.47, "grad_norm": 1.7109375, "learning_rate": 0.00017364835531012872, "loss": 2.0069, "step": 200980 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017364710501071039, "loss": 1.9342, "step": 200985 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017364585468613282, "loss": 2.0741, "step": 200990 }, { "epoch": 0.47, "grad_norm": 1.9296875, "learning_rate": 0.00017364460433639652, "loss": 2.0856, "step": 200995 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.0001736433539615019, "loss": 2.1301, "step": 201000 }, { "epoch": 0.47, "grad_norm": 2.671875, "learning_rate": 0.00017364210356144938, "loss": 2.0555, "step": 201005 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017364085313623936, "loss": 2.0767, "step": 201010 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.0001736396026858723, "loss": 2.0145, "step": 201015 }, { "epoch": 0.47, "grad_norm": 1.8203125, "learning_rate": 0.00017363835221034858, "loss": 2.2327, "step": 201020 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.0001736371017096687, "loss": 1.8781, "step": 201025 }, { "epoch": 0.47, "grad_norm": 1.9375, "learning_rate": 0.00017363585118383305, "loss": 2.156, "step": 201030 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017363460063284206, "loss": 2.1445, "step": 201035 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017363335005669614, "loss": 2.1898, "step": 201040 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.00017363209945539575, "loss": 1.9937, "step": 201045 }, { "epoch": 0.47, "grad_norm": 1.8046875, "learning_rate": 0.00017363084882894127, "loss": 2.1446, "step": 201050 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017362959817733317, "loss": 2.1294, "step": 201055 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017362834750057188, "loss": 1.997, "step": 201060 }, { "epoch": 0.47, "grad_norm": 1.859375, "learning_rate": 0.00017362709679865778, "loss": 2.0708, "step": 201065 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017362584607159137, "loss": 2.1593, "step": 201070 }, { "epoch": 0.47, "grad_norm": 2.4375, "learning_rate": 0.000173624595319373, "loss": 1.9063, "step": 201075 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017362334454200316, "loss": 2.0083, "step": 201080 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.00017362209373948223, "loss": 2.0699, "step": 201085 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.0001736208429118107, "loss": 2.0784, "step": 201090 }, { "epoch": 0.47, "grad_norm": 1.984375, "learning_rate": 0.0001736195920589889, "loss": 2.2086, "step": 201095 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.0001736183411810173, "loss": 2.0268, "step": 201100 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017361709027789642, "loss": 2.0681, "step": 201105 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017361583934962654, "loss": 1.9453, "step": 201110 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.0001736145883962082, "loss": 2.2387, "step": 201115 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.0001736133374176417, "loss": 2.1306, "step": 201120 }, { "epoch": 0.47, "grad_norm": 1.953125, "learning_rate": 0.00017361208641392763, "loss": 2.0799, "step": 201125 }, { "epoch": 0.47, "grad_norm": 2.734375, "learning_rate": 0.00017361083538506634, "loss": 2.0367, "step": 201130 }, { "epoch": 0.47, "grad_norm": 1.796875, "learning_rate": 0.0001736095843310582, "loss": 2.1766, "step": 201135 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.0001736083332519037, "loss": 2.1041, "step": 201140 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017360708214760327, "loss": 2.1, "step": 201145 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017360583101815735, "loss": 2.0349, "step": 201150 }, { "epoch": 0.47, "grad_norm": 1.765625, "learning_rate": 0.00017360457986356632, "loss": 2.0704, "step": 201155 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017360332868383065, "loss": 2.2607, "step": 201160 }, { "epoch": 0.47, "grad_norm": 1.796875, "learning_rate": 0.00017360207747895071, "loss": 2.1779, "step": 201165 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.000173600826248927, "loss": 2.1143, "step": 201170 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.00017359957499375988, "loss": 2.0949, "step": 201175 }, { "epoch": 0.47, "grad_norm": 1.8125, "learning_rate": 0.00017359832371344983, "loss": 2.0249, "step": 201180 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017359707240799724, "loss": 2.2562, "step": 201185 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001735958210774026, "loss": 2.0994, "step": 201190 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017359456972166625, "loss": 2.1918, "step": 201195 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.0001735933183407887, "loss": 2.0579, "step": 201200 }, { "epoch": 0.47, "grad_norm": 3.46875, "learning_rate": 0.0001735920669347703, "loss": 2.1447, "step": 201205 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017359081550361155, "loss": 2.0846, "step": 201210 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017358956404731282, "loss": 2.2016, "step": 201215 }, { "epoch": 0.47, "grad_norm": 2.6875, "learning_rate": 0.00017358831256587457, "loss": 2.1748, "step": 201220 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017358706105929716, "loss": 2.316, "step": 201225 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017358580952758117, "loss": 2.2602, "step": 201230 }, { "epoch": 0.47, "grad_norm": 2.390625, "learning_rate": 0.00017358455797072687, "loss": 1.8594, "step": 201235 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017358330638873477, "loss": 2.0699, "step": 201240 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017358205478160528, "loss": 1.9761, "step": 201245 }, { "epoch": 0.47, "grad_norm": 1.8203125, "learning_rate": 0.0001735808031493388, "loss": 1.9321, "step": 201250 }, { "epoch": 0.47, "grad_norm": 2.375, "learning_rate": 0.00017357955149193584, "loss": 2.1632, "step": 201255 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.0001735782998093967, "loss": 2.1212, "step": 201260 }, { "epoch": 0.47, "grad_norm": 1.796875, "learning_rate": 0.00017357704810172194, "loss": 2.1338, "step": 201265 }, { "epoch": 0.47, "grad_norm": 1.7265625, "learning_rate": 0.00017357579636891187, "loss": 2.0474, "step": 201270 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017357454461096702, "loss": 2.1894, "step": 201275 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017357329282788775, "loss": 2.0329, "step": 201280 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.0001735720410196745, "loss": 2.1145, "step": 201285 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017357078918632773, "loss": 2.2115, "step": 201290 }, { "epoch": 0.47, "grad_norm": 2.765625, "learning_rate": 0.00017356953732784787, "loss": 2.192, "step": 201295 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017356828544423528, "loss": 2.3076, "step": 201300 }, { "epoch": 0.47, "grad_norm": 1.78125, "learning_rate": 0.00017356703353549042, "loss": 2.1856, "step": 201305 }, { "epoch": 0.47, "grad_norm": 1.8984375, "learning_rate": 0.00017356578160161374, "loss": 2.0976, "step": 201310 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017356452964260565, "loss": 1.9556, "step": 201315 }, { "epoch": 0.47, "grad_norm": 1.9453125, "learning_rate": 0.00017356327765846662, "loss": 2.1621, "step": 201320 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.000173562025649197, "loss": 2.1707, "step": 201325 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.0001735607736147973, "loss": 2.1185, "step": 201330 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017355952155526786, "loss": 2.216, "step": 201335 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017355826947060914, "loss": 2.1046, "step": 201340 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017355701736082162, "loss": 2.1663, "step": 201345 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017355576522590566, "loss": 2.1046, "step": 201350 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017355451306586174, "loss": 2.2172, "step": 201355 }, { "epoch": 0.47, "grad_norm": 2.5, "learning_rate": 0.00017355326088069027, "loss": 1.9549, "step": 201360 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017355200867039166, "loss": 2.0234, "step": 201365 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017355075643496633, "loss": 2.0675, "step": 201370 }, { "epoch": 0.47, "grad_norm": 2.5625, "learning_rate": 0.00017354950417441476, "loss": 2.2389, "step": 201375 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017354825188873734, "loss": 1.8852, "step": 201380 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.0001735469995779345, "loss": 2.2052, "step": 201385 }, { "epoch": 0.47, "grad_norm": 1.8515625, "learning_rate": 0.00017354574724200665, "loss": 1.9854, "step": 201390 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017354449488095428, "loss": 2.1013, "step": 201395 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017354324249477775, "loss": 2.1951, "step": 201400 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.0001735419900834775, "loss": 2.2155, "step": 201405 }, { "epoch": 0.47, "grad_norm": 2.953125, "learning_rate": 0.000173540737647054, "loss": 2.3209, "step": 201410 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017353948518550764, "loss": 2.004, "step": 201415 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017353823269883887, "loss": 2.1602, "step": 201420 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017353698018704808, "loss": 2.0433, "step": 201425 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017353572765013575, "loss": 2.1303, "step": 201430 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017353447508810228, "loss": 2.1601, "step": 201435 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017353322250094808, "loss": 2.1565, "step": 201440 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017353196988867362, "loss": 2.122, "step": 201445 }, { "epoch": 0.47, "grad_norm": 1.5, "learning_rate": 0.00017353071725127928, "loss": 2.1027, "step": 201450 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.00017352946458876553, "loss": 2.0396, "step": 201455 }, { "epoch": 0.47, "grad_norm": 2.234375, "learning_rate": 0.00017352821190113279, "loss": 2.062, "step": 201460 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017352695918838146, "loss": 1.9982, "step": 201465 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017352570645051197, "loss": 2.0467, "step": 201470 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017352445368752482, "loss": 2.1191, "step": 201475 }, { "epoch": 0.47, "grad_norm": 1.875, "learning_rate": 0.00017352320089942036, "loss": 2.0004, "step": 201480 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.000173521948086199, "loss": 2.3344, "step": 201485 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017352069524786124, "loss": 2.1381, "step": 201490 }, { "epoch": 0.47, "grad_norm": 1.6015625, "learning_rate": 0.0001735194423844075, "loss": 2.0706, "step": 201495 }, { "epoch": 0.47, "grad_norm": 2.515625, "learning_rate": 0.00017351818949583816, "loss": 2.009, "step": 201500 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017351693658215368, "loss": 2.1474, "step": 201505 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.0001735156836433545, "loss": 2.1283, "step": 201510 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.000173514430679441, "loss": 2.0359, "step": 201515 }, { "epoch": 0.47, "grad_norm": 2.109375, "learning_rate": 0.00017351317769041363, "loss": 2.1032, "step": 201520 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017351192467627286, "loss": 1.9426, "step": 201525 }, { "epoch": 0.47, "grad_norm": 1.9296875, "learning_rate": 0.00017351067163701904, "loss": 2.0424, "step": 201530 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.0001735094185726527, "loss": 2.0539, "step": 201535 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017350816548317418, "loss": 2.1651, "step": 201540 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017350691236858392, "loss": 1.9644, "step": 201545 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017350565922888238, "loss": 2.144, "step": 201550 }, { "epoch": 0.47, "grad_norm": 1.8671875, "learning_rate": 0.00017350440606406998, "loss": 2.1238, "step": 201555 }, { "epoch": 0.47, "grad_norm": 1.875, "learning_rate": 0.00017350315287414713, "loss": 2.065, "step": 201560 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017350189965911427, "loss": 1.9594, "step": 201565 }, { "epoch": 0.47, "grad_norm": 1.703125, "learning_rate": 0.00017350064641897183, "loss": 2.2884, "step": 201570 }, { "epoch": 0.47, "grad_norm": 1.8828125, "learning_rate": 0.00017349939315372025, "loss": 2.1111, "step": 201575 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017349813986335995, "loss": 1.8688, "step": 201580 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017349688654789129, "loss": 2.1724, "step": 201585 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017349563320731483, "loss": 1.9951, "step": 201590 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017349437984163092, "loss": 2.2699, "step": 201595 }, { "epoch": 0.47, "grad_norm": 2.328125, "learning_rate": 0.00017349312645083997, "loss": 2.0876, "step": 201600 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.00017349187303494244, "loss": 2.1314, "step": 201605 }, { "epoch": 0.47, "grad_norm": 2.078125, "learning_rate": 0.00017349061959393876, "loss": 2.1292, "step": 201610 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017348936612782937, "loss": 2.0507, "step": 201615 }, { "epoch": 0.47, "grad_norm": 2.0, "learning_rate": 0.00017348811263661465, "loss": 2.1494, "step": 201620 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017348685912029507, "loss": 1.9963, "step": 201625 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017348560557887104, "loss": 2.0392, "step": 201630 }, { "epoch": 0.47, "grad_norm": 2.203125, "learning_rate": 0.00017348435201234298, "loss": 1.9981, "step": 201635 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017348309842071138, "loss": 1.9972, "step": 201640 }, { "epoch": 0.47, "grad_norm": 2.1875, "learning_rate": 0.0001734818448039766, "loss": 2.1892, "step": 201645 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017348059116213907, "loss": 2.0399, "step": 201650 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017347933749519925, "loss": 2.0957, "step": 201655 }, { "epoch": 0.47, "grad_norm": 2.28125, "learning_rate": 0.00017347808380315755, "loss": 2.1613, "step": 201660 }, { "epoch": 0.47, "grad_norm": 2.25, "learning_rate": 0.00017347683008601443, "loss": 2.245, "step": 201665 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017347557634377026, "loss": 2.0467, "step": 201670 }, { "epoch": 0.47, "grad_norm": 1.9609375, "learning_rate": 0.00017347432257642552, "loss": 2.1404, "step": 201675 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.0001734730687839806, "loss": 1.9368, "step": 201680 }, { "epoch": 0.47, "grad_norm": 2.359375, "learning_rate": 0.00017347181496643596, "loss": 2.1873, "step": 201685 }, { "epoch": 0.47, "grad_norm": 1.921875, "learning_rate": 0.00017347056112379202, "loss": 2.2754, "step": 201690 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.0001734693072560492, "loss": 2.0043, "step": 201695 }, { "epoch": 0.47, "grad_norm": 2.296875, "learning_rate": 0.00017346805336320792, "loss": 2.1445, "step": 201700 }, { "epoch": 0.47, "grad_norm": 1.90625, "learning_rate": 0.00017346679944526863, "loss": 2.2101, "step": 201705 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017346554550223177, "loss": 1.915, "step": 201710 }, { "epoch": 0.47, "grad_norm": 2.375, "learning_rate": 0.00017346429153409774, "loss": 2.2538, "step": 201715 }, { "epoch": 0.47, "grad_norm": 2.125, "learning_rate": 0.00017346303754086693, "loss": 2.0295, "step": 201720 }, { "epoch": 0.47, "grad_norm": 2.46875, "learning_rate": 0.00017346178352253984, "loss": 2.0496, "step": 201725 }, { "epoch": 0.47, "grad_norm": 2.0625, "learning_rate": 0.00017346052947911692, "loss": 2.0762, "step": 201730 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.0001734592754105985, "loss": 2.0224, "step": 201735 }, { "epoch": 0.47, "grad_norm": 2.046875, "learning_rate": 0.00017345802131698505, "loss": 2.0385, "step": 201740 }, { "epoch": 0.47, "grad_norm": 1.890625, "learning_rate": 0.00017345676719827702, "loss": 2.1058, "step": 201745 }, { "epoch": 0.47, "grad_norm": 1.84375, "learning_rate": 0.00017345551305447484, "loss": 1.9807, "step": 201750 }, { "epoch": 0.47, "grad_norm": 2.265625, "learning_rate": 0.00017345425888557893, "loss": 2.2424, "step": 201755 }, { "epoch": 0.47, "grad_norm": 2.015625, "learning_rate": 0.00017345300469158968, "loss": 2.155, "step": 201760 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.0001734517504725076, "loss": 2.1811, "step": 201765 }, { "epoch": 0.47, "grad_norm": 2.15625, "learning_rate": 0.00017345049622833302, "loss": 2.1695, "step": 201770 }, { "epoch": 0.47, "grad_norm": 2.03125, "learning_rate": 0.00017344924195906644, "loss": 2.0845, "step": 201775 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.0001734479876647083, "loss": 2.235, "step": 201780 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017344673334525895, "loss": 2.0796, "step": 201785 }, { "epoch": 0.47, "grad_norm": 2.140625, "learning_rate": 0.00017344547900071885, "loss": 2.2078, "step": 201790 }, { "epoch": 0.47, "grad_norm": 1.9765625, "learning_rate": 0.0001734442246310885, "loss": 1.8961, "step": 201795 }, { "epoch": 0.47, "grad_norm": 2.484375, "learning_rate": 0.00017344297023636824, "loss": 2.092, "step": 201800 }, { "epoch": 0.47, "grad_norm": 2.09375, "learning_rate": 0.00017344171581655852, "loss": 2.0156, "step": 201805 }, { "epoch": 0.47, "grad_norm": 2.34375, "learning_rate": 0.00017344046137165978, "loss": 2.2574, "step": 201810 }, { "epoch": 0.47, "grad_norm": 2.3125, "learning_rate": 0.00017343920690167245, "loss": 2.1043, "step": 201815 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017343795240659697, "loss": 1.9948, "step": 201820 }, { "epoch": 0.47, "grad_norm": 2.765625, "learning_rate": 0.00017343669788643374, "loss": 2.0064, "step": 201825 }, { "epoch": 0.47, "grad_norm": 2.171875, "learning_rate": 0.0001734354433411832, "loss": 1.9013, "step": 201830 }, { "epoch": 0.47, "grad_norm": 1.96875, "learning_rate": 0.0001734341887708458, "loss": 1.9566, "step": 201835 }, { "epoch": 0.47, "grad_norm": 2.21875, "learning_rate": 0.00017343293417542194, "loss": 2.0332, "step": 201840 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017343167955491203, "loss": 1.8223, "step": 201845 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017343042490931655, "loss": 2.0463, "step": 201850 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017342917023863591, "loss": 2.0467, "step": 201855 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017342791554287055, "loss": 2.2212, "step": 201860 }, { "epoch": 0.48, "grad_norm": 1.78125, "learning_rate": 0.00017342666082202085, "loss": 2.091, "step": 201865 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017342540607608728, "loss": 2.094, "step": 201870 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017342415130507026, "loss": 2.089, "step": 201875 }, { "epoch": 0.48, "grad_norm": 2.59375, "learning_rate": 0.0001734228965089702, "loss": 1.9462, "step": 201880 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001734216416877876, "loss": 2.0058, "step": 201885 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017342038684152277, "loss": 1.9499, "step": 201890 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017341913197017626, "loss": 2.0382, "step": 201895 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.0001734178770737484, "loss": 1.9806, "step": 201900 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001734166221522397, "loss": 2.0837, "step": 201905 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017341536720565052, "loss": 2.029, "step": 201910 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017341411223398135, "loss": 2.1528, "step": 201915 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017341285723723255, "loss": 2.1283, "step": 201920 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.0001734116022154046, "loss": 1.9889, "step": 201925 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017341034716849792, "loss": 2.2037, "step": 201930 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017340909209651292, "loss": 1.997, "step": 201935 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017340783699945007, "loss": 2.0627, "step": 201940 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017340658187730976, "loss": 2.1101, "step": 201945 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017340532673009246, "loss": 2.0239, "step": 201950 }, { "epoch": 0.48, "grad_norm": 2.578125, "learning_rate": 0.00017340407155779853, "loss": 2.0426, "step": 201955 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.00017340281636042844, "loss": 2.1593, "step": 201960 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017340156113798263, "loss": 2.1131, "step": 201965 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001734003058904615, "loss": 2.1357, "step": 201970 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.0001733990506178655, "loss": 2.0941, "step": 201975 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017339779532019506, "loss": 2.1919, "step": 201980 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.0001733965399974506, "loss": 1.9314, "step": 201985 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017339528464963255, "loss": 2.1627, "step": 201990 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.0001733940292767413, "loss": 2.1567, "step": 201995 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017339277387877738, "loss": 2.094, "step": 202000 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017339151845574113, "loss": 2.012, "step": 202005 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017339026300763303, "loss": 2.1185, "step": 202010 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017338900753445345, "loss": 2.1987, "step": 202015 }, { "epoch": 0.48, "grad_norm": 2.453125, "learning_rate": 0.00017338775203620287, "loss": 2.0723, "step": 202020 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.0001733864965128817, "loss": 1.9399, "step": 202025 }, { "epoch": 0.48, "grad_norm": 2.578125, "learning_rate": 0.00017338524096449035, "loss": 2.0813, "step": 202030 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017338398539102934, "loss": 2.0156, "step": 202035 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017338272979249898, "loss": 1.9959, "step": 202040 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017338147416889974, "loss": 2.0844, "step": 202045 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017338021852023207, "loss": 2.0952, "step": 202050 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.0001733789628464964, "loss": 2.2206, "step": 202055 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017337770714769313, "loss": 2.2702, "step": 202060 }, { "epoch": 0.48, "grad_norm": 1.9765625, "learning_rate": 0.0001733764514238227, "loss": 2.0475, "step": 202065 }, { "epoch": 0.48, "grad_norm": 1.8125, "learning_rate": 0.00017337519567488556, "loss": 1.9393, "step": 202070 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.0001733739399008821, "loss": 2.2427, "step": 202075 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.0001733726841018128, "loss": 1.9701, "step": 202080 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017337142827767802, "loss": 2.0256, "step": 202085 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.0001733701724284783, "loss": 2.0635, "step": 202090 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017336891655421393, "loss": 1.9659, "step": 202095 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017336766065488543, "loss": 2.1044, "step": 202100 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017336640473049323, "loss": 2.062, "step": 202105 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017336514878103768, "loss": 2.0916, "step": 202110 }, { "epoch": 0.48, "grad_norm": 1.796875, "learning_rate": 0.0001733638928065193, "loss": 2.0735, "step": 202115 }, { "epoch": 0.48, "grad_norm": 1.7734375, "learning_rate": 0.00017336263680693846, "loss": 2.2512, "step": 202120 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017336138078229566, "loss": 2.2064, "step": 202125 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017336012473259123, "loss": 2.1336, "step": 202130 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017335886865782569, "loss": 2.0956, "step": 202135 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017335761255799943, "loss": 2.2299, "step": 202140 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017335635643311284, "loss": 2.1755, "step": 202145 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001733551002831664, "loss": 2.1077, "step": 202150 }, { "epoch": 0.48, "grad_norm": 2.71875, "learning_rate": 0.00017335384410816057, "loss": 2.3501, "step": 202155 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017335258790809573, "loss": 2.0433, "step": 202160 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001733513316829723, "loss": 2.0896, "step": 202165 }, { "epoch": 0.48, "grad_norm": 1.90625, "learning_rate": 0.0001733500754327907, "loss": 2.2157, "step": 202170 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001733488191575514, "loss": 2.0676, "step": 202175 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017334756285725483, "loss": 2.2012, "step": 202180 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017334630653190137, "loss": 2.2007, "step": 202185 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.0001733450501814915, "loss": 2.0634, "step": 202190 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017334379380602562, "loss": 2.1761, "step": 202195 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017334253740550418, "loss": 2.0793, "step": 202200 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017334128097992762, "loss": 2.1606, "step": 202205 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.0001733400245292963, "loss": 2.1906, "step": 202210 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017333876805361074, "loss": 2.1854, "step": 202215 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.0001733375115528713, "loss": 2.3255, "step": 202220 }, { "epoch": 0.48, "grad_norm": 2.84375, "learning_rate": 0.00017333625502707846, "loss": 2.1354, "step": 202225 }, { "epoch": 0.48, "grad_norm": 2.828125, "learning_rate": 0.0001733349984762326, "loss": 2.0841, "step": 202230 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.0001733337419003342, "loss": 2.0979, "step": 202235 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017333248529938363, "loss": 2.1691, "step": 202240 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.00017333122867338138, "loss": 2.158, "step": 202245 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017332997202232787, "loss": 2.2281, "step": 202250 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.0001733287153462235, "loss": 2.2334, "step": 202255 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017332745864506868, "loss": 1.9912, "step": 202260 }, { "epoch": 0.48, "grad_norm": 2.578125, "learning_rate": 0.00017332620191886389, "loss": 2.0979, "step": 202265 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017332494516760953, "loss": 2.0248, "step": 202270 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017332368839130603, "loss": 2.2106, "step": 202275 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017332243158995386, "loss": 2.0566, "step": 202280 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017332117476355342, "loss": 2.1977, "step": 202285 }, { "epoch": 0.48, "grad_norm": 1.765625, "learning_rate": 0.00017331991791210512, "loss": 2.1743, "step": 202290 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001733186610356094, "loss": 2.2679, "step": 202295 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001733174041340667, "loss": 2.1562, "step": 202300 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017331614720747746, "loss": 2.0151, "step": 202305 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017331489025584207, "loss": 2.2154, "step": 202310 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.000173313633279161, "loss": 2.0899, "step": 202315 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017331237627743465, "loss": 2.1168, "step": 202320 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017331111925066348, "loss": 2.0737, "step": 202325 }, { "epoch": 0.48, "grad_norm": 1.828125, "learning_rate": 0.0001733098621988479, "loss": 1.9338, "step": 202330 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017330860512198833, "loss": 2.115, "step": 202335 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001733073480200852, "loss": 2.3612, "step": 202340 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.000173306090893139, "loss": 2.1129, "step": 202345 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017330483374115006, "loss": 2.2802, "step": 202350 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.00017330357656411888, "loss": 1.9807, "step": 202355 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017330231936204586, "loss": 2.007, "step": 202360 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017330106213493145, "loss": 2.08, "step": 202365 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.00017329980488277604, "loss": 2.0177, "step": 202370 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017329854760558014, "loss": 2.1856, "step": 202375 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017329729030334407, "loss": 2.0223, "step": 202380 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017329603297606836, "loss": 2.0868, "step": 202385 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017329477562375335, "loss": 2.2026, "step": 202390 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017329351824639955, "loss": 2.067, "step": 202395 }, { "epoch": 0.48, "grad_norm": 1.828125, "learning_rate": 0.00017329226084400736, "loss": 2.0165, "step": 202400 }, { "epoch": 0.48, "grad_norm": 1.8359375, "learning_rate": 0.0001732910034165772, "loss": 2.1908, "step": 202405 }, { "epoch": 0.48, "grad_norm": 1.875, "learning_rate": 0.0001732897459641095, "loss": 2.0304, "step": 202410 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017328848848660467, "loss": 2.2355, "step": 202415 }, { "epoch": 0.48, "grad_norm": 4.1875, "learning_rate": 0.0001732872309840632, "loss": 2.0166, "step": 202420 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017328597345648544, "loss": 1.9729, "step": 202425 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017328471590387189, "loss": 2.0272, "step": 202430 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017328345832622296, "loss": 2.1553, "step": 202435 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017328220072353908, "loss": 2.1072, "step": 202440 }, { "epoch": 0.48, "grad_norm": 2.59375, "learning_rate": 0.00017328094309582062, "loss": 2.0325, "step": 202445 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001732796854430681, "loss": 2.2684, "step": 202450 }, { "epoch": 0.48, "grad_norm": 1.90625, "learning_rate": 0.00017327842776528193, "loss": 2.2229, "step": 202455 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.00017327717006246246, "loss": 2.2196, "step": 202460 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017327591233461023, "loss": 2.2481, "step": 202465 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.0001732746545817256, "loss": 2.228, "step": 202470 }, { "epoch": 0.48, "grad_norm": 2.453125, "learning_rate": 0.00017327339680380904, "loss": 2.0917, "step": 202475 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017327213900086093, "loss": 2.188, "step": 202480 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017327088117288173, "loss": 2.0392, "step": 202485 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017326962331987188, "loss": 2.2233, "step": 202490 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001732683654418318, "loss": 2.1354, "step": 202495 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017326710753876192, "loss": 2.0482, "step": 202500 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017326584961066264, "loss": 2.2995, "step": 202505 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017326459165753445, "loss": 2.1459, "step": 202510 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017326333367937772, "loss": 2.1924, "step": 202515 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017326207567619291, "loss": 2.206, "step": 202520 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017326081764798047, "loss": 2.104, "step": 202525 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.0001732595595947408, "loss": 2.1652, "step": 202530 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017325830151647433, "loss": 2.2101, "step": 202535 }, { "epoch": 0.48, "grad_norm": 1.8671875, "learning_rate": 0.0001732570434131815, "loss": 2.1301, "step": 202540 }, { "epoch": 0.48, "grad_norm": 1.875, "learning_rate": 0.00017325578528486272, "loss": 2.0624, "step": 202545 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017325452713151844, "loss": 2.0836, "step": 202550 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017325326895314912, "loss": 2.1504, "step": 202555 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001732520107497551, "loss": 2.138, "step": 202560 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.0001732507525213369, "loss": 2.2218, "step": 202565 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.0001732494942678949, "loss": 2.3381, "step": 202570 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.00017324823598942955, "loss": 2.0953, "step": 202575 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017324697768594127, "loss": 2.2196, "step": 202580 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.0001732457193574305, "loss": 2.2448, "step": 202585 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017324446100389765, "loss": 1.9143, "step": 202590 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017324320262534316, "loss": 2.1622, "step": 202595 }, { "epoch": 0.48, "grad_norm": 2.609375, "learning_rate": 0.00017324194422176748, "loss": 2.1108, "step": 202600 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017324068579317102, "loss": 2.0894, "step": 202605 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017323942733955423, "loss": 1.9295, "step": 202610 }, { "epoch": 0.48, "grad_norm": 1.8515625, "learning_rate": 0.0001732381688609175, "loss": 2.0556, "step": 202615 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017323691035726131, "loss": 2.1641, "step": 202620 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017323565182858602, "loss": 2.2501, "step": 202625 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017323439327489215, "loss": 2.0821, "step": 202630 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017323313469618006, "loss": 1.9989, "step": 202635 }, { "epoch": 0.48, "grad_norm": 1.7578125, "learning_rate": 0.0001732318760924502, "loss": 2.0618, "step": 202640 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017323061746370302, "loss": 2.1571, "step": 202645 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.0001732293588099389, "loss": 2.1238, "step": 202650 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017322810013115834, "loss": 2.2036, "step": 202655 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001732268414273617, "loss": 2.1055, "step": 202660 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017322558269854948, "loss": 2.1923, "step": 202665 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017322432394472202, "loss": 2.2095, "step": 202670 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017322306516587985, "loss": 1.9983, "step": 202675 }, { "epoch": 0.48, "grad_norm": 1.7890625, "learning_rate": 0.0001732218063620233, "loss": 2.1547, "step": 202680 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017322054753315293, "loss": 2.2562, "step": 202685 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017321928867926903, "loss": 2.3841, "step": 202690 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017321802980037215, "loss": 2.1636, "step": 202695 }, { "epoch": 0.48, "grad_norm": 2.90625, "learning_rate": 0.0001732167708964626, "loss": 2.0877, "step": 202700 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.0001732155119675409, "loss": 2.2218, "step": 202705 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017321425301360747, "loss": 1.9341, "step": 202710 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017321299403466272, "loss": 2.0954, "step": 202715 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.00017321173503070705, "loss": 2.1154, "step": 202720 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017321047600174096, "loss": 2.0899, "step": 202725 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.0001732092169477648, "loss": 2.0134, "step": 202730 }, { "epoch": 0.48, "grad_norm": 2.6875, "learning_rate": 0.0001732079578687791, "loss": 2.2034, "step": 202735 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.0001732066987647842, "loss": 2.221, "step": 202740 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017320543963578059, "loss": 1.9535, "step": 202745 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.00017320418048176865, "loss": 2.0718, "step": 202750 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017320292130274884, "loss": 2.0951, "step": 202755 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017320166209872158, "loss": 2.2431, "step": 202760 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001732004028696873, "loss": 2.0113, "step": 202765 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017319914361564647, "loss": 2.3276, "step": 202770 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017319788433659943, "loss": 2.1094, "step": 202775 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.0001731966250325467, "loss": 2.0118, "step": 202780 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001731953657034887, "loss": 2.1711, "step": 202785 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001731941063494258, "loss": 2.1114, "step": 202790 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017319284697035845, "loss": 2.0896, "step": 202795 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001731915875662871, "loss": 2.2768, "step": 202800 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.0001731903281372122, "loss": 1.9934, "step": 202805 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017318906868313416, "loss": 2.0953, "step": 202810 }, { "epoch": 0.48, "grad_norm": 1.859375, "learning_rate": 0.0001731878092040534, "loss": 2.0982, "step": 202815 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017318654969997036, "loss": 1.9853, "step": 202820 }, { "epoch": 0.48, "grad_norm": 3.265625, "learning_rate": 0.00017318529017088544, "loss": 2.32, "step": 202825 }, { "epoch": 0.48, "grad_norm": 2.6875, "learning_rate": 0.00017318403061679914, "loss": 2.1933, "step": 202830 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.0001731827710377118, "loss": 2.2123, "step": 202835 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017318151143362395, "loss": 2.0708, "step": 202840 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017318025180453593, "loss": 2.12, "step": 202845 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017317899215044824, "loss": 1.929, "step": 202850 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017317773247136126, "loss": 2.1099, "step": 202855 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017317647276727542, "loss": 2.1694, "step": 202860 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017317521303819122, "loss": 2.1852, "step": 202865 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.000173173953284109, "loss": 1.9793, "step": 202870 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.00017317269350502926, "loss": 2.3138, "step": 202875 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017317143370095236, "loss": 2.0878, "step": 202880 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.0001731701738718788, "loss": 1.9593, "step": 202885 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017316891401780894, "loss": 2.2377, "step": 202890 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017316765413874328, "loss": 2.1335, "step": 202895 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017316639423468227, "loss": 2.2382, "step": 202900 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017316513430562624, "loss": 2.2017, "step": 202905 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017316387435157567, "loss": 2.1454, "step": 202910 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017316261437253102, "loss": 2.0031, "step": 202915 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017316135436849268, "loss": 2.1618, "step": 202920 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.0001731600943394611, "loss": 2.1431, "step": 202925 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001731588342854367, "loss": 2.1026, "step": 202930 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017315757420641994, "loss": 2.1344, "step": 202935 }, { "epoch": 0.48, "grad_norm": 2.59375, "learning_rate": 0.0001731563141024112, "loss": 2.0268, "step": 202940 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017315505397341095, "loss": 2.0272, "step": 202945 }, { "epoch": 0.48, "grad_norm": 1.859375, "learning_rate": 0.00017315379381941957, "loss": 2.0374, "step": 202950 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017315253364043754, "loss": 1.9754, "step": 202955 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.0001731512734364653, "loss": 1.9966, "step": 202960 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017315001320750327, "loss": 2.1163, "step": 202965 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017314875295355183, "loss": 2.0487, "step": 202970 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.0001731474926746115, "loss": 2.1677, "step": 202975 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001731462323706826, "loss": 2.0743, "step": 202980 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017314497204176565, "loss": 2.2776, "step": 202985 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017314371168786105, "loss": 2.0655, "step": 202990 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017314245130896922, "loss": 2.06, "step": 202995 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.00017314119090509062, "loss": 2.2071, "step": 203000 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017313993047622566, "loss": 2.0732, "step": 203005 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.00017313867002237475, "loss": 2.1838, "step": 203010 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.0001731374095435384, "loss": 2.1547, "step": 203015 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017313614903971693, "loss": 2.1074, "step": 203020 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017313488851091084, "loss": 2.084, "step": 203025 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017313362795712056, "loss": 2.0619, "step": 203030 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001731323673783465, "loss": 2.0606, "step": 203035 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017313110677458907, "loss": 2.1444, "step": 203040 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017312984614584878, "loss": 2.1137, "step": 203045 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.00017312858549212596, "loss": 2.1258, "step": 203050 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001731273248134211, "loss": 1.976, "step": 203055 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017312606410973462, "loss": 2.0171, "step": 203060 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017312480338106697, "loss": 1.7282, "step": 203065 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017312354262741855, "loss": 2.1639, "step": 203070 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017312228184878978, "loss": 1.9634, "step": 203075 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017312102104518115, "loss": 2.1413, "step": 203080 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017311976021659305, "loss": 2.167, "step": 203085 }, { "epoch": 0.48, "grad_norm": 1.8125, "learning_rate": 0.00017311849936302588, "loss": 2.1215, "step": 203090 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017311723848448013, "loss": 2.0612, "step": 203095 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017311597758095618, "loss": 2.1624, "step": 203100 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.0001731147166524545, "loss": 2.1867, "step": 203105 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017311345569897554, "loss": 1.9853, "step": 203110 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017311219472051966, "loss": 2.1346, "step": 203115 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017311093371708732, "loss": 2.1506, "step": 203120 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.000173109672688679, "loss": 2.2004, "step": 203125 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017310841163529505, "loss": 2.1851, "step": 203130 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017310715055693596, "loss": 2.0109, "step": 203135 }, { "epoch": 0.48, "grad_norm": 1.6953125, "learning_rate": 0.00017310588945360214, "loss": 2.0664, "step": 203140 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017310462832529404, "loss": 2.2938, "step": 203145 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.00017310336717201203, "loss": 2.1033, "step": 203150 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017310210599375662, "loss": 2.1785, "step": 203155 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.0001731008447905282, "loss": 2.1244, "step": 203160 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017309958356232717, "loss": 2.2415, "step": 203165 }, { "epoch": 0.48, "grad_norm": 1.703125, "learning_rate": 0.00017309832230915403, "loss": 1.9909, "step": 203170 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017309706103100917, "loss": 2.2833, "step": 203175 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017309579972789302, "loss": 2.0874, "step": 203180 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017309453839980604, "loss": 2.1516, "step": 203185 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017309327704674864, "loss": 2.0829, "step": 203190 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017309201566872125, "loss": 2.0902, "step": 203195 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.00017309075426572427, "loss": 2.2928, "step": 203200 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001730894928377582, "loss": 2.1385, "step": 203205 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.0001730882313848234, "loss": 2.0941, "step": 203210 }, { "epoch": 0.48, "grad_norm": 1.9765625, "learning_rate": 0.00017308696990692038, "loss": 2.0558, "step": 203215 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.0001730857084040495, "loss": 2.2179, "step": 203220 }, { "epoch": 0.48, "grad_norm": 2.5, "learning_rate": 0.00017308444687621122, "loss": 2.3335, "step": 203225 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017308318532340594, "loss": 2.1195, "step": 203230 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017308192374563414, "loss": 2.1859, "step": 203235 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017308066214289625, "loss": 1.8839, "step": 203240 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017307940051519265, "loss": 2.1014, "step": 203245 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017307813886252381, "loss": 2.2089, "step": 203250 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017307687718489016, "loss": 2.1256, "step": 203255 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017307561548229213, "loss": 2.0712, "step": 203260 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.0001730743537547301, "loss": 1.9473, "step": 203265 }, { "epoch": 0.48, "grad_norm": 1.9765625, "learning_rate": 0.00017307309200220462, "loss": 2.2155, "step": 203270 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017307183022471597, "loss": 1.9974, "step": 203275 }, { "epoch": 0.48, "grad_norm": 1.9765625, "learning_rate": 0.0001730705684222647, "loss": 2.0876, "step": 203280 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.00017306930659485118, "loss": 2.0274, "step": 203285 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017306804474247585, "loss": 1.9623, "step": 203290 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.0001730667828651392, "loss": 1.9824, "step": 203295 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017306552096284158, "loss": 2.0574, "step": 203300 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017306425903558346, "loss": 2.1399, "step": 203305 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017306299708336526, "loss": 2.1309, "step": 203310 }, { "epoch": 0.48, "grad_norm": 1.7890625, "learning_rate": 0.00017306173510618742, "loss": 2.0324, "step": 203315 }, { "epoch": 0.48, "grad_norm": 1.734375, "learning_rate": 0.00017306047310405035, "loss": 2.1683, "step": 203320 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017305921107695454, "loss": 2.089, "step": 203325 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017305794902490034, "loss": 2.0061, "step": 203330 }, { "epoch": 0.48, "grad_norm": 1.7109375, "learning_rate": 0.0001730566869478882, "loss": 1.9474, "step": 203335 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017305542484591862, "loss": 2.201, "step": 203340 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017305416271899194, "loss": 2.0063, "step": 203345 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017305290056710866, "loss": 2.1014, "step": 203350 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.00017305163839026916, "loss": 2.0316, "step": 203355 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017305037618847396, "loss": 2.319, "step": 203360 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017304911396172338, "loss": 2.2423, "step": 203365 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017304785171001787, "loss": 2.0805, "step": 203370 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.0001730465894333579, "loss": 1.9689, "step": 203375 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.0001730453271317439, "loss": 2.1714, "step": 203380 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017304406480517633, "loss": 1.8951, "step": 203385 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017304280245365554, "loss": 2.0396, "step": 203390 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.000173041540077182, "loss": 2.104, "step": 203395 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017304027767575618, "loss": 2.0105, "step": 203400 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017303901524937847, "loss": 2.1178, "step": 203405 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017303775279804928, "loss": 2.0553, "step": 203410 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.00017303649032176908, "loss": 2.121, "step": 203415 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.0001730352278205383, "loss": 1.9183, "step": 203420 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017303396529435734, "loss": 2.0865, "step": 203425 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017303270274322667, "loss": 2.1343, "step": 203430 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.0001730314401671467, "loss": 1.9854, "step": 203435 }, { "epoch": 0.48, "grad_norm": 1.875, "learning_rate": 0.00017303017756611788, "loss": 1.952, "step": 203440 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.00017302891494014062, "loss": 2.0014, "step": 203445 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017302765228921537, "loss": 2.0719, "step": 203450 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017302638961334252, "loss": 2.0515, "step": 203455 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017302512691252253, "loss": 2.2897, "step": 203460 }, { "epoch": 0.48, "grad_norm": 1.71875, "learning_rate": 0.00017302386418675586, "loss": 2.1317, "step": 203465 }, { "epoch": 0.48, "grad_norm": 1.6484375, "learning_rate": 0.0001730226014360429, "loss": 2.2331, "step": 203470 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017302133866038411, "loss": 2.0857, "step": 203475 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017302007585977988, "loss": 2.0473, "step": 203480 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017301881303423067, "loss": 1.967, "step": 203485 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017301755018373692, "loss": 2.0639, "step": 203490 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.00017301628730829904, "loss": 1.9977, "step": 203495 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017301502440791747, "loss": 1.9869, "step": 203500 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017301376148259269, "loss": 2.0865, "step": 203505 }, { "epoch": 0.48, "grad_norm": 1.765625, "learning_rate": 0.00017301249853232502, "loss": 2.0986, "step": 203510 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.000173011235557115, "loss": 2.1209, "step": 203515 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.00017300997255696298, "loss": 2.1414, "step": 203520 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017300870953186948, "loss": 2.1062, "step": 203525 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017300744648183485, "loss": 1.9391, "step": 203530 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017300618340685954, "loss": 2.1807, "step": 203535 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.00017300492030694403, "loss": 2.1681, "step": 203540 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001730036571820887, "loss": 1.9741, "step": 203545 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017300239403229397, "loss": 2.1522, "step": 203550 }, { "epoch": 0.48, "grad_norm": 1.6171875, "learning_rate": 0.00017300113085756032, "loss": 2.0989, "step": 203555 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017299986765788815, "loss": 2.1441, "step": 203560 }, { "epoch": 0.48, "grad_norm": 2.65625, "learning_rate": 0.00017299860443327792, "loss": 1.9579, "step": 203565 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017299734118373004, "loss": 2.1213, "step": 203570 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017299607790924491, "loss": 2.1353, "step": 203575 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017299481460982303, "loss": 2.089, "step": 203580 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.0001729935512854648, "loss": 2.0377, "step": 203585 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.00017299228793617063, "loss": 2.1215, "step": 203590 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.000172991024561941, "loss": 2.1905, "step": 203595 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001729897611627763, "loss": 2.1194, "step": 203600 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017298849773867697, "loss": 2.2285, "step": 203605 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017298723428964342, "loss": 2.1094, "step": 203610 }, { "epoch": 0.48, "grad_norm": 1.78125, "learning_rate": 0.00017298597081567612, "loss": 2.1267, "step": 203615 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017298470731677549, "loss": 2.2229, "step": 203620 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.000172983443792942, "loss": 2.1491, "step": 203625 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.000172982180244176, "loss": 2.0858, "step": 203630 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017298091667047798, "loss": 2.0886, "step": 203635 }, { "epoch": 0.48, "grad_norm": 1.765625, "learning_rate": 0.00017297965307184833, "loss": 2.0403, "step": 203640 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017297838944828754, "loss": 2.0118, "step": 203645 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.000172977125799796, "loss": 2.0845, "step": 203650 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017297586212637413, "loss": 2.1172, "step": 203655 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.0001729745984280224, "loss": 2.2012, "step": 203660 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017297333470474123, "loss": 2.1325, "step": 203665 }, { "epoch": 0.48, "grad_norm": 1.78125, "learning_rate": 0.00017297207095653104, "loss": 1.9188, "step": 203670 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017297080718339225, "loss": 2.315, "step": 203675 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017296954338532535, "loss": 2.1746, "step": 203680 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017296827956233068, "loss": 1.952, "step": 203685 }, { "epoch": 0.48, "grad_norm": 1.8125, "learning_rate": 0.00017296701571440875, "loss": 1.9527, "step": 203690 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017296575184155995, "loss": 2.0243, "step": 203695 }, { "epoch": 0.48, "grad_norm": 2.578125, "learning_rate": 0.00017296448794378475, "loss": 2.1928, "step": 203700 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017296322402108355, "loss": 2.1236, "step": 203705 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017296196007345676, "loss": 2.2602, "step": 203710 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.00017296069610090486, "loss": 2.0902, "step": 203715 }, { "epoch": 0.48, "grad_norm": 1.8359375, "learning_rate": 0.0001729594321034283, "loss": 2.1546, "step": 203720 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017295816808102742, "loss": 2.1567, "step": 203725 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017295690403370273, "loss": 2.2747, "step": 203730 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017295563996145462, "loss": 2.1314, "step": 203735 }, { "epoch": 0.48, "grad_norm": 1.921875, "learning_rate": 0.00017295437586428356, "loss": 2.0681, "step": 203740 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017295311174218996, "loss": 1.9596, "step": 203745 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017295184759517427, "loss": 2.1094, "step": 203750 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017295058342323686, "loss": 1.8647, "step": 203755 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017294931922637826, "loss": 2.1049, "step": 203760 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.0001729480550045988, "loss": 2.0616, "step": 203765 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.000172946790757899, "loss": 1.9023, "step": 203770 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017294552648627925, "loss": 1.9622, "step": 203775 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.00017294426218973996, "loss": 2.226, "step": 203780 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017294299786828162, "loss": 2.072, "step": 203785 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017294173352190462, "loss": 1.9598, "step": 203790 }, { "epoch": 0.48, "grad_norm": 1.90625, "learning_rate": 0.00017294046915060937, "loss": 2.0577, "step": 203795 }, { "epoch": 0.48, "grad_norm": 2.453125, "learning_rate": 0.00017293920475439635, "loss": 2.154, "step": 203800 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.000172937940333266, "loss": 2.0795, "step": 203805 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.0001729366758872187, "loss": 2.0847, "step": 203810 }, { "epoch": 0.48, "grad_norm": 2.703125, "learning_rate": 0.00017293541141625492, "loss": 1.9711, "step": 203815 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017293414692037508, "loss": 1.9879, "step": 203820 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017293288239957958, "loss": 2.0938, "step": 203825 }, { "epoch": 0.48, "grad_norm": 1.921875, "learning_rate": 0.00017293161785386893, "loss": 2.1294, "step": 203830 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.0001729303532832435, "loss": 2.2277, "step": 203835 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017292908868770374, "loss": 2.1631, "step": 203840 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017292782406725009, "loss": 2.052, "step": 203845 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017292655942188295, "loss": 2.0366, "step": 203850 }, { "epoch": 0.48, "grad_norm": 1.8515625, "learning_rate": 0.0001729252947516028, "loss": 1.861, "step": 203855 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017292403005641002, "loss": 2.3006, "step": 203860 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001729227653363051, "loss": 2.1231, "step": 203865 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.0001729215005912884, "loss": 1.8761, "step": 203870 }, { "epoch": 0.48, "grad_norm": 2.9375, "learning_rate": 0.00017292023582136042, "loss": 2.0918, "step": 203875 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017291897102652157, "loss": 2.2162, "step": 203880 }, { "epoch": 0.48, "grad_norm": 1.796875, "learning_rate": 0.00017291770620677229, "loss": 2.2294, "step": 203885 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017291644136211296, "loss": 2.1398, "step": 203890 }, { "epoch": 0.48, "grad_norm": 1.90625, "learning_rate": 0.00017291517649254407, "loss": 2.0411, "step": 203895 }, { "epoch": 0.48, "grad_norm": 1.6484375, "learning_rate": 0.00017291391159806604, "loss": 2.0621, "step": 203900 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017291264667867928, "loss": 2.1353, "step": 203905 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017291138173438424, "loss": 1.8993, "step": 203910 }, { "epoch": 0.48, "grad_norm": 2.625, "learning_rate": 0.00017291011676518137, "loss": 2.0646, "step": 203915 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017290885177107107, "loss": 2.0849, "step": 203920 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017290758675205377, "loss": 2.2585, "step": 203925 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017290632170812993, "loss": 1.9567, "step": 203930 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017290505663929997, "loss": 2.1766, "step": 203935 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.0001729037915455643, "loss": 2.1559, "step": 203940 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.00017290252642692338, "loss": 2.0757, "step": 203945 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017290126128337768, "loss": 2.256, "step": 203950 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017289999611492753, "loss": 2.1506, "step": 203955 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017289873092157343, "loss": 2.1647, "step": 203960 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017289746570331582, "loss": 1.8828, "step": 203965 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001728962004601551, "loss": 2.1652, "step": 203970 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.00017289493519209172, "loss": 2.0341, "step": 203975 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.0001728936698991261, "loss": 2.2177, "step": 203980 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017289240458125867, "loss": 2.0415, "step": 203985 }, { "epoch": 0.48, "grad_norm": 2.625, "learning_rate": 0.00017289113923848993, "loss": 2.0359, "step": 203990 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.00017288987387082018, "loss": 2.0353, "step": 203995 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017288860847825, "loss": 2.2715, "step": 204000 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001728873430607797, "loss": 2.0928, "step": 204005 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017288607761840977, "loss": 2.142, "step": 204010 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017288481215114066, "loss": 2.0255, "step": 204015 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.00017288354665897276, "loss": 2.1729, "step": 204020 }, { "epoch": 0.48, "grad_norm": 2.828125, "learning_rate": 0.0001728822811419065, "loss": 1.9494, "step": 204025 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017288101559994235, "loss": 2.2591, "step": 204030 }, { "epoch": 0.48, "grad_norm": 2.75, "learning_rate": 0.0001728797500330807, "loss": 2.0009, "step": 204035 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017287848444132205, "loss": 2.1271, "step": 204040 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017287721882466674, "loss": 2.2184, "step": 204045 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017287595318311527, "loss": 2.1283, "step": 204050 }, { "epoch": 0.48, "grad_norm": 1.6875, "learning_rate": 0.00017287468751666805, "loss": 1.9951, "step": 204055 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017287342182532552, "loss": 1.9973, "step": 204060 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.0001728721561090881, "loss": 2.064, "step": 204065 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017287089036795625, "loss": 2.0521, "step": 204070 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017286962460193035, "loss": 2.019, "step": 204075 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.0001728683588110109, "loss": 2.1113, "step": 204080 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017286709299519826, "loss": 2.1475, "step": 204085 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017286582715449292, "loss": 1.943, "step": 204090 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017286456128889533, "loss": 2.1744, "step": 204095 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001728632953984058, "loss": 2.1516, "step": 204100 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017286202948302493, "loss": 1.9918, "step": 204105 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017286076354275303, "loss": 2.2509, "step": 204110 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017285949757759057, "loss": 2.1101, "step": 204115 }, { "epoch": 0.48, "grad_norm": 2.421875, "learning_rate": 0.000172858231587538, "loss": 2.1438, "step": 204120 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017285696557259572, "loss": 2.0563, "step": 204125 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.0001728556995327642, "loss": 1.9261, "step": 204130 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017285443346804383, "loss": 1.9831, "step": 204135 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017285316737843505, "loss": 2.1978, "step": 204140 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017285190126393833, "loss": 2.141, "step": 204145 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017285063512455408, "loss": 2.1411, "step": 204150 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.00017284936896028272, "loss": 2.1761, "step": 204155 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.00017284810277112472, "loss": 2.2026, "step": 204160 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017284683655708046, "loss": 1.9817, "step": 204165 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.0001728455703181504, "loss": 1.9159, "step": 204170 }, { "epoch": 0.48, "grad_norm": 1.828125, "learning_rate": 0.000172844304054335, "loss": 2.0477, "step": 204175 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017284303776563464, "loss": 1.9591, "step": 204180 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017284177145204978, "loss": 2.1224, "step": 204185 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017284050511358085, "loss": 2.1873, "step": 204190 }, { "epoch": 0.48, "grad_norm": 2.65625, "learning_rate": 0.0001728392387502283, "loss": 2.09, "step": 204195 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017283797236199253, "loss": 2.2678, "step": 204200 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017283670594887398, "loss": 2.2382, "step": 204205 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.0001728354395108731, "loss": 2.2048, "step": 204210 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017283417304799032, "loss": 1.9411, "step": 204215 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017283290656022607, "loss": 2.1928, "step": 204220 }, { "epoch": 0.48, "grad_norm": 3.640625, "learning_rate": 0.00017283164004758074, "loss": 2.2152, "step": 204225 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017283037351005485, "loss": 2.2047, "step": 204230 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017282910694764873, "loss": 2.18, "step": 204235 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.0001728278403603629, "loss": 2.1047, "step": 204240 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017282657374819777, "loss": 1.8451, "step": 204245 }, { "epoch": 0.48, "grad_norm": 1.9765625, "learning_rate": 0.00017282530711115371, "loss": 2.2473, "step": 204250 }, { "epoch": 0.48, "grad_norm": 2.671875, "learning_rate": 0.00017282404044923126, "loss": 2.3733, "step": 204255 }, { "epoch": 0.48, "grad_norm": 2.59375, "learning_rate": 0.0001728227737624308, "loss": 2.0703, "step": 204260 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.0001728215070507527, "loss": 2.1386, "step": 204265 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001728202403141975, "loss": 2.1438, "step": 204270 }, { "epoch": 0.48, "grad_norm": 2.765625, "learning_rate": 0.0001728189735527656, "loss": 2.1569, "step": 204275 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001728177067664574, "loss": 2.0863, "step": 204280 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017281643995527334, "loss": 2.1107, "step": 204285 }, { "epoch": 0.48, "grad_norm": 1.8515625, "learning_rate": 0.00017281517311921383, "loss": 1.9995, "step": 204290 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001728139062582794, "loss": 2.1329, "step": 204295 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017281263937247038, "loss": 2.2496, "step": 204300 }, { "epoch": 0.48, "grad_norm": 2.65625, "learning_rate": 0.00017281137246178728, "loss": 1.9833, "step": 204305 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017281010552623046, "loss": 2.1852, "step": 204310 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.0001728088385658004, "loss": 1.9483, "step": 204315 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001728075715804975, "loss": 2.1619, "step": 204320 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017280630457032224, "loss": 2.1535, "step": 204325 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017280503753527501, "loss": 2.0629, "step": 204330 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017280377047535628, "loss": 2.2667, "step": 204335 }, { "epoch": 0.48, "grad_norm": 2.5, "learning_rate": 0.00017280250339056643, "loss": 2.0974, "step": 204340 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017280123628090593, "loss": 2.2538, "step": 204345 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017279996914637525, "loss": 2.1324, "step": 204350 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017279870198697476, "loss": 1.8737, "step": 204355 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017279743480270486, "loss": 2.1807, "step": 204360 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001727961675935661, "loss": 2.0422, "step": 204365 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017279490035955884, "loss": 2.0695, "step": 204370 }, { "epoch": 0.48, "grad_norm": 3.25, "learning_rate": 0.0001727936331006835, "loss": 2.0375, "step": 204375 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017279236581694055, "loss": 2.0884, "step": 204380 }, { "epoch": 0.48, "grad_norm": 2.625, "learning_rate": 0.0001727910985083304, "loss": 2.082, "step": 204385 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.0001727898311748535, "loss": 1.9891, "step": 204390 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017278856381651024, "loss": 2.0813, "step": 204395 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017278729643330112, "loss": 2.1679, "step": 204400 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017278602902522653, "loss": 2.1321, "step": 204405 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017278476159228692, "loss": 2.0969, "step": 204410 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.0001727834941344827, "loss": 2.1639, "step": 204415 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017278222665181433, "loss": 2.1306, "step": 204420 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017278095914428224, "loss": 2.1409, "step": 204425 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.00017277969161188684, "loss": 2.151, "step": 204430 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017277842405462857, "loss": 2.0625, "step": 204435 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.0001727771564725079, "loss": 2.2192, "step": 204440 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.0001727758888655252, "loss": 2.0449, "step": 204445 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017277462123368095, "loss": 2.0969, "step": 204450 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017277335357697557, "loss": 1.827, "step": 204455 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017277208589540948, "loss": 2.1063, "step": 204460 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017277081818898316, "loss": 2.1946, "step": 204465 }, { "epoch": 0.48, "grad_norm": 2.640625, "learning_rate": 0.00017276955045769697, "loss": 2.0319, "step": 204470 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017276828270155141, "loss": 2.2109, "step": 204475 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.0001727670149205469, "loss": 2.1458, "step": 204480 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017276574711468383, "loss": 2.1149, "step": 204485 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017276447928396264, "loss": 2.215, "step": 204490 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017276321142838382, "loss": 2.1122, "step": 204495 }, { "epoch": 0.48, "grad_norm": 1.84375, "learning_rate": 0.00017276194354794774, "loss": 2.1811, "step": 204500 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017276067564265488, "loss": 2.0982, "step": 204505 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017275940771250566, "loss": 2.0083, "step": 204510 }, { "epoch": 0.48, "grad_norm": 1.734375, "learning_rate": 0.00017275813975750048, "loss": 2.085, "step": 204515 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.0001727568717776398, "loss": 1.8672, "step": 204520 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001727556037729241, "loss": 2.2374, "step": 204525 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017275433574335373, "loss": 1.9701, "step": 204530 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017275306768892916, "loss": 2.2036, "step": 204535 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017275179960965083, "loss": 1.9583, "step": 204540 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017275053150551917, "loss": 2.0815, "step": 204545 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001727492633765346, "loss": 1.9149, "step": 204550 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017274799522269758, "loss": 2.0621, "step": 204555 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017274672704400848, "loss": 2.0345, "step": 204560 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.00017274545884046782, "loss": 2.0621, "step": 204565 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.000172744190612076, "loss": 2.0742, "step": 204570 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017274292235883342, "loss": 2.2272, "step": 204575 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017274165408074054, "loss": 2.1576, "step": 204580 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017274038577779778, "loss": 2.1355, "step": 204585 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017273911745000563, "loss": 2.172, "step": 204590 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.00017273784909736442, "loss": 2.2208, "step": 204595 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.0001727365807198747, "loss": 2.0179, "step": 204600 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001727353123175368, "loss": 2.0984, "step": 204605 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017273404389035122, "loss": 2.0929, "step": 204610 }, { "epoch": 0.48, "grad_norm": 1.796875, "learning_rate": 0.00017273277543831833, "loss": 2.0685, "step": 204615 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017273150696143867, "loss": 2.0792, "step": 204620 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017273023845971257, "loss": 2.1466, "step": 204625 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.0001727289699331405, "loss": 2.143, "step": 204630 }, { "epoch": 0.48, "grad_norm": 1.828125, "learning_rate": 0.00017272770138172293, "loss": 2.1363, "step": 204635 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017272643280546023, "loss": 2.1955, "step": 204640 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017272516420435284, "loss": 2.0772, "step": 204645 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017272389557840127, "loss": 2.1911, "step": 204650 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017272262692760585, "loss": 1.9445, "step": 204655 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.0001727213582519671, "loss": 2.0483, "step": 204660 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017272008955148538, "loss": 2.3435, "step": 204665 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017271882082616117, "loss": 2.0441, "step": 204670 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017271755207599488, "loss": 2.3363, "step": 204675 }, { "epoch": 0.48, "grad_norm": 2.8125, "learning_rate": 0.000172716283300987, "loss": 2.2185, "step": 204680 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017271501450113788, "loss": 1.9994, "step": 204685 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.000172713745676448, "loss": 2.1084, "step": 204690 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001727124768269178, "loss": 2.2484, "step": 204695 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017271120795254769, "loss": 2.3412, "step": 204700 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001727099390533381, "loss": 2.2077, "step": 204705 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017270867012928948, "loss": 1.9814, "step": 204710 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017270740118040226, "loss": 2.0799, "step": 204715 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017270613220667688, "loss": 1.8952, "step": 204720 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017270486320811377, "loss": 1.9614, "step": 204725 }, { "epoch": 0.48, "grad_norm": 1.8984375, "learning_rate": 0.00017270359418471336, "loss": 1.9151, "step": 204730 }, { "epoch": 0.48, "grad_norm": 1.78125, "learning_rate": 0.00017270232513647607, "loss": 1.8782, "step": 204735 }, { "epoch": 0.48, "grad_norm": 2.734375, "learning_rate": 0.00017270105606340236, "loss": 1.9684, "step": 204740 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017269978696549264, "loss": 2.2159, "step": 204745 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017269851784274738, "loss": 2.02, "step": 204750 }, { "epoch": 0.48, "grad_norm": 1.8671875, "learning_rate": 0.00017269724869516698, "loss": 2.1572, "step": 204755 }, { "epoch": 0.48, "grad_norm": 2.5625, "learning_rate": 0.00017269597952275186, "loss": 2.1981, "step": 204760 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017269471032550248, "loss": 1.9542, "step": 204765 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017269344110341927, "loss": 2.2772, "step": 204770 }, { "epoch": 0.48, "grad_norm": 2.71875, "learning_rate": 0.00017269217185650267, "loss": 2.2286, "step": 204775 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001726909025847531, "loss": 2.0139, "step": 204780 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.000172689633288171, "loss": 2.048, "step": 204785 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001726883639667568, "loss": 2.1666, "step": 204790 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017268709462051093, "loss": 2.1722, "step": 204795 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017268582524943387, "loss": 1.9905, "step": 204800 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017268455585352598, "loss": 2.1109, "step": 204805 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.00017268328643278773, "loss": 2.0111, "step": 204810 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017268201698721956, "loss": 1.9096, "step": 204815 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017268074751682186, "loss": 2.1206, "step": 204820 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017267947802159514, "loss": 1.9832, "step": 204825 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.0001726782085015398, "loss": 2.1171, "step": 204830 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001726769389566562, "loss": 2.2645, "step": 204835 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017267566938694492, "loss": 2.1039, "step": 204840 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017267439979240626, "loss": 2.0076, "step": 204845 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017267313017304075, "loss": 2.0306, "step": 204850 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017267186052884873, "loss": 2.0675, "step": 204855 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017267059085983072, "loss": 2.3327, "step": 204860 }, { "epoch": 0.48, "grad_norm": 1.71875, "learning_rate": 0.0001726693211659871, "loss": 1.9854, "step": 204865 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017266805144731834, "loss": 2.1787, "step": 204870 }, { "epoch": 0.48, "grad_norm": 1.9765625, "learning_rate": 0.00017266678170382486, "loss": 2.059, "step": 204875 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017266551193550708, "loss": 2.1154, "step": 204880 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017266424214236543, "loss": 2.2134, "step": 204885 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017266297232440035, "loss": 2.013, "step": 204890 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001726617024816123, "loss": 2.0977, "step": 204895 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.0001726604326140017, "loss": 2.0962, "step": 204900 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017265916272156897, "loss": 1.9897, "step": 204905 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017265789280431455, "loss": 2.1115, "step": 204910 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017265662286223887, "loss": 2.1892, "step": 204915 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.0001726553528953424, "loss": 2.2329, "step": 204920 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001726540829036255, "loss": 1.978, "step": 204925 }, { "epoch": 0.48, "grad_norm": 1.734375, "learning_rate": 0.00017265281288708867, "loss": 1.9647, "step": 204930 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017265154284573232, "loss": 2.1537, "step": 204935 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017265027277955689, "loss": 1.9648, "step": 204940 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.0001726490026885628, "loss": 2.1371, "step": 204945 }, { "epoch": 0.48, "grad_norm": 1.8359375, "learning_rate": 0.0001726477325727505, "loss": 2.2011, "step": 204950 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017264646243212041, "loss": 2.1126, "step": 204955 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017264519226667299, "loss": 1.8787, "step": 204960 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017264392207640866, "loss": 2.1265, "step": 204965 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017264265186132783, "loss": 1.982, "step": 204970 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017264138162143092, "loss": 2.0896, "step": 204975 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017264011135671844, "loss": 2.1373, "step": 204980 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017263884106719077, "loss": 2.1667, "step": 204985 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.00017263757075284834, "loss": 2.288, "step": 204990 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017263630041369164, "loss": 2.0853, "step": 204995 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017263503004972104, "loss": 2.1659, "step": 205000 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.000172633759660937, "loss": 2.2138, "step": 205005 }, { "epoch": 0.48, "grad_norm": 2.515625, "learning_rate": 0.00017263248924733992, "loss": 1.9824, "step": 205010 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.0001726312188089303, "loss": 1.8335, "step": 205015 }, { "epoch": 0.48, "grad_norm": 1.84375, "learning_rate": 0.0001726299483457085, "loss": 2.1246, "step": 205020 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017262867785767502, "loss": 2.284, "step": 205025 }, { "epoch": 0.48, "grad_norm": 1.90625, "learning_rate": 0.00017262740734483027, "loss": 2.2254, "step": 205030 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001726261368071747, "loss": 2.0882, "step": 205035 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.0001726248662447087, "loss": 1.9526, "step": 205040 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017262359565743272, "loss": 2.0296, "step": 205045 }, { "epoch": 0.48, "grad_norm": 1.8671875, "learning_rate": 0.00017262232504534718, "loss": 2.1579, "step": 205050 }, { "epoch": 0.48, "grad_norm": 2.59375, "learning_rate": 0.0001726210544084526, "loss": 2.0633, "step": 205055 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017261978374674933, "loss": 2.1129, "step": 205060 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.0001726185130602378, "loss": 2.0438, "step": 205065 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.00017261724234891848, "loss": 2.1654, "step": 205070 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017261597161279178, "loss": 2.2632, "step": 205075 }, { "epoch": 0.48, "grad_norm": 1.796875, "learning_rate": 0.00017261470085185815, "loss": 2.0693, "step": 205080 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017261343006611806, "loss": 1.9874, "step": 205085 }, { "epoch": 0.48, "grad_norm": 2.640625, "learning_rate": 0.00017261215925557189, "loss": 2.0451, "step": 205090 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017261088842022006, "loss": 2.1641, "step": 205095 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.00017260961756006307, "loss": 2.0869, "step": 205100 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017260834667510126, "loss": 2.2805, "step": 205105 }, { "epoch": 0.48, "grad_norm": 2.671875, "learning_rate": 0.00017260707576533516, "loss": 1.9417, "step": 205110 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.00017260580483076518, "loss": 2.1076, "step": 205115 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017260453387139173, "loss": 2.0239, "step": 205120 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017260326288721527, "loss": 1.9358, "step": 205125 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017260199187823618, "loss": 2.1234, "step": 205130 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017260072084445495, "loss": 1.9769, "step": 205135 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017259944978587202, "loss": 2.0357, "step": 205140 }, { "epoch": 0.48, "grad_norm": 1.9296875, "learning_rate": 0.00017259817870248777, "loss": 1.9943, "step": 205145 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017259690759430268, "loss": 2.146, "step": 205150 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017259563646131719, "loss": 2.2622, "step": 205155 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017259436530353165, "loss": 2.0601, "step": 205160 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017259309412094662, "loss": 2.1398, "step": 205165 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017259182291356245, "loss": 2.2587, "step": 205170 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001725905516813796, "loss": 2.0798, "step": 205175 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001725892804243985, "loss": 2.338, "step": 205180 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.0001725880091426196, "loss": 2.0885, "step": 205185 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017258673783604328, "loss": 2.0892, "step": 205190 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017258546650467005, "loss": 2.0226, "step": 205195 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.0001725841951485003, "loss": 2.117, "step": 205200 }, { "epoch": 0.48, "grad_norm": 2.71875, "learning_rate": 0.00017258292376753445, "loss": 2.2183, "step": 205205 }, { "epoch": 0.48, "grad_norm": 1.84375, "learning_rate": 0.000172581652361773, "loss": 2.1203, "step": 205210 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017258038093121632, "loss": 2.127, "step": 205215 }, { "epoch": 0.48, "grad_norm": 1.8984375, "learning_rate": 0.00017257910947586486, "loss": 2.1264, "step": 205220 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017257783799571908, "loss": 2.2921, "step": 205225 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017257656649077933, "loss": 2.0106, "step": 205230 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017257529496104618, "loss": 1.9913, "step": 205235 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017257402340651996, "loss": 2.1221, "step": 205240 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017257275182720115, "loss": 2.0506, "step": 205245 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.0001725714802230902, "loss": 1.9469, "step": 205250 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017257020859418748, "loss": 2.2671, "step": 205255 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017256893694049343, "loss": 2.2205, "step": 205260 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017256766526200856, "loss": 2.1463, "step": 205265 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017256639355873327, "loss": 2.2551, "step": 205270 }, { "epoch": 0.48, "grad_norm": 1.828125, "learning_rate": 0.00017256512183066796, "loss": 2.0134, "step": 205275 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017256385007781308, "loss": 1.9526, "step": 205280 }, { "epoch": 0.48, "grad_norm": 1.875, "learning_rate": 0.00017256257830016906, "loss": 2.0755, "step": 205285 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.0001725613064977364, "loss": 1.9603, "step": 205290 }, { "epoch": 0.48, "grad_norm": 2.625, "learning_rate": 0.00017256003467051544, "loss": 2.1665, "step": 205295 }, { "epoch": 0.48, "grad_norm": 1.921875, "learning_rate": 0.00017255876281850667, "loss": 2.1256, "step": 205300 }, { "epoch": 0.48, "grad_norm": 1.8828125, "learning_rate": 0.0001725574909417105, "loss": 2.2406, "step": 205305 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001725562190401274, "loss": 2.0619, "step": 205310 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017255494711375776, "loss": 2.2435, "step": 205315 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017255367516260205, "loss": 1.9388, "step": 205320 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017255240318666068, "loss": 2.2143, "step": 205325 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017255113118593409, "loss": 2.1712, "step": 205330 }, { "epoch": 0.48, "grad_norm": 1.8203125, "learning_rate": 0.00017254985916042271, "loss": 2.1044, "step": 205335 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.000172548587110127, "loss": 2.0892, "step": 205340 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017254731503504737, "loss": 2.1453, "step": 205345 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017254604293518425, "loss": 2.1905, "step": 205350 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001725447708105381, "loss": 2.3026, "step": 205355 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017254349866110936, "loss": 2.0356, "step": 205360 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.0001725422264868984, "loss": 2.2147, "step": 205365 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017254095428790572, "loss": 2.2753, "step": 205370 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017253968206413175, "loss": 2.2677, "step": 205375 }, { "epoch": 0.48, "grad_norm": 2.453125, "learning_rate": 0.0001725384098155769, "loss": 2.3028, "step": 205380 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001725371375422416, "loss": 2.1499, "step": 205385 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.0001725358652441263, "loss": 2.0574, "step": 205390 }, { "epoch": 0.48, "grad_norm": 2.34375, "learning_rate": 0.00017253459292123144, "loss": 2.1825, "step": 205395 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017253332057355744, "loss": 2.0794, "step": 205400 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.00017253204820110476, "loss": 1.8327, "step": 205405 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001725307758038738, "loss": 2.0918, "step": 205410 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.00017252950338186501, "loss": 2.1093, "step": 205415 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.00017252823093507887, "loss": 2.1482, "step": 205420 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017252695846351572, "loss": 2.2225, "step": 205425 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.00017252568596717605, "loss": 1.9962, "step": 205430 }, { "epoch": 0.48, "grad_norm": 2.328125, "learning_rate": 0.0001725244134460603, "loss": 2.0935, "step": 205435 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001725231409001689, "loss": 2.0039, "step": 205440 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017252186832950227, "loss": 2.0187, "step": 205445 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017252059573406089, "loss": 2.1631, "step": 205450 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.0001725193231138451, "loss": 2.1145, "step": 205455 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017251805046885544, "loss": 1.9329, "step": 205460 }, { "epoch": 0.48, "grad_norm": 2.546875, "learning_rate": 0.00017251677779909228, "loss": 2.1891, "step": 205465 }, { "epoch": 0.48, "grad_norm": 1.9921875, "learning_rate": 0.00017251550510455607, "loss": 2.0088, "step": 205470 }, { "epoch": 0.48, "grad_norm": 1.875, "learning_rate": 0.00017251423238524727, "loss": 1.9545, "step": 205475 }, { "epoch": 0.48, "grad_norm": 2.5, "learning_rate": 0.00017251295964116627, "loss": 2.1444, "step": 205480 }, { "epoch": 0.48, "grad_norm": 2.53125, "learning_rate": 0.00017251168687231353, "loss": 2.2181, "step": 205485 }, { "epoch": 0.48, "grad_norm": 2.453125, "learning_rate": 0.0001725104140786895, "loss": 2.275, "step": 205490 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017250914126029458, "loss": 2.0843, "step": 205495 }, { "epoch": 0.48, "grad_norm": 2.0, "learning_rate": 0.00017250786841712922, "loss": 2.0745, "step": 205500 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.0001725065955491939, "loss": 2.2795, "step": 205505 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017250532265648895, "loss": 1.9179, "step": 205510 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001725040497390149, "loss": 2.2121, "step": 205515 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017250277679677214, "loss": 2.1126, "step": 205520 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.00017250150382976112, "loss": 2.0498, "step": 205525 }, { "epoch": 0.48, "grad_norm": 1.7734375, "learning_rate": 0.0001725002308379823, "loss": 1.8997, "step": 205530 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017249895782143603, "loss": 2.1481, "step": 205535 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017249768478012285, "loss": 2.066, "step": 205540 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001724964117140431, "loss": 2.0733, "step": 205545 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.0001724951386231973, "loss": 2.1167, "step": 205550 }, { "epoch": 0.48, "grad_norm": 1.6796875, "learning_rate": 0.00017249386550758584, "loss": 1.9696, "step": 205555 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017249259236720916, "loss": 2.0723, "step": 205560 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.0001724913192020677, "loss": 1.9875, "step": 205565 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017249004601216185, "loss": 2.0735, "step": 205570 }, { "epoch": 0.48, "grad_norm": 2.578125, "learning_rate": 0.00017248877279749214, "loss": 2.2033, "step": 205575 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017248749955805893, "loss": 2.1489, "step": 205580 }, { "epoch": 0.48, "grad_norm": 2.65625, "learning_rate": 0.00017248622629386268, "loss": 2.1052, "step": 205585 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.0001724849530049038, "loss": 2.1583, "step": 205590 }, { "epoch": 0.48, "grad_norm": 2.171875, "learning_rate": 0.00017248367969118275, "loss": 2.0932, "step": 205595 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017248240635269998, "loss": 2.092, "step": 205600 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.0001724811329894559, "loss": 2.2489, "step": 205605 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017247985960145093, "loss": 2.011, "step": 205610 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017247858618868556, "loss": 2.2465, "step": 205615 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017247731275116016, "loss": 2.2202, "step": 205620 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.0001724760392888752, "loss": 2.0287, "step": 205625 }, { "epoch": 0.48, "grad_norm": 1.6484375, "learning_rate": 0.00017247476580183112, "loss": 2.0439, "step": 205630 }, { "epoch": 0.48, "grad_norm": 2.09375, "learning_rate": 0.00017247349229002835, "loss": 2.2171, "step": 205635 }, { "epoch": 0.48, "grad_norm": 2.03125, "learning_rate": 0.0001724722187534673, "loss": 1.9935, "step": 205640 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017247094519214846, "loss": 2.2917, "step": 205645 }, { "epoch": 0.48, "grad_norm": 2.015625, "learning_rate": 0.0001724696716060722, "loss": 2.0433, "step": 205650 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.000172468397995239, "loss": 1.9581, "step": 205655 }, { "epoch": 0.48, "grad_norm": 2.359375, "learning_rate": 0.00017246712435964927, "loss": 2.0106, "step": 205660 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017246585069930346, "loss": 2.1274, "step": 205665 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.000172464577014202, "loss": 2.2625, "step": 205670 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017246330330434535, "loss": 1.9594, "step": 205675 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017246202956973389, "loss": 2.0179, "step": 205680 }, { "epoch": 0.48, "grad_norm": 1.796875, "learning_rate": 0.0001724607558103681, "loss": 2.1708, "step": 205685 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.0001724594820262484, "loss": 2.0451, "step": 205690 }, { "epoch": 0.48, "grad_norm": 2.390625, "learning_rate": 0.00017245820821737523, "loss": 2.2928, "step": 205695 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017245693438374903, "loss": 2.1742, "step": 205700 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.0001724556605253702, "loss": 2.2195, "step": 205705 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.00017245438664223922, "loss": 1.9804, "step": 205710 }, { "epoch": 0.48, "grad_norm": 2.671875, "learning_rate": 0.0001724531127343565, "loss": 2.181, "step": 205715 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017245183880172248, "loss": 2.0435, "step": 205720 }, { "epoch": 0.48, "grad_norm": 2.40625, "learning_rate": 0.0001724505648443376, "loss": 2.0381, "step": 205725 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017244929086220233, "loss": 2.0535, "step": 205730 }, { "epoch": 0.48, "grad_norm": 1.9609375, "learning_rate": 0.00017244801685531704, "loss": 1.9923, "step": 205735 }, { "epoch": 0.48, "grad_norm": 1.7890625, "learning_rate": 0.00017244674282368219, "loss": 2.1102, "step": 205740 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017244546876729825, "loss": 2.0846, "step": 205745 }, { "epoch": 0.48, "grad_norm": 2.203125, "learning_rate": 0.00017244419468616555, "loss": 2.0017, "step": 205750 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017244292058028466, "loss": 1.9182, "step": 205755 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017244164644965592, "loss": 1.9858, "step": 205760 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017244037229427982, "loss": 2.0752, "step": 205765 }, { "epoch": 0.48, "grad_norm": 1.859375, "learning_rate": 0.00017243909811415678, "loss": 2.1267, "step": 205770 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017243782390928723, "loss": 2.2801, "step": 205775 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.0001724365496796716, "loss": 2.0628, "step": 205780 }, { "epoch": 0.48, "grad_norm": 2.9375, "learning_rate": 0.00017243527542531032, "loss": 2.2837, "step": 205785 }, { "epoch": 0.48, "grad_norm": 1.8984375, "learning_rate": 0.00017243400114620386, "loss": 1.9445, "step": 205790 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017243272684235261, "loss": 2.3495, "step": 205795 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017243145251375706, "loss": 2.136, "step": 205800 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017243017816041754, "loss": 2.0735, "step": 205805 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.00017242890378233464, "loss": 2.166, "step": 205810 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017242762937950867, "loss": 2.0752, "step": 205815 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.0001724263549519401, "loss": 2.3596, "step": 205820 }, { "epoch": 0.48, "grad_norm": 1.8984375, "learning_rate": 0.0001724250804996294, "loss": 2.0295, "step": 205825 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017242380602257694, "loss": 2.1611, "step": 205830 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.00017242253152078323, "loss": 2.0695, "step": 205835 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017242125699424866, "loss": 2.1387, "step": 205840 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.0001724199824429737, "loss": 1.9939, "step": 205845 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.00017241870786695876, "loss": 2.0789, "step": 205850 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.00017241743326620423, "loss": 2.1307, "step": 205855 }, { "epoch": 0.48, "grad_norm": 1.84375, "learning_rate": 0.00017241615864071062, "loss": 2.061, "step": 205860 }, { "epoch": 0.48, "grad_norm": 1.640625, "learning_rate": 0.0001724148839904783, "loss": 1.9957, "step": 205865 }, { "epoch": 0.48, "grad_norm": 1.953125, "learning_rate": 0.0001724136093155078, "loss": 2.0202, "step": 205870 }, { "epoch": 0.48, "grad_norm": 2.375, "learning_rate": 0.0001724123346157995, "loss": 2.1876, "step": 205875 }, { "epoch": 0.48, "grad_norm": 1.671875, "learning_rate": 0.0001724110598913538, "loss": 2.2096, "step": 205880 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017240978514217114, "loss": 2.1337, "step": 205885 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017240851036825206, "loss": 2.2176, "step": 205890 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.00017240723556959683, "loss": 1.8904, "step": 205895 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017240596074620602, "loss": 2.1271, "step": 205900 }, { "epoch": 0.48, "grad_norm": 2.265625, "learning_rate": 0.00017240468589808006, "loss": 2.3102, "step": 205905 }, { "epoch": 0.48, "grad_norm": 1.984375, "learning_rate": 0.0001724034110252193, "loss": 2.0688, "step": 205910 }, { "epoch": 0.48, "grad_norm": 2.078125, "learning_rate": 0.00017240213612762425, "loss": 2.1679, "step": 205915 }, { "epoch": 0.48, "grad_norm": 1.9140625, "learning_rate": 0.00017240086120529527, "loss": 2.239, "step": 205920 }, { "epoch": 0.48, "grad_norm": 2.484375, "learning_rate": 0.00017239958625823288, "loss": 2.1835, "step": 205925 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017239831128643748, "loss": 2.11, "step": 205930 }, { "epoch": 0.48, "grad_norm": 2.125, "learning_rate": 0.0001723970362899095, "loss": 2.1831, "step": 205935 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017239576126864935, "loss": 2.054, "step": 205940 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017239448622265752, "loss": 2.2357, "step": 205945 }, { "epoch": 0.48, "grad_norm": 1.75, "learning_rate": 0.0001723932111519344, "loss": 2.306, "step": 205950 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.00017239193605648047, "loss": 1.964, "step": 205955 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017239066093629614, "loss": 2.1106, "step": 205960 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017238938579138187, "loss": 1.9039, "step": 205965 }, { "epoch": 0.48, "grad_norm": 2.21875, "learning_rate": 0.00017238811062173802, "loss": 2.1326, "step": 205970 }, { "epoch": 0.48, "grad_norm": 2.109375, "learning_rate": 0.00017238683542736513, "loss": 2.0379, "step": 205975 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017238556020826356, "loss": 2.1836, "step": 205980 }, { "epoch": 0.48, "grad_norm": 2.28125, "learning_rate": 0.00017238428496443376, "loss": 2.1762, "step": 205985 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017238300969587617, "loss": 2.1984, "step": 205990 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017238173440259127, "loss": 2.0399, "step": 205995 }, { "epoch": 0.48, "grad_norm": 2.4375, "learning_rate": 0.00017238045908457943, "loss": 2.1001, "step": 206000 }, { "epoch": 0.48, "grad_norm": 2.46875, "learning_rate": 0.0001723791837418411, "loss": 2.1963, "step": 206005 }, { "epoch": 0.48, "grad_norm": 1.890625, "learning_rate": 0.00017237790837437676, "loss": 2.1329, "step": 206010 }, { "epoch": 0.48, "grad_norm": 2.296875, "learning_rate": 0.0001723766329821868, "loss": 2.0335, "step": 206015 }, { "epoch": 0.48, "grad_norm": 1.9453125, "learning_rate": 0.00017237535756527166, "loss": 2.2586, "step": 206020 }, { "epoch": 0.48, "grad_norm": 2.25, "learning_rate": 0.0001723740821236318, "loss": 2.1385, "step": 206025 }, { "epoch": 0.48, "grad_norm": 2.0625, "learning_rate": 0.00017237280665726763, "loss": 2.0999, "step": 206030 }, { "epoch": 0.48, "grad_norm": 2.046875, "learning_rate": 0.00017237153116617958, "loss": 2.249, "step": 206035 }, { "epoch": 0.48, "grad_norm": 1.9375, "learning_rate": 0.00017237025565036812, "loss": 2.0058, "step": 206040 }, { "epoch": 0.48, "grad_norm": 1.75, "learning_rate": 0.00017236898010983368, "loss": 1.9724, "step": 206045 }, { "epoch": 0.48, "grad_norm": 2.234375, "learning_rate": 0.00017236770454457666, "loss": 2.1562, "step": 206050 }, { "epoch": 0.48, "grad_norm": 2.1875, "learning_rate": 0.00017236642895459756, "loss": 2.0034, "step": 206055 }, { "epoch": 0.48, "grad_norm": 2.625, "learning_rate": 0.00017236515333989675, "loss": 2.257, "step": 206060 }, { "epoch": 0.48, "grad_norm": 1.96875, "learning_rate": 0.0001723638777004747, "loss": 2.1204, "step": 206065 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.0001723626020363318, "loss": 2.1524, "step": 206070 }, { "epoch": 0.48, "grad_norm": 2.140625, "learning_rate": 0.00017236132634746855, "loss": 1.9843, "step": 206075 }, { "epoch": 0.48, "grad_norm": 1.7265625, "learning_rate": 0.0001723600506338854, "loss": 2.1805, "step": 206080 }, { "epoch": 0.48, "grad_norm": 2.3125, "learning_rate": 0.00017235877489558268, "loss": 2.2032, "step": 206085 }, { "epoch": 0.48, "grad_norm": 2.15625, "learning_rate": 0.0001723574991325609, "loss": 2.2481, "step": 206090 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.0001723562233448205, "loss": 2.2369, "step": 206095 }, { "epoch": 0.49, "grad_norm": 2.59375, "learning_rate": 0.0001723549475323619, "loss": 2.0004, "step": 206100 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017235367169518554, "loss": 1.9983, "step": 206105 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017235239583329186, "loss": 2.088, "step": 206110 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017235111994668128, "loss": 2.2942, "step": 206115 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017234984403535425, "loss": 2.1922, "step": 206120 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001723485680993112, "loss": 2.1253, "step": 206125 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017234729213855257, "loss": 2.2986, "step": 206130 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.0001723460161530788, "loss": 1.9123, "step": 206135 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017234474014289028, "loss": 2.1875, "step": 206140 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017234346410798751, "loss": 2.1713, "step": 206145 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017234218804837093, "loss": 2.1385, "step": 206150 }, { "epoch": 0.49, "grad_norm": 2.546875, "learning_rate": 0.0001723409119640409, "loss": 2.1047, "step": 206155 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017233963585499792, "loss": 2.1355, "step": 206160 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017233835972124242, "loss": 2.0912, "step": 206165 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001723370835627748, "loss": 2.1247, "step": 206170 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017233580737959555, "loss": 2.2565, "step": 206175 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017233453117170507, "loss": 2.0446, "step": 206180 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017233325493910378, "loss": 1.8016, "step": 206185 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017233197868179217, "loss": 2.2059, "step": 206190 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017233070239977062, "loss": 2.1592, "step": 206195 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.0001723294260930396, "loss": 2.0934, "step": 206200 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017232814976159952, "loss": 1.9973, "step": 206205 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017232687340545084, "loss": 2.1626, "step": 206210 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017232559702459402, "loss": 1.9949, "step": 206215 }, { "epoch": 0.49, "grad_norm": 1.96875, "learning_rate": 0.00017232432061902944, "loss": 2.1469, "step": 206220 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017232304418875755, "loss": 1.9983, "step": 206225 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001723217677337788, "loss": 2.078, "step": 206230 }, { "epoch": 0.49, "grad_norm": 1.8515625, "learning_rate": 0.00017232049125409361, "loss": 2.133, "step": 206235 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017231921474970244, "loss": 2.0846, "step": 206240 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.00017231793822060574, "loss": 2.1602, "step": 206245 }, { "epoch": 0.49, "grad_norm": 2.546875, "learning_rate": 0.0001723166616668039, "loss": 2.1006, "step": 206250 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017231538508829738, "loss": 2.0478, "step": 206255 }, { "epoch": 0.49, "grad_norm": 1.8984375, "learning_rate": 0.0001723141084850866, "loss": 2.0678, "step": 206260 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017231283185717203, "loss": 1.9696, "step": 206265 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.0001723115552045541, "loss": 2.0492, "step": 206270 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.0001723102785272332, "loss": 1.9642, "step": 206275 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001723090018252098, "loss": 2.0368, "step": 206280 }, { "epoch": 0.49, "grad_norm": 2.875, "learning_rate": 0.0001723077250984843, "loss": 2.2016, "step": 206285 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017230644834705723, "loss": 2.2091, "step": 206290 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017230517157092897, "loss": 2.092, "step": 206295 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.0001723038947700999, "loss": 2.0, "step": 206300 }, { "epoch": 0.49, "grad_norm": 2.59375, "learning_rate": 0.00017230261794457053, "loss": 1.9185, "step": 206305 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017230134109434128, "loss": 2.2148, "step": 206310 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017230006421941257, "loss": 2.0149, "step": 206315 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017229878731978487, "loss": 2.1025, "step": 206320 }, { "epoch": 0.49, "grad_norm": 1.796875, "learning_rate": 0.00017229751039545858, "loss": 2.17, "step": 206325 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017229623344643415, "loss": 2.2666, "step": 206330 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.00017229495647271201, "loss": 2.1006, "step": 206335 }, { "epoch": 0.49, "grad_norm": 1.8203125, "learning_rate": 0.0001722936794742926, "loss": 2.0972, "step": 206340 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.00017229240245117638, "loss": 2.2063, "step": 206345 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017229112540336373, "loss": 2.2127, "step": 206350 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017228984833085514, "loss": 2.0347, "step": 206355 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.000172288571233651, "loss": 2.1445, "step": 206360 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017228729411175182, "loss": 2.2796, "step": 206365 }, { "epoch": 0.49, "grad_norm": 1.875, "learning_rate": 0.000172286016965158, "loss": 2.2933, "step": 206370 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.0001722847397938699, "loss": 2.0823, "step": 206375 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017228346259788805, "loss": 2.06, "step": 206380 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017228218537721288, "loss": 2.1645, "step": 206385 }, { "epoch": 0.49, "grad_norm": 5.3125, "learning_rate": 0.00017228090813184479, "loss": 2.0799, "step": 206390 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.0001722796308617842, "loss": 2.1064, "step": 206395 }, { "epoch": 0.49, "grad_norm": 2.78125, "learning_rate": 0.0001722783535670316, "loss": 2.0828, "step": 206400 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017227707624758743, "loss": 2.1451, "step": 206405 }, { "epoch": 0.49, "grad_norm": 2.578125, "learning_rate": 0.00017227579890345207, "loss": 2.0046, "step": 206410 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.000172274521534626, "loss": 2.1974, "step": 206415 }, { "epoch": 0.49, "grad_norm": 2.65625, "learning_rate": 0.00017227324414110964, "loss": 2.319, "step": 206420 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017227196672290343, "loss": 2.1185, "step": 206425 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.0001722706892800078, "loss": 1.9489, "step": 206430 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.0001722694118124232, "loss": 2.0621, "step": 206435 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017226813432015003, "loss": 2.2326, "step": 206440 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.0001722668568031888, "loss": 1.9203, "step": 206445 }, { "epoch": 0.49, "grad_norm": 1.8984375, "learning_rate": 0.0001722655792615399, "loss": 2.1443, "step": 206450 }, { "epoch": 0.49, "grad_norm": 1.953125, "learning_rate": 0.00017226430169520372, "loss": 2.1426, "step": 206455 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017226302410418076, "loss": 2.0987, "step": 206460 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017226174648847143, "loss": 2.0394, "step": 206465 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017226046884807623, "loss": 1.9991, "step": 206470 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.0001722591911829955, "loss": 2.166, "step": 206475 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017225791349322972, "loss": 2.2072, "step": 206480 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017225663577877934, "loss": 2.2173, "step": 206485 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.0001722553580396448, "loss": 2.2424, "step": 206490 }, { "epoch": 0.49, "grad_norm": 2.578125, "learning_rate": 0.00017225408027582648, "loss": 2.1798, "step": 206495 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017225280248732487, "loss": 1.9895, "step": 206500 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.0001722515246741404, "loss": 2.1315, "step": 206505 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.0001722502468362735, "loss": 2.0309, "step": 206510 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001722489689737246, "loss": 2.094, "step": 206515 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017224769108649415, "loss": 2.2273, "step": 206520 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017224641317458258, "loss": 1.9716, "step": 206525 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.0001722451352379903, "loss": 1.9872, "step": 206530 }, { "epoch": 0.49, "grad_norm": 1.8203125, "learning_rate": 0.00017224385727671778, "loss": 1.9756, "step": 206535 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017224257929076547, "loss": 1.8909, "step": 206540 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017224130128013377, "loss": 2.1365, "step": 206545 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017224002324482313, "loss": 2.1266, "step": 206550 }, { "epoch": 0.49, "grad_norm": 2.53125, "learning_rate": 0.000172238745184834, "loss": 2.0845, "step": 206555 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017223746710016678, "loss": 2.1224, "step": 206560 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017223618899082194, "loss": 2.1748, "step": 206565 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.0001722349108567999, "loss": 1.8675, "step": 206570 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017223363269810112, "loss": 1.9962, "step": 206575 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.000172232354514726, "loss": 1.8993, "step": 206580 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017223107630667505, "loss": 2.2355, "step": 206585 }, { "epoch": 0.49, "grad_norm": 1.8984375, "learning_rate": 0.00017222979807394862, "loss": 2.0764, "step": 206590 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.0001722285198165472, "loss": 2.1691, "step": 206595 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.0001722272415344712, "loss": 2.0831, "step": 206600 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017222596322772103, "loss": 1.9399, "step": 206605 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017222468489629718, "loss": 2.1168, "step": 206610 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017222340654020007, "loss": 2.2135, "step": 206615 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017222212815943012, "loss": 2.0351, "step": 206620 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.0001722208497539878, "loss": 2.2393, "step": 206625 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017221957132387353, "loss": 2.0864, "step": 206630 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017221829286908776, "loss": 2.1936, "step": 206635 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.00017221701438963088, "loss": 2.0218, "step": 206640 }, { "epoch": 0.49, "grad_norm": 2.546875, "learning_rate": 0.0001722157358855034, "loss": 2.0356, "step": 206645 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.00017221445735670566, "loss": 2.1341, "step": 206650 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017221317880323817, "loss": 2.2452, "step": 206655 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017221190022510135, "loss": 1.9765, "step": 206660 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.00017221062162229566, "loss": 2.0676, "step": 206665 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.0001722093429948215, "loss": 2.1954, "step": 206670 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.0001722080643426793, "loss": 2.1162, "step": 206675 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017220678566586952, "loss": 2.1257, "step": 206680 }, { "epoch": 0.49, "grad_norm": 1.7109375, "learning_rate": 0.0001722055069643926, "loss": 1.9626, "step": 206685 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017220422823824896, "loss": 2.1579, "step": 206690 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017220294948743907, "loss": 1.9939, "step": 206695 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017220167071196332, "loss": 2.1699, "step": 206700 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017220039191182218, "loss": 2.1243, "step": 206705 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.0001721991130870161, "loss": 2.1128, "step": 206710 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.00017219783423754544, "loss": 2.0727, "step": 206715 }, { "epoch": 0.49, "grad_norm": 1.8125, "learning_rate": 0.00017219655536341073, "loss": 2.1247, "step": 206720 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017219527646461237, "loss": 2.2557, "step": 206725 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017219399754115077, "loss": 1.8142, "step": 206730 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017219271859302639, "loss": 2.1153, "step": 206735 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017219143962023966, "loss": 2.1278, "step": 206740 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017219016062279104, "loss": 2.0239, "step": 206745 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.00017218888160068098, "loss": 2.084, "step": 206750 }, { "epoch": 0.49, "grad_norm": 1.859375, "learning_rate": 0.00017218760255390987, "loss": 2.2249, "step": 206755 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017218632348247817, "loss": 2.1599, "step": 206760 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001721850443863863, "loss": 2.2499, "step": 206765 }, { "epoch": 0.49, "grad_norm": 2.84375, "learning_rate": 0.0001721837652656347, "loss": 2.2368, "step": 206770 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017218248612022383, "loss": 2.0321, "step": 206775 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017218120695015413, "loss": 2.0923, "step": 206780 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017217992775542598, "loss": 1.9082, "step": 206785 }, { "epoch": 0.49, "grad_norm": 2.515625, "learning_rate": 0.00017217864853603988, "loss": 1.9024, "step": 206790 }, { "epoch": 0.49, "grad_norm": 2.609375, "learning_rate": 0.00017217736929199626, "loss": 2.2442, "step": 206795 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017217609002329553, "loss": 2.0242, "step": 206800 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017217481072993813, "loss": 2.1217, "step": 206805 }, { "epoch": 0.49, "grad_norm": 2.828125, "learning_rate": 0.00017217353141192452, "loss": 2.3585, "step": 206810 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017217225206925512, "loss": 1.9978, "step": 206815 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017217097270193037, "loss": 2.0464, "step": 206820 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.00017216969330995067, "loss": 2.1688, "step": 206825 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017216841389331656, "loss": 2.038, "step": 206830 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017216713445202838, "loss": 2.1841, "step": 206835 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017216585498608657, "loss": 2.162, "step": 206840 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017216457549549163, "loss": 2.276, "step": 206845 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017216329598024395, "loss": 2.1886, "step": 206850 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.000172162016440344, "loss": 2.1159, "step": 206855 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017216073687579216, "loss": 1.9573, "step": 206860 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.0001721594572865889, "loss": 2.0946, "step": 206865 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.0001721581776727347, "loss": 1.9873, "step": 206870 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017215689803422993, "loss": 2.1127, "step": 206875 }, { "epoch": 0.49, "grad_norm": 1.890625, "learning_rate": 0.00017215561837107508, "loss": 2.2103, "step": 206880 }, { "epoch": 0.49, "grad_norm": 1.6640625, "learning_rate": 0.00017215433868327055, "loss": 2.193, "step": 206885 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017215305897081676, "loss": 2.2039, "step": 206890 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017215177923371423, "loss": 2.18, "step": 206895 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001721504994719633, "loss": 2.0083, "step": 206900 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017214921968556447, "loss": 2.1641, "step": 206905 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017214793987451816, "loss": 2.1138, "step": 206910 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.0001721466600388248, "loss": 2.1484, "step": 206915 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017214538017848482, "loss": 2.0974, "step": 206920 }, { "epoch": 0.49, "grad_norm": 1.7734375, "learning_rate": 0.0001721441002934987, "loss": 2.0238, "step": 206925 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017214282038386682, "loss": 2.0949, "step": 206930 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017214154044958968, "loss": 2.1326, "step": 206935 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017214026049066765, "loss": 2.2053, "step": 206940 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.0001721389805071012, "loss": 2.1721, "step": 206945 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017213770049889074, "loss": 1.998, "step": 206950 }, { "epoch": 0.49, "grad_norm": 2.8125, "learning_rate": 0.0001721364204660368, "loss": 2.0658, "step": 206955 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.0001721351404085397, "loss": 2.3291, "step": 206960 }, { "epoch": 0.49, "grad_norm": 1.921875, "learning_rate": 0.00017213386032639995, "loss": 2.1836, "step": 206965 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017213258021961795, "loss": 2.1583, "step": 206970 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017213130008819416, "loss": 2.044, "step": 206975 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.000172130019932129, "loss": 2.2286, "step": 206980 }, { "epoch": 0.49, "grad_norm": 1.953125, "learning_rate": 0.00017212873975142295, "loss": 2.1206, "step": 206985 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017212745954607638, "loss": 2.1819, "step": 206990 }, { "epoch": 0.49, "grad_norm": 2.78125, "learning_rate": 0.00017212617931608977, "loss": 1.9608, "step": 206995 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017212489906146355, "loss": 1.9784, "step": 207000 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.00017212361878219816, "loss": 2.0823, "step": 207005 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.000172122338478294, "loss": 1.9951, "step": 207010 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.0001721210581497516, "loss": 2.139, "step": 207015 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.0001721197777965713, "loss": 2.1578, "step": 207020 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017211849741875361, "loss": 2.1376, "step": 207025 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.0001721172170162989, "loss": 2.1086, "step": 207030 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017211593658920764, "loss": 1.9972, "step": 207035 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017211465613748027, "loss": 2.1404, "step": 207040 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017211337566111725, "loss": 2.3137, "step": 207045 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.000172112095160119, "loss": 2.0532, "step": 207050 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.0001721108146344859, "loss": 1.9316, "step": 207055 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.0001721095340842185, "loss": 2.1192, "step": 207060 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.0001721082535093171, "loss": 2.3636, "step": 207065 }, { "epoch": 0.49, "grad_norm": 2.765625, "learning_rate": 0.00017210697290978227, "loss": 1.924, "step": 207070 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017210569228561437, "loss": 2.1226, "step": 207075 }, { "epoch": 0.49, "grad_norm": 1.890625, "learning_rate": 0.00017210441163681387, "loss": 1.9745, "step": 207080 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.0001721031309633812, "loss": 2.0953, "step": 207085 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017210185026531676, "loss": 2.1207, "step": 207090 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017210056954262103, "loss": 1.9854, "step": 207095 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017209928879529448, "loss": 2.2513, "step": 207100 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017209800802333745, "loss": 2.0725, "step": 207105 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017209672722675047, "loss": 2.0053, "step": 207110 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017209544640553394, "loss": 1.7913, "step": 207115 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.00017209416555968827, "loss": 2.1977, "step": 207120 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017209288468921394, "loss": 2.0877, "step": 207125 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017209160379411136, "loss": 2.0432, "step": 207130 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.000172090322874381, "loss": 1.9506, "step": 207135 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001720890419300233, "loss": 2.0543, "step": 207140 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017208776096103864, "loss": 2.0915, "step": 207145 }, { "epoch": 0.49, "grad_norm": 1.8359375, "learning_rate": 0.00017208647996742752, "loss": 2.0024, "step": 207150 }, { "epoch": 0.49, "grad_norm": 1.890625, "learning_rate": 0.00017208519894919034, "loss": 2.1896, "step": 207155 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001720839179063275, "loss": 2.0723, "step": 207160 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017208263683883955, "loss": 2.2688, "step": 207165 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017208135574672684, "loss": 2.0897, "step": 207170 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.00017208007462998982, "loss": 2.2839, "step": 207175 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017207879348862896, "loss": 2.2013, "step": 207180 }, { "epoch": 0.49, "grad_norm": 1.921875, "learning_rate": 0.00017207751232264468, "loss": 2.1771, "step": 207185 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.0001720762311320374, "loss": 2.1251, "step": 207190 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.0001720749499168076, "loss": 2.0513, "step": 207195 }, { "epoch": 0.49, "grad_norm": 1.7890625, "learning_rate": 0.00017207366867695565, "loss": 2.3605, "step": 207200 }, { "epoch": 0.49, "grad_norm": 1.8671875, "learning_rate": 0.00017207238741248203, "loss": 2.0992, "step": 207205 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.0001720711061233872, "loss": 2.0619, "step": 207210 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017206982480967158, "loss": 2.1609, "step": 207215 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017206854347133555, "loss": 2.1107, "step": 207220 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017206726210837966, "loss": 2.1999, "step": 207225 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017206598072080426, "loss": 2.228, "step": 207230 }, { "epoch": 0.49, "grad_norm": 1.828125, "learning_rate": 0.0001720646993086098, "loss": 2.0673, "step": 207235 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017206341787179676, "loss": 2.0015, "step": 207240 }, { "epoch": 0.49, "grad_norm": 1.859375, "learning_rate": 0.00017206213641036551, "loss": 1.9904, "step": 207245 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017206085492431656, "loss": 2.0898, "step": 207250 }, { "epoch": 0.49, "grad_norm": 1.8359375, "learning_rate": 0.00017205957341365032, "loss": 2.0796, "step": 207255 }, { "epoch": 0.49, "grad_norm": 1.8046875, "learning_rate": 0.0001720582918783672, "loss": 2.2122, "step": 207260 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.0001720570103184677, "loss": 2.1037, "step": 207265 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017205572873395215, "loss": 2.0867, "step": 207270 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.0001720544471248211, "loss": 2.1744, "step": 207275 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017205316549107495, "loss": 2.0535, "step": 207280 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001720518838327141, "loss": 1.9621, "step": 207285 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017205060214973904, "loss": 2.1307, "step": 207290 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017204932044215018, "loss": 2.1146, "step": 207295 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017204803870994798, "loss": 2.0685, "step": 207300 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017204675695313285, "loss": 2.2465, "step": 207305 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017204547517170527, "loss": 2.0122, "step": 207310 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017204419336566562, "loss": 2.0424, "step": 207315 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.0001720429115350144, "loss": 2.1022, "step": 207320 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017204162967975197, "loss": 2.1598, "step": 207325 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017204034779987884, "loss": 2.0987, "step": 207330 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001720390658953954, "loss": 2.1366, "step": 207335 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017203778396630213, "loss": 2.1411, "step": 207340 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.00017203650201259944, "loss": 2.2699, "step": 207345 }, { "epoch": 0.49, "grad_norm": 2.53125, "learning_rate": 0.00017203522003428776, "loss": 1.9189, "step": 207350 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017203393803136757, "loss": 2.134, "step": 207355 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017203265600383927, "loss": 2.0096, "step": 207360 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001720313739517033, "loss": 1.9082, "step": 207365 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017203009187496008, "loss": 2.3067, "step": 207370 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001720288097736101, "loss": 2.2679, "step": 207375 }, { "epoch": 0.49, "grad_norm": 1.828125, "learning_rate": 0.00017202752764765378, "loss": 2.0267, "step": 207380 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017202624549709158, "loss": 2.1877, "step": 207385 }, { "epoch": 0.49, "grad_norm": 3.625, "learning_rate": 0.00017202496332192386, "loss": 2.0142, "step": 207390 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017202368112215113, "loss": 2.0643, "step": 207395 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.00017202239889777378, "loss": 1.9914, "step": 207400 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001720211166487923, "loss": 2.1572, "step": 207405 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.0001720198343752071, "loss": 2.1148, "step": 207410 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.0001720185520770186, "loss": 2.0253, "step": 207415 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017201726975422726, "loss": 2.1469, "step": 207420 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001720159874068335, "loss": 2.1918, "step": 207425 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017201470503483779, "loss": 2.1191, "step": 207430 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017201342263824056, "loss": 2.0703, "step": 207435 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017201214021704222, "loss": 2.0747, "step": 207440 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017201085777124325, "loss": 2.1882, "step": 207445 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017200957530084403, "loss": 2.0901, "step": 207450 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.00017200829280584508, "loss": 2.1894, "step": 207455 }, { "epoch": 0.49, "grad_norm": 1.78125, "learning_rate": 0.00017200701028624674, "loss": 1.8713, "step": 207460 }, { "epoch": 0.49, "grad_norm": 2.59375, "learning_rate": 0.00017200572774204951, "loss": 2.1195, "step": 207465 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.00017200444517325388, "loss": 2.1315, "step": 207470 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017200316257986016, "loss": 2.1463, "step": 207475 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017200187996186888, "loss": 2.1166, "step": 207480 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017200059731928042, "loss": 2.1976, "step": 207485 }, { "epoch": 0.49, "grad_norm": 2.65625, "learning_rate": 0.00017199931465209528, "loss": 2.315, "step": 207490 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017199803196031387, "loss": 2.1021, "step": 207495 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017199674924393664, "loss": 2.0505, "step": 207500 }, { "epoch": 0.49, "grad_norm": 2.640625, "learning_rate": 0.00017199546650296398, "loss": 2.0647, "step": 207505 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017199418373739636, "loss": 2.1856, "step": 207510 }, { "epoch": 0.49, "grad_norm": 1.875, "learning_rate": 0.00017199290094723426, "loss": 2.155, "step": 207515 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017199161813247804, "loss": 2.1528, "step": 207520 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.0001719903352931282, "loss": 2.2921, "step": 207525 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017198905242918515, "loss": 2.1466, "step": 207530 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017198776954064932, "loss": 2.1403, "step": 207535 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.0001719864866275212, "loss": 2.1189, "step": 207540 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017198520368980115, "loss": 2.171, "step": 207545 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017198392072748968, "loss": 2.1364, "step": 207550 }, { "epoch": 0.49, "grad_norm": 1.78125, "learning_rate": 0.0001719826377405872, "loss": 2.1404, "step": 207555 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.0001719813547290941, "loss": 2.0686, "step": 207560 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.0001719800716930109, "loss": 2.1404, "step": 207565 }, { "epoch": 0.49, "grad_norm": 1.7578125, "learning_rate": 0.000171978788632338, "loss": 2.1764, "step": 207570 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017197750554707582, "loss": 2.1177, "step": 207575 }, { "epoch": 0.49, "grad_norm": 2.59375, "learning_rate": 0.00017197622243722484, "loss": 2.1621, "step": 207580 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017197493930278549, "loss": 2.0139, "step": 207585 }, { "epoch": 0.49, "grad_norm": 2.5625, "learning_rate": 0.00017197365614375816, "loss": 2.0848, "step": 207590 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017197237296014335, "loss": 2.0268, "step": 207595 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017197108975194146, "loss": 2.087, "step": 207600 }, { "epoch": 0.49, "grad_norm": 3.296875, "learning_rate": 0.00017196980651915293, "loss": 2.1362, "step": 207605 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017196852326177824, "loss": 2.1489, "step": 207610 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017196723997981779, "loss": 2.2069, "step": 207615 }, { "epoch": 0.49, "grad_norm": 2.5625, "learning_rate": 0.000171965956673272, "loss": 2.3116, "step": 207620 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017196467334214136, "loss": 2.0808, "step": 207625 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017196338998642628, "loss": 2.154, "step": 207630 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017196210660612717, "loss": 1.9696, "step": 207635 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001719608232012445, "loss": 2.069, "step": 207640 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017195953977177874, "loss": 2.1789, "step": 207645 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.0001719582563177303, "loss": 2.0881, "step": 207650 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001719569728390996, "loss": 2.0742, "step": 207655 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.0001719556893358871, "loss": 2.1547, "step": 207660 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.0001719544058080932, "loss": 2.1308, "step": 207665 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001719531222557184, "loss": 2.2477, "step": 207670 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017195183867876312, "loss": 2.1379, "step": 207675 }, { "epoch": 0.49, "grad_norm": 2.875, "learning_rate": 0.00017195055507722776, "loss": 2.0811, "step": 207680 }, { "epoch": 0.49, "grad_norm": 1.953125, "learning_rate": 0.0001719492714511128, "loss": 2.0989, "step": 207685 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017194798780041865, "loss": 2.1741, "step": 207690 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017194670412514577, "loss": 2.1395, "step": 207695 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001719454204252946, "loss": 1.9183, "step": 207700 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017194413670086557, "loss": 1.9464, "step": 207705 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017194285295185911, "loss": 2.0947, "step": 207710 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017194156917827566, "loss": 2.0387, "step": 207715 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017194028538011565, "loss": 2.0812, "step": 207720 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.0001719390015573796, "loss": 2.1961, "step": 207725 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017193771771006783, "loss": 2.2112, "step": 207730 }, { "epoch": 0.49, "grad_norm": 2.53125, "learning_rate": 0.00017193643383818082, "loss": 2.115, "step": 207735 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017193514994171905, "loss": 1.9354, "step": 207740 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.0001719338660206829, "loss": 1.9711, "step": 207745 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017193258207507287, "loss": 2.0867, "step": 207750 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017193129810488936, "loss": 2.0511, "step": 207755 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017193001411013278, "loss": 2.0307, "step": 207760 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017192873009080363, "loss": 2.0431, "step": 207765 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017192744604690234, "loss": 1.9821, "step": 207770 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017192616197842927, "loss": 2.0166, "step": 207775 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.000171924877885385, "loss": 2.16, "step": 207780 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.0001719235937677698, "loss": 1.9624, "step": 207785 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017192230962558426, "loss": 2.1417, "step": 207790 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017192102545882874, "loss": 2.1477, "step": 207795 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017191974126750368, "loss": 1.8251, "step": 207800 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017191845705160955, "loss": 2.0566, "step": 207805 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017191717281114674, "loss": 2.1491, "step": 207810 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017191588854611575, "loss": 2.0076, "step": 207815 }, { "epoch": 0.49, "grad_norm": 2.609375, "learning_rate": 0.000171914604256517, "loss": 2.0686, "step": 207820 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017191331994235087, "loss": 2.2709, "step": 207825 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017191203560361788, "loss": 2.172, "step": 207830 }, { "epoch": 0.49, "grad_norm": 1.7109375, "learning_rate": 0.0001719107512403184, "loss": 2.0524, "step": 207835 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017190946685245295, "loss": 2.2598, "step": 207840 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017190818244002188, "loss": 2.1417, "step": 207845 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017190689800302572, "loss": 2.1142, "step": 207850 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017190561354146481, "loss": 2.1062, "step": 207855 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.00017190432905533966, "loss": 2.0986, "step": 207860 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017190304454465068, "loss": 2.1384, "step": 207865 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017190176000939834, "loss": 2.1231, "step": 207870 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017190047544958303, "loss": 2.2747, "step": 207875 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017189919086520522, "loss": 2.2673, "step": 207880 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017189790625626533, "loss": 2.1735, "step": 207885 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.00017189662162276387, "loss": 2.3304, "step": 207890 }, { "epoch": 0.49, "grad_norm": 2.875, "learning_rate": 0.00017189533696470116, "loss": 2.0274, "step": 207895 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.0001718940522820777, "loss": 2.198, "step": 207900 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017189276757489393, "loss": 2.1367, "step": 207905 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.0001718914828431503, "loss": 2.0946, "step": 207910 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017189019808684725, "loss": 1.9679, "step": 207915 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017188891330598518, "loss": 2.0437, "step": 207920 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017188762850056457, "loss": 2.1028, "step": 207925 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017188634367058583, "loss": 2.1399, "step": 207930 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017188505881604943, "loss": 2.1952, "step": 207935 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017188377393695576, "loss": 2.1895, "step": 207940 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.0001718824890333053, "loss": 1.9939, "step": 207945 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017188120410509847, "loss": 1.9355, "step": 207950 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017187991915233576, "loss": 2.1116, "step": 207955 }, { "epoch": 0.49, "grad_norm": 2.765625, "learning_rate": 0.0001718786341750175, "loss": 2.0337, "step": 207960 }, { "epoch": 0.49, "grad_norm": 2.75, "learning_rate": 0.00017187734917314425, "loss": 2.1648, "step": 207965 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.00017187606414671637, "loss": 2.0362, "step": 207970 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017187477909573433, "loss": 2.1783, "step": 207975 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017187349402019857, "loss": 2.0713, "step": 207980 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.0001718722089201095, "loss": 2.1184, "step": 207985 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001718709237954676, "loss": 2.2111, "step": 207990 }, { "epoch": 0.49, "grad_norm": 2.78125, "learning_rate": 0.00017186963864627327, "loss": 2.0436, "step": 207995 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017186835347252698, "loss": 2.0886, "step": 208000 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017186706827422915, "loss": 2.0872, "step": 208005 }, { "epoch": 0.49, "grad_norm": 2.703125, "learning_rate": 0.00017186578305138024, "loss": 2.1251, "step": 208010 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017186449780398066, "loss": 2.1374, "step": 208015 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.0001718632125320309, "loss": 2.0469, "step": 208020 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001718619272355313, "loss": 1.918, "step": 208025 }, { "epoch": 0.49, "grad_norm": 1.921875, "learning_rate": 0.0001718606419144824, "loss": 2.246, "step": 208030 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.0001718593565688846, "loss": 2.2169, "step": 208035 }, { "epoch": 0.49, "grad_norm": 1.7890625, "learning_rate": 0.0001718580711987383, "loss": 2.0418, "step": 208040 }, { "epoch": 0.49, "grad_norm": 3.65625, "learning_rate": 0.00017185678580404403, "loss": 1.9619, "step": 208045 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017185550038480217, "loss": 1.9423, "step": 208050 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017185421494101316, "loss": 2.1157, "step": 208055 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.0001718529294726775, "loss": 2.1575, "step": 208060 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.0001718516439797955, "loss": 2.0642, "step": 208065 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001718503584623677, "loss": 2.0428, "step": 208070 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.0001718490729203945, "loss": 2.0224, "step": 208075 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.0001718477873538764, "loss": 2.1143, "step": 208080 }, { "epoch": 0.49, "grad_norm": 1.8515625, "learning_rate": 0.00017184650176281375, "loss": 2.12, "step": 208085 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017184521614720704, "loss": 2.1041, "step": 208090 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017184393050705672, "loss": 1.8767, "step": 208095 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.0001718426448423632, "loss": 1.7454, "step": 208100 }, { "epoch": 0.49, "grad_norm": 2.71875, "learning_rate": 0.00017184135915312694, "loss": 1.9358, "step": 208105 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017184007343934833, "loss": 2.3416, "step": 208110 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.0001718387877010279, "loss": 2.1993, "step": 208115 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017183750193816599, "loss": 2.1124, "step": 208120 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.0001718362161507631, "loss": 2.2343, "step": 208125 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017183493033881968, "loss": 1.963, "step": 208130 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.0001718336445023361, "loss": 2.3325, "step": 208135 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017183235864131287, "loss": 2.1033, "step": 208140 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017183107275575043, "loss": 2.1178, "step": 208145 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017182978684564915, "loss": 2.0811, "step": 208150 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017182850091100954, "loss": 1.9803, "step": 208155 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.000171827214951832, "loss": 2.188, "step": 208160 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017182592896811697, "loss": 2.1317, "step": 208165 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.00017182464295986493, "loss": 2.0462, "step": 208170 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017182335692707629, "loss": 2.0267, "step": 208175 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.00017182207086975144, "loss": 2.0335, "step": 208180 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.0001718207847878909, "loss": 2.0671, "step": 208185 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017181949868149508, "loss": 1.9807, "step": 208190 }, { "epoch": 0.49, "grad_norm": 1.765625, "learning_rate": 0.00017181821255056443, "loss": 2.0648, "step": 208195 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017181692639509934, "loss": 2.1626, "step": 208200 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.0001718156402151003, "loss": 2.1548, "step": 208205 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017181435401056775, "loss": 2.0205, "step": 208210 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.0001718130677815021, "loss": 2.171, "step": 208215 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001718117815279038, "loss": 1.9575, "step": 208220 }, { "epoch": 0.49, "grad_norm": 1.59375, "learning_rate": 0.00017181049524977332, "loss": 1.9106, "step": 208225 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017180920894711106, "loss": 2.2364, "step": 208230 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017180792261991745, "loss": 1.9725, "step": 208235 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.000171806636268193, "loss": 2.0933, "step": 208240 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017180534989193804, "loss": 1.8996, "step": 208245 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.00017180406349115307, "loss": 2.0361, "step": 208250 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017180277706583857, "loss": 2.1535, "step": 208255 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.0001718014906159949, "loss": 2.2151, "step": 208260 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017180020414162258, "loss": 2.0391, "step": 208265 }, { "epoch": 0.49, "grad_norm": 2.53125, "learning_rate": 0.000171798917642722, "loss": 1.9976, "step": 208270 }, { "epoch": 0.49, "grad_norm": 1.921875, "learning_rate": 0.0001717976311192936, "loss": 2.3078, "step": 208275 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.00017179634457133785, "loss": 2.1375, "step": 208280 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017179505799885513, "loss": 2.035, "step": 208285 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.0001717937714018459, "loss": 2.1223, "step": 208290 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017179248478031066, "loss": 2.142, "step": 208295 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.0001717911981342498, "loss": 1.9296, "step": 208300 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017178991146366376, "loss": 1.9734, "step": 208305 }, { "epoch": 0.49, "grad_norm": 2.625, "learning_rate": 0.00017178862476855298, "loss": 2.2231, "step": 208310 }, { "epoch": 0.49, "grad_norm": 1.921875, "learning_rate": 0.0001717873380489179, "loss": 2.0809, "step": 208315 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017178605130475897, "loss": 2.0446, "step": 208320 }, { "epoch": 0.49, "grad_norm": 2.578125, "learning_rate": 0.0001717847645360766, "loss": 2.1342, "step": 208325 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017178347774287128, "loss": 2.1326, "step": 208330 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017178219092514341, "loss": 2.1221, "step": 208335 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017178090408289344, "loss": 2.0446, "step": 208340 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017177961721612182, "loss": 2.0298, "step": 208345 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.000171778330324829, "loss": 2.2759, "step": 208350 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017177704340901536, "loss": 1.9283, "step": 208355 }, { "epoch": 0.49, "grad_norm": 2.765625, "learning_rate": 0.0001717757564686814, "loss": 2.3092, "step": 208360 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017177446950382755, "loss": 2.0879, "step": 208365 }, { "epoch": 0.49, "grad_norm": 3.0625, "learning_rate": 0.00017177318251445422, "loss": 1.9479, "step": 208370 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.00017177189550056187, "loss": 2.2245, "step": 208375 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.00017177060846215096, "loss": 2.2256, "step": 208380 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017176932139922188, "loss": 2.0636, "step": 208385 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017176803431177514, "loss": 2.2212, "step": 208390 }, { "epoch": 0.49, "grad_norm": 1.96875, "learning_rate": 0.0001717667471998111, "loss": 2.1002, "step": 208395 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017176546006333028, "loss": 2.1799, "step": 208400 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017176417290233303, "loss": 2.033, "step": 208405 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017176288571681985, "loss": 2.0409, "step": 208410 }, { "epoch": 0.49, "grad_norm": 1.953125, "learning_rate": 0.00017176159850679118, "loss": 2.2355, "step": 208415 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017176031127224742, "loss": 2.0678, "step": 208420 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.00017175902401318904, "loss": 1.9634, "step": 208425 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001717577367296165, "loss": 2.1271, "step": 208430 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.0001717564494215302, "loss": 1.9977, "step": 208435 }, { "epoch": 0.49, "grad_norm": 1.859375, "learning_rate": 0.0001717551620889306, "loss": 2.0847, "step": 208440 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017175387473181814, "loss": 2.0946, "step": 208445 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017175258735019323, "loss": 2.0885, "step": 208450 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.0001717512999440564, "loss": 2.07, "step": 208455 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017175001251340795, "loss": 1.9807, "step": 208460 }, { "epoch": 0.49, "grad_norm": 2.75, "learning_rate": 0.00017174872505824844, "loss": 2.0218, "step": 208465 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017174743757857822, "loss": 2.1101, "step": 208470 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.0001717461500743978, "loss": 1.9572, "step": 208475 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017174486254570762, "loss": 2.133, "step": 208480 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017174357499250806, "loss": 2.0478, "step": 208485 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017174228741479963, "loss": 2.1126, "step": 208490 }, { "epoch": 0.49, "grad_norm": 2.609375, "learning_rate": 0.0001717409998125827, "loss": 2.1049, "step": 208495 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017173971218585775, "loss": 2.1487, "step": 208500 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.0001717384245346252, "loss": 2.0942, "step": 208505 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017173713685888554, "loss": 2.1609, "step": 208510 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017173584915863914, "loss": 1.9569, "step": 208515 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017173456143388646, "loss": 2.1077, "step": 208520 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.000171733273684628, "loss": 2.1351, "step": 208525 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017173198591086412, "loss": 2.0994, "step": 208530 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.0001717306981125953, "loss": 2.1492, "step": 208535 }, { "epoch": 0.49, "grad_norm": 2.546875, "learning_rate": 0.00017172941028982197, "loss": 2.2149, "step": 208540 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017172812244254458, "loss": 2.1842, "step": 208545 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017172683457076357, "loss": 2.1206, "step": 208550 }, { "epoch": 0.49, "grad_norm": 1.8671875, "learning_rate": 0.00017172554667447935, "loss": 2.063, "step": 208555 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001717242587536924, "loss": 2.0631, "step": 208560 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017172297080840312, "loss": 2.0288, "step": 208565 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017172168283861202, "loss": 2.2033, "step": 208570 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017172039484431945, "loss": 2.133, "step": 208575 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.0001717191068255259, "loss": 2.2685, "step": 208580 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017171781878223182, "loss": 2.0515, "step": 208585 }, { "epoch": 0.49, "grad_norm": 2.515625, "learning_rate": 0.0001717165307144376, "loss": 1.9854, "step": 208590 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017171524262214372, "loss": 1.9891, "step": 208595 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017171395450535064, "loss": 1.999, "step": 208600 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.00017171266636405877, "loss": 2.2901, "step": 208605 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017171137819826853, "loss": 2.212, "step": 208610 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.0001717100900079804, "loss": 2.034, "step": 208615 }, { "epoch": 0.49, "grad_norm": 2.578125, "learning_rate": 0.00017170880179319476, "loss": 2.0741, "step": 208620 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017170751355391218, "loss": 1.7665, "step": 208625 }, { "epoch": 0.49, "grad_norm": 2.546875, "learning_rate": 0.00017170622529013292, "loss": 2.0698, "step": 208630 }, { "epoch": 0.49, "grad_norm": 1.875, "learning_rate": 0.00017170493700185757, "loss": 2.176, "step": 208635 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.0001717036486890865, "loss": 2.1363, "step": 208640 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017170236035182014, "loss": 2.079, "step": 208645 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017170107199005898, "loss": 2.122, "step": 208650 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.0001716997836038034, "loss": 2.254, "step": 208655 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017169849519305393, "loss": 2.1631, "step": 208660 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.0001716972067578109, "loss": 2.0517, "step": 208665 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017169591829807484, "loss": 2.0505, "step": 208670 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017169462981384614, "loss": 2.0521, "step": 208675 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017169334130512525, "loss": 2.0567, "step": 208680 }, { "epoch": 0.49, "grad_norm": 1.796875, "learning_rate": 0.00017169205277191264, "loss": 2.0252, "step": 208685 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017169076421420868, "loss": 2.1828, "step": 208690 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.0001716894756320139, "loss": 2.1271, "step": 208695 }, { "epoch": 0.49, "grad_norm": 2.4375, "learning_rate": 0.00017168818702532868, "loss": 1.9427, "step": 208700 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017168689839415347, "loss": 1.991, "step": 208705 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.00017168560973848868, "loss": 2.0988, "step": 208710 }, { "epoch": 0.49, "grad_norm": 1.96875, "learning_rate": 0.00017168432105833486, "loss": 2.0672, "step": 208715 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017168303235369232, "loss": 2.023, "step": 208720 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017168174362456156, "loss": 2.0403, "step": 208725 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017168045487094303, "loss": 2.0547, "step": 208730 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017167916609283715, "loss": 1.9402, "step": 208735 }, { "epoch": 0.49, "grad_norm": 1.8203125, "learning_rate": 0.00017167787729024437, "loss": 2.193, "step": 208740 }, { "epoch": 0.49, "grad_norm": 5.09375, "learning_rate": 0.00017167658846316513, "loss": 2.1343, "step": 208745 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.00017167529961159987, "loss": 2.0865, "step": 208750 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.00017167401073554903, "loss": 2.0193, "step": 208755 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017167272183501303, "loss": 2.0811, "step": 208760 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017167143290999234, "loss": 2.1497, "step": 208765 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.0001716701439604874, "loss": 2.1772, "step": 208770 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017166885498649866, "loss": 1.9026, "step": 208775 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017166756598802646, "loss": 2.0755, "step": 208780 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017166627696507138, "loss": 1.9021, "step": 208785 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001716649879176338, "loss": 1.9596, "step": 208790 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017166369884571414, "loss": 2.1646, "step": 208795 }, { "epoch": 0.49, "grad_norm": 1.6875, "learning_rate": 0.00017166240974931287, "loss": 1.8809, "step": 208800 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017166112062843043, "loss": 2.0425, "step": 208805 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017165983148306724, "loss": 2.0429, "step": 208810 }, { "epoch": 0.49, "grad_norm": 1.71875, "learning_rate": 0.00017165854231322375, "loss": 2.1103, "step": 208815 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.0001716572531189004, "loss": 2.0448, "step": 208820 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017165596390009765, "loss": 2.3029, "step": 208825 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.0001716546746568159, "loss": 2.0935, "step": 208830 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017165338538905562, "loss": 2.1118, "step": 208835 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017165209609681727, "loss": 2.2002, "step": 208840 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017165080678010125, "loss": 2.2509, "step": 208845 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017164951743890797, "loss": 2.0831, "step": 208850 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017164822807323797, "loss": 2.2311, "step": 208855 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017164693868309164, "loss": 2.2064, "step": 208860 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.0001716456492684694, "loss": 2.2096, "step": 208865 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.0001716443598293717, "loss": 2.177, "step": 208870 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017164307036579896, "loss": 2.1861, "step": 208875 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.0001716417808777517, "loss": 2.0348, "step": 208880 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017164049136523027, "loss": 2.0557, "step": 208885 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017163920182823516, "loss": 2.2114, "step": 208890 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017163791226676683, "loss": 2.073, "step": 208895 }, { "epoch": 0.49, "grad_norm": 2.625, "learning_rate": 0.00017163662268082565, "loss": 2.1675, "step": 208900 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017163533307041213, "loss": 2.0907, "step": 208905 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017163404343552668, "loss": 1.9746, "step": 208910 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017163275377616972, "loss": 2.081, "step": 208915 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017163146409234173, "loss": 1.9231, "step": 208920 }, { "epoch": 0.49, "grad_norm": 2.71875, "learning_rate": 0.0001716301743840431, "loss": 2.0893, "step": 208925 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.0001716288846512743, "loss": 1.9874, "step": 208930 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017162759489403585, "loss": 2.0109, "step": 208935 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.00017162630511232803, "loss": 2.2478, "step": 208940 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017162501530615143, "loss": 2.1805, "step": 208945 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.0001716237254755064, "loss": 1.9731, "step": 208950 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.0001716224356203934, "loss": 2.0441, "step": 208955 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017162114574081287, "loss": 2.0817, "step": 208960 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017161985583676525, "loss": 1.9846, "step": 208965 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017161856590825101, "loss": 2.0818, "step": 208970 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017161727595527056, "loss": 2.1821, "step": 208975 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017161598597782436, "loss": 2.0835, "step": 208980 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017161469597591283, "loss": 2.1359, "step": 208985 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017161340594953645, "loss": 2.1545, "step": 208990 }, { "epoch": 0.49, "grad_norm": 2.59375, "learning_rate": 0.00017161211589869558, "loss": 2.2554, "step": 208995 }, { "epoch": 0.49, "grad_norm": 1.8046875, "learning_rate": 0.00017161082582339073, "loss": 2.057, "step": 209000 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017160953572362234, "loss": 2.2197, "step": 209005 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017160824559939086, "loss": 2.1023, "step": 209010 }, { "epoch": 0.49, "grad_norm": 1.890625, "learning_rate": 0.00017160695545069668, "loss": 2.1486, "step": 209015 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017160566527754022, "loss": 2.0796, "step": 209020 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017160437507992202, "loss": 2.222, "step": 209025 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017160308485784245, "loss": 2.0984, "step": 209030 }, { "epoch": 0.49, "grad_norm": 1.8671875, "learning_rate": 0.00017160179461130195, "loss": 2.02, "step": 209035 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017160050434030102, "loss": 2.0044, "step": 209040 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017159921404484, "loss": 2.1416, "step": 209045 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017159792372491945, "loss": 2.0276, "step": 209050 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001715966333805397, "loss": 2.1391, "step": 209055 }, { "epoch": 0.49, "grad_norm": 1.84375, "learning_rate": 0.00017159534301170132, "loss": 1.9713, "step": 209060 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.0001715940526184046, "loss": 2.1542, "step": 209065 }, { "epoch": 0.49, "grad_norm": 2.53125, "learning_rate": 0.00017159276220065007, "loss": 2.2954, "step": 209070 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017159147175843816, "loss": 2.1689, "step": 209075 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001715901812917693, "loss": 2.1627, "step": 209080 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.0001715888908006439, "loss": 2.2881, "step": 209085 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001715876002850625, "loss": 2.2313, "step": 209090 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017158630974502547, "loss": 2.1877, "step": 209095 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017158501918053322, "loss": 2.1705, "step": 209100 }, { "epoch": 0.49, "grad_norm": 2.890625, "learning_rate": 0.00017158372859158626, "loss": 2.1572, "step": 209105 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017158243797818498, "loss": 2.0007, "step": 209110 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017158114734032985, "loss": 2.0146, "step": 209115 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.0001715798566780213, "loss": 2.0621, "step": 209120 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017157856599125978, "loss": 2.2025, "step": 209125 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001715772752800457, "loss": 1.8837, "step": 209130 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017157598454437955, "loss": 2.0738, "step": 209135 }, { "epoch": 0.49, "grad_norm": 2.578125, "learning_rate": 0.00017157469378426175, "loss": 2.1229, "step": 209140 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.0001715734029996927, "loss": 2.1784, "step": 209145 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.00017157211219067292, "loss": 2.2089, "step": 209150 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.0001715708213572028, "loss": 1.8843, "step": 209155 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017156953049928276, "loss": 2.2524, "step": 209160 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017156823961691331, "loss": 2.0477, "step": 209165 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001715669487100948, "loss": 2.1166, "step": 209170 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017156565777882779, "loss": 1.9825, "step": 209175 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.0001715643668231126, "loss": 2.1077, "step": 209180 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017156307584294974, "loss": 2.2316, "step": 209185 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017156178483833964, "loss": 2.0054, "step": 209190 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017156049380928273, "loss": 2.2503, "step": 209195 }, { "epoch": 0.49, "grad_norm": 1.9921875, "learning_rate": 0.00017155920275577948, "loss": 2.0004, "step": 209200 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017155791167783029, "loss": 2.1315, "step": 209205 }, { "epoch": 0.49, "grad_norm": 1.8671875, "learning_rate": 0.0001715566205754356, "loss": 2.0746, "step": 209210 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017155532944859592, "loss": 2.1447, "step": 209215 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017155403829731162, "loss": 2.0622, "step": 209220 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017155274712158313, "loss": 2.0976, "step": 209225 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017155145592141096, "loss": 2.2425, "step": 209230 }, { "epoch": 0.49, "grad_norm": 1.921875, "learning_rate": 0.0001715501646967955, "loss": 2.0798, "step": 209235 }, { "epoch": 0.49, "grad_norm": 2.734375, "learning_rate": 0.0001715488734477372, "loss": 2.0942, "step": 209240 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017154758217423653, "loss": 1.9595, "step": 209245 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017154629087629388, "loss": 2.1706, "step": 209250 }, { "epoch": 0.49, "grad_norm": 2.546875, "learning_rate": 0.00017154499955390976, "loss": 2.0072, "step": 209255 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017154370820708455, "loss": 2.0639, "step": 209260 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017154241683581872, "loss": 2.0018, "step": 209265 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017154112544011266, "loss": 2.223, "step": 209270 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.0001715398340199669, "loss": 2.141, "step": 209275 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.0001715385425753818, "loss": 1.8732, "step": 209280 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017153725110635786, "loss": 1.9153, "step": 209285 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.0001715359596128955, "loss": 2.0048, "step": 209290 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017153466809499514, "loss": 2.0016, "step": 209295 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017153337655265727, "loss": 2.0575, "step": 209300 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017153208498588227, "loss": 2.2201, "step": 209305 }, { "epoch": 0.49, "grad_norm": 2.59375, "learning_rate": 0.00017153079339467063, "loss": 2.0181, "step": 209310 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017152950177902278, "loss": 2.1177, "step": 209315 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017152821013893913, "loss": 2.0981, "step": 209320 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017152691847442016, "loss": 2.1861, "step": 209325 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001715256267854663, "loss": 2.0551, "step": 209330 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017152433507207801, "loss": 2.2227, "step": 209335 }, { "epoch": 0.49, "grad_norm": 1.8984375, "learning_rate": 0.00017152304333425565, "loss": 2.1534, "step": 209340 }, { "epoch": 0.49, "grad_norm": 1.9140625, "learning_rate": 0.00017152175157199978, "loss": 2.2813, "step": 209345 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.00017152045978531075, "loss": 2.1321, "step": 209350 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017151916797418905, "loss": 2.2062, "step": 209355 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.0001715178761386351, "loss": 2.1218, "step": 209360 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.00017151658427864936, "loss": 1.9444, "step": 209365 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017151529239423224, "loss": 2.1108, "step": 209370 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017151400048538422, "loss": 2.1777, "step": 209375 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017151270855210568, "loss": 2.1934, "step": 209380 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017151141659439714, "loss": 2.128, "step": 209385 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017151012461225898, "loss": 2.1015, "step": 209390 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017150883260569168, "loss": 2.0643, "step": 209395 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017150754057469568, "loss": 2.1375, "step": 209400 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017150624851927139, "loss": 2.2049, "step": 209405 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017150495643941923, "loss": 2.2047, "step": 209410 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.0001715036643351397, "loss": 2.0549, "step": 209415 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017150237220643326, "loss": 1.9824, "step": 209420 }, { "epoch": 0.49, "grad_norm": 2.703125, "learning_rate": 0.0001715010800533003, "loss": 2.1884, "step": 209425 }, { "epoch": 0.49, "grad_norm": 2.71875, "learning_rate": 0.00017149978787574127, "loss": 1.9297, "step": 209430 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.0001714984956737566, "loss": 2.2007, "step": 209435 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017149720344734675, "loss": 1.8305, "step": 209440 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017149591119651215, "loss": 2.0681, "step": 209445 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017149461892125326, "loss": 2.1975, "step": 209450 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017149332662157055, "loss": 1.9912, "step": 209455 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017149203429746437, "loss": 2.138, "step": 209460 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017149074194893523, "loss": 2.0324, "step": 209465 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017148944957598358, "loss": 2.2325, "step": 209470 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.00017148815717860983, "loss": 1.9976, "step": 209475 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.0001714868647568144, "loss": 2.1604, "step": 209480 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.0001714855723105978, "loss": 2.0948, "step": 209485 }, { "epoch": 0.49, "grad_norm": 2.796875, "learning_rate": 0.00017148427983996038, "loss": 1.8143, "step": 209490 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017148298734490268, "loss": 2.134, "step": 209495 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017148169482542505, "loss": 2.2007, "step": 209500 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017148040228152805, "loss": 1.993, "step": 209505 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017147910971321197, "loss": 2.2248, "step": 209510 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017147781712047736, "loss": 2.1797, "step": 209515 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017147652450332467, "loss": 1.9505, "step": 209520 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017147523186175425, "loss": 2.0892, "step": 209525 }, { "epoch": 0.49, "grad_norm": 1.8359375, "learning_rate": 0.00017147393919576663, "loss": 2.0749, "step": 209530 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017147264650536217, "loss": 2.2158, "step": 209535 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.00017147135379054138, "loss": 1.8599, "step": 209540 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001714700610513047, "loss": 2.1378, "step": 209545 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017146876828765252, "loss": 2.1717, "step": 209550 }, { "epoch": 0.49, "grad_norm": 1.953125, "learning_rate": 0.00017146747549958536, "loss": 1.9966, "step": 209555 }, { "epoch": 0.49, "grad_norm": 2.515625, "learning_rate": 0.00017146618268710356, "loss": 2.2186, "step": 209560 }, { "epoch": 0.49, "grad_norm": 2.453125, "learning_rate": 0.00017146488985020762, "loss": 2.2415, "step": 209565 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017146359698889802, "loss": 2.3001, "step": 209570 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017146230410317514, "loss": 1.9497, "step": 209575 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.00017146101119303945, "loss": 2.1229, "step": 209580 }, { "epoch": 0.49, "grad_norm": 1.953125, "learning_rate": 0.00017145971825849134, "loss": 2.0178, "step": 209585 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017145842529953136, "loss": 2.1877, "step": 209590 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017145713231615984, "loss": 2.025, "step": 209595 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017145583930837726, "loss": 2.112, "step": 209600 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.0001714545462761841, "loss": 2.0231, "step": 209605 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017145325321958073, "loss": 2.0395, "step": 209610 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017145196013856767, "loss": 2.1224, "step": 209615 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017145066703314532, "loss": 2.0484, "step": 209620 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017144937390331413, "loss": 2.0233, "step": 209625 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017144808074907452, "loss": 2.0584, "step": 209630 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017144678757042696, "loss": 2.0623, "step": 209635 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017144549436737188, "loss": 2.1978, "step": 209640 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017144420113990972, "loss": 2.0615, "step": 209645 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017144290788804093, "loss": 1.9648, "step": 209650 }, { "epoch": 0.49, "grad_norm": 2.96875, "learning_rate": 0.00017144161461176593, "loss": 2.0968, "step": 209655 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.0001714403213110852, "loss": 2.1129, "step": 209660 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017143902798599916, "loss": 2.0724, "step": 209665 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017143773463650826, "loss": 2.3878, "step": 209670 }, { "epoch": 0.49, "grad_norm": 1.796875, "learning_rate": 0.00017143644126261292, "loss": 2.2605, "step": 209675 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.0001714351478643136, "loss": 1.9696, "step": 209680 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017143385444161075, "loss": 2.2394, "step": 209685 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017143256099450476, "loss": 2.0953, "step": 209690 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017143126752299615, "loss": 2.1211, "step": 209695 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017142997402708532, "loss": 2.0373, "step": 209700 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.0001714286805067727, "loss": 2.0362, "step": 209705 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017142738696205876, "loss": 2.1931, "step": 209710 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.0001714260933929439, "loss": 2.0287, "step": 209715 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017142479979942863, "loss": 2.1601, "step": 209720 }, { "epoch": 0.49, "grad_norm": 1.8203125, "learning_rate": 0.00017142350618151335, "loss": 2.0786, "step": 209725 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017142221253919847, "loss": 2.2444, "step": 209730 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.0001714209188724845, "loss": 2.1293, "step": 209735 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017141962518137183, "loss": 2.1932, "step": 209740 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017141833146586092, "loss": 2.0445, "step": 209745 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017141703772595226, "loss": 2.0608, "step": 209750 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017141574396164618, "loss": 2.0811, "step": 209755 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017141445017294322, "loss": 2.064, "step": 209760 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017141315635984378, "loss": 1.9536, "step": 209765 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.0001714118625223483, "loss": 2.1326, "step": 209770 }, { "epoch": 0.49, "grad_norm": 2.53125, "learning_rate": 0.00017141056866045725, "loss": 2.1437, "step": 209775 }, { "epoch": 0.49, "grad_norm": 1.9453125, "learning_rate": 0.00017140927477417105, "loss": 2.0523, "step": 209780 }, { "epoch": 0.49, "grad_norm": 1.875, "learning_rate": 0.00017140798086349014, "loss": 2.2355, "step": 209785 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.000171406686928415, "loss": 2.0537, "step": 209790 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.000171405392968946, "loss": 2.2355, "step": 209795 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017140409898508364, "loss": 1.9675, "step": 209800 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017140280497682836, "loss": 2.0398, "step": 209805 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017140151094418055, "loss": 1.9851, "step": 209810 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.0001714002168871407, "loss": 2.0716, "step": 209815 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017139892280570925, "loss": 1.9596, "step": 209820 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017139762869988664, "loss": 2.1178, "step": 209825 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017139633456967333, "loss": 2.0587, "step": 209830 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017139504041506967, "loss": 2.1691, "step": 209835 }, { "epoch": 0.49, "grad_norm": 2.0625, "learning_rate": 0.00017139374623607623, "loss": 1.8982, "step": 209840 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.00017139245203269337, "loss": 2.1458, "step": 209845 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017139115780492151, "loss": 2.1325, "step": 209850 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017138986355276118, "loss": 2.1043, "step": 209855 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017138856927621278, "loss": 2.1012, "step": 209860 }, { "epoch": 0.49, "grad_norm": 2.109375, "learning_rate": 0.00017138727497527676, "loss": 1.9765, "step": 209865 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.00017138598064995352, "loss": 2.0246, "step": 209870 }, { "epoch": 0.49, "grad_norm": 2.015625, "learning_rate": 0.00017138468630024356, "loss": 2.0418, "step": 209875 }, { "epoch": 0.49, "grad_norm": 2.390625, "learning_rate": 0.0001713833919261473, "loss": 1.9843, "step": 209880 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017138209752766518, "loss": 2.1101, "step": 209885 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.0001713808031047976, "loss": 2.0128, "step": 209890 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017137950865754505, "loss": 2.2474, "step": 209895 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.000171378214185908, "loss": 2.1575, "step": 209900 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017137691968988685, "loss": 2.0926, "step": 209905 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017137562516948202, "loss": 2.1238, "step": 209910 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.000171374330624694, "loss": 2.176, "step": 209915 }, { "epoch": 0.49, "grad_norm": 1.8671875, "learning_rate": 0.00017137303605552323, "loss": 2.2579, "step": 209920 }, { "epoch": 0.49, "grad_norm": 2.171875, "learning_rate": 0.00017137174146197013, "loss": 2.3069, "step": 209925 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017137044684403514, "loss": 2.1368, "step": 209930 }, { "epoch": 0.49, "grad_norm": 2.421875, "learning_rate": 0.00017136915220171872, "loss": 2.1232, "step": 209935 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.0001713678575350213, "loss": 2.0872, "step": 209940 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017136656284394333, "loss": 2.3395, "step": 209945 }, { "epoch": 0.49, "grad_norm": 1.9765625, "learning_rate": 0.00017136526812848525, "loss": 1.9041, "step": 209950 }, { "epoch": 0.49, "grad_norm": 2.3125, "learning_rate": 0.00017136397338864747, "loss": 2.1022, "step": 209955 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017136267862443048, "loss": 2.0714, "step": 209960 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017136138383583473, "loss": 2.264, "step": 209965 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.0001713600890228606, "loss": 2.202, "step": 209970 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.0001713587941855086, "loss": 2.1786, "step": 209975 }, { "epoch": 0.49, "grad_norm": 1.6171875, "learning_rate": 0.00017135749932377912, "loss": 1.9718, "step": 209980 }, { "epoch": 0.49, "grad_norm": 1.6796875, "learning_rate": 0.00017135620443767263, "loss": 2.0525, "step": 209985 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017135490952718957, "loss": 2.0036, "step": 209990 }, { "epoch": 0.49, "grad_norm": 2.0, "learning_rate": 0.00017135361459233038, "loss": 2.0921, "step": 209995 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017135231963309549, "loss": 2.2012, "step": 210000 }, { "epoch": 0.49, "grad_norm": 2.21875, "learning_rate": 0.00017135102464948537, "loss": 1.9677, "step": 210005 }, { "epoch": 0.49, "grad_norm": 1.890625, "learning_rate": 0.00017134972964150043, "loss": 2.0947, "step": 210010 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017134843460914114, "loss": 2.2099, "step": 210015 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.0001713471395524079, "loss": 1.9076, "step": 210020 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017134584447130123, "loss": 2.0341, "step": 210025 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.0001713445493658215, "loss": 2.0556, "step": 210030 }, { "epoch": 0.49, "grad_norm": 1.8515625, "learning_rate": 0.00017134325423596918, "loss": 2.216, "step": 210035 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.0001713419590817447, "loss": 1.9423, "step": 210040 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.00017134066390314854, "loss": 2.0661, "step": 210045 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.0001713393687001811, "loss": 1.9354, "step": 210050 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017133807347284282, "loss": 2.0658, "step": 210055 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.0001713367782211342, "loss": 1.9802, "step": 210060 }, { "epoch": 0.49, "grad_norm": 2.5, "learning_rate": 0.0001713354829450556, "loss": 2.1553, "step": 210065 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017133418764460754, "loss": 1.8132, "step": 210070 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.0001713328923197904, "loss": 2.1947, "step": 210075 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017133159697060467, "loss": 2.0799, "step": 210080 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.0001713303015970508, "loss": 2.1298, "step": 210085 }, { "epoch": 0.49, "grad_norm": 1.8203125, "learning_rate": 0.00017132900619912914, "loss": 2.0115, "step": 210090 }, { "epoch": 0.49, "grad_norm": 2.34375, "learning_rate": 0.00017132771077684025, "loss": 2.2395, "step": 210095 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017132641533018452, "loss": 2.0854, "step": 210100 }, { "epoch": 0.49, "grad_norm": 2.484375, "learning_rate": 0.00017132511985916234, "loss": 2.2572, "step": 210105 }, { "epoch": 0.49, "grad_norm": 1.8828125, "learning_rate": 0.00017132382436377426, "loss": 2.3106, "step": 210110 }, { "epoch": 0.49, "grad_norm": 2.25, "learning_rate": 0.00017132252884402064, "loss": 1.9715, "step": 210115 }, { "epoch": 0.49, "grad_norm": 1.890625, "learning_rate": 0.00017132123329990196, "loss": 1.9913, "step": 210120 }, { "epoch": 0.49, "grad_norm": 2.40625, "learning_rate": 0.00017131993773141866, "loss": 2.2977, "step": 210125 }, { "epoch": 0.49, "grad_norm": 1.9609375, "learning_rate": 0.00017131864213857116, "loss": 1.9299, "step": 210130 }, { "epoch": 0.49, "grad_norm": 2.46875, "learning_rate": 0.00017131734652135993, "loss": 2.1267, "step": 210135 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017131605087978538, "loss": 1.9291, "step": 210140 }, { "epoch": 0.49, "grad_norm": 2.234375, "learning_rate": 0.00017131475521384802, "loss": 2.1356, "step": 210145 }, { "epoch": 0.49, "grad_norm": 1.984375, "learning_rate": 0.0001713134595235482, "loss": 2.2495, "step": 210150 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017131216380888645, "loss": 2.0899, "step": 210155 }, { "epoch": 0.49, "grad_norm": 2.5625, "learning_rate": 0.00017131086806986313, "loss": 2.1779, "step": 210160 }, { "epoch": 0.49, "grad_norm": 2.359375, "learning_rate": 0.00017130957230647876, "loss": 1.9749, "step": 210165 }, { "epoch": 0.49, "grad_norm": 2.6875, "learning_rate": 0.0001713082765187337, "loss": 2.1578, "step": 210170 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.00017130698070662848, "loss": 2.134, "step": 210175 }, { "epoch": 0.49, "grad_norm": 1.6796875, "learning_rate": 0.0001713056848701635, "loss": 2.115, "step": 210180 }, { "epoch": 0.49, "grad_norm": 2.5625, "learning_rate": 0.00017130438900933917, "loss": 1.8962, "step": 210185 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017130309312415602, "loss": 2.0056, "step": 210190 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017130179721461442, "loss": 1.9212, "step": 210195 }, { "epoch": 0.49, "grad_norm": 2.15625, "learning_rate": 0.00017130050128071482, "loss": 2.0869, "step": 210200 }, { "epoch": 0.49, "grad_norm": 1.9375, "learning_rate": 0.00017129920532245766, "loss": 2.2415, "step": 210205 }, { "epoch": 0.49, "grad_norm": 2.953125, "learning_rate": 0.0001712979093398434, "loss": 2.2175, "step": 210210 }, { "epoch": 0.49, "grad_norm": 2.09375, "learning_rate": 0.00017129661333287254, "loss": 2.0342, "step": 210215 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.0001712953173015454, "loss": 2.0314, "step": 210220 }, { "epoch": 0.49, "grad_norm": 2.265625, "learning_rate": 0.00017129402124586252, "loss": 1.9714, "step": 210225 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017129272516582433, "loss": 2.369, "step": 210230 }, { "epoch": 0.49, "grad_norm": 2.375, "learning_rate": 0.0001712914290614312, "loss": 2.318, "step": 210235 }, { "epoch": 0.49, "grad_norm": 1.90625, "learning_rate": 0.00017129013293268365, "loss": 1.9444, "step": 210240 }, { "epoch": 0.49, "grad_norm": 1.484375, "learning_rate": 0.00017128883677958208, "loss": 1.7751, "step": 210245 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.000171287540602127, "loss": 1.9798, "step": 210250 }, { "epoch": 0.49, "grad_norm": 1.5859375, "learning_rate": 0.00017128624440031876, "loss": 2.0933, "step": 210255 }, { "epoch": 0.49, "grad_norm": 2.046875, "learning_rate": 0.00017128494817415785, "loss": 2.0883, "step": 210260 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.0001712836519236447, "loss": 2.1727, "step": 210265 }, { "epoch": 0.49, "grad_norm": 1.96875, "learning_rate": 0.0001712823556487798, "loss": 2.0847, "step": 210270 }, { "epoch": 0.49, "grad_norm": 2.203125, "learning_rate": 0.00017128105934956353, "loss": 1.927, "step": 210275 }, { "epoch": 0.49, "grad_norm": 2.078125, "learning_rate": 0.00017127976302599635, "loss": 1.9544, "step": 210280 }, { "epoch": 0.49, "grad_norm": 1.9296875, "learning_rate": 0.00017127846667807872, "loss": 2.2318, "step": 210285 }, { "epoch": 0.49, "grad_norm": 2.03125, "learning_rate": 0.00017127717030581105, "loss": 2.0419, "step": 210290 }, { "epoch": 0.49, "grad_norm": 1.8359375, "learning_rate": 0.00017127587390919385, "loss": 2.0907, "step": 210295 }, { "epoch": 0.49, "grad_norm": 2.125, "learning_rate": 0.00017127457748822747, "loss": 2.216, "step": 210300 }, { "epoch": 0.49, "grad_norm": 2.140625, "learning_rate": 0.00017127328104291246, "loss": 1.9677, "step": 210305 }, { "epoch": 0.49, "grad_norm": 2.734375, "learning_rate": 0.00017127198457324915, "loss": 2.0395, "step": 210310 }, { "epoch": 0.49, "grad_norm": 2.28125, "learning_rate": 0.00017127068807923807, "loss": 2.2351, "step": 210315 }, { "epoch": 0.49, "grad_norm": 2.328125, "learning_rate": 0.00017126939156087963, "loss": 2.1791, "step": 210320 }, { "epoch": 0.49, "grad_norm": 1.8671875, "learning_rate": 0.00017126809501817424, "loss": 2.1385, "step": 210325 }, { "epoch": 0.49, "grad_norm": 2.75, "learning_rate": 0.00017126679845112244, "loss": 1.9515, "step": 210330 }, { "epoch": 0.49, "grad_norm": 2.1875, "learning_rate": 0.00017126550185972455, "loss": 2.0932, "step": 210335 }, { "epoch": 0.49, "grad_norm": 2.296875, "learning_rate": 0.0001712642052439811, "loss": 2.1215, "step": 210340 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017126290860389247, "loss": 2.2454, "step": 210345 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017126161193945917, "loss": 2.1072, "step": 210350 }, { "epoch": 0.5, "grad_norm": 1.84375, "learning_rate": 0.00017126031525068164, "loss": 2.0071, "step": 210355 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017125901853756026, "loss": 1.9075, "step": 210360 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017125772180009546, "loss": 2.0543, "step": 210365 }, { "epoch": 0.5, "grad_norm": 2.78125, "learning_rate": 0.0001712564250382878, "loss": 2.0837, "step": 210370 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017125512825213763, "loss": 2.2054, "step": 210375 }, { "epoch": 0.5, "grad_norm": 1.8359375, "learning_rate": 0.0001712538314416454, "loss": 2.0112, "step": 210380 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017125253460681158, "loss": 2.1662, "step": 210385 }, { "epoch": 0.5, "grad_norm": 1.6796875, "learning_rate": 0.0001712512377476366, "loss": 1.9937, "step": 210390 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017124994086412093, "loss": 2.1658, "step": 210395 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017124864395626498, "loss": 2.0817, "step": 210400 }, { "epoch": 0.5, "grad_norm": 2.515625, "learning_rate": 0.0001712473470240692, "loss": 2.1041, "step": 210405 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.000171246050067534, "loss": 2.0614, "step": 210410 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017124475308665988, "loss": 2.0618, "step": 210415 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017124345608144728, "loss": 2.1452, "step": 210420 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017124215905189657, "loss": 2.0386, "step": 210425 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001712408619980083, "loss": 2.054, "step": 210430 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017123956491978283, "loss": 2.1724, "step": 210435 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017123826781722065, "loss": 2.3389, "step": 210440 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001712369706903222, "loss": 2.2216, "step": 210445 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017123567353908786, "loss": 1.974, "step": 210450 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017123437636351816, "loss": 2.1079, "step": 210455 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.0001712330791636135, "loss": 2.2165, "step": 210460 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017123178193937432, "loss": 2.1882, "step": 210465 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017123048469080108, "loss": 2.0708, "step": 210470 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017122918741789423, "loss": 2.0082, "step": 210475 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017122789012065414, "loss": 2.0522, "step": 210480 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017122659279908135, "loss": 2.1695, "step": 210485 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.0001712252954531763, "loss": 2.162, "step": 210490 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017122399808293932, "loss": 2.0058, "step": 210495 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.000171222700688371, "loss": 2.1356, "step": 210500 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017122140326947168, "loss": 2.0661, "step": 210505 }, { "epoch": 0.5, "grad_norm": 2.734375, "learning_rate": 0.00017122010582624184, "loss": 2.1196, "step": 210510 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.0001712188083586819, "loss": 2.1648, "step": 210515 }, { "epoch": 0.5, "grad_norm": 2.578125, "learning_rate": 0.00017121751086679235, "loss": 2.1451, "step": 210520 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001712162133505736, "loss": 1.9317, "step": 210525 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001712149158100261, "loss": 2.1809, "step": 210530 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.0001712136182451503, "loss": 2.0569, "step": 210535 }, { "epoch": 0.5, "grad_norm": 1.9296875, "learning_rate": 0.0001712123206559466, "loss": 2.171, "step": 210540 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017121102304241555, "loss": 2.0957, "step": 210545 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.0001712097254045575, "loss": 2.1816, "step": 210550 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017120842774237288, "loss": 2.0794, "step": 210555 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017120713005586217, "loss": 1.8933, "step": 210560 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017120583234502584, "loss": 1.9069, "step": 210565 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.0001712045346098643, "loss": 2.1892, "step": 210570 }, { "epoch": 0.5, "grad_norm": 2.875, "learning_rate": 0.00017120323685037797, "loss": 2.1029, "step": 210575 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017120193906656738, "loss": 2.0034, "step": 210580 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017120064125843287, "loss": 2.1176, "step": 210585 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017119934342597497, "loss": 2.4198, "step": 210590 }, { "epoch": 0.5, "grad_norm": 2.59375, "learning_rate": 0.00017119804556919404, "loss": 2.0303, "step": 210595 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.0001711967476880906, "loss": 2.3521, "step": 210600 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017119544978266507, "loss": 2.1813, "step": 210605 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017119415185291783, "loss": 2.1856, "step": 210610 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017119285389884944, "loss": 2.1668, "step": 210615 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.0001711915559204602, "loss": 2.0331, "step": 210620 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001711902579177507, "loss": 2.0884, "step": 210625 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001711889598907213, "loss": 2.0678, "step": 210630 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017118766183937245, "loss": 2.1242, "step": 210635 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.0001711863637637046, "loss": 2.0264, "step": 210640 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017118506566371822, "loss": 2.0585, "step": 210645 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017118376753941372, "loss": 2.3448, "step": 210650 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017118246939079156, "loss": 2.0934, "step": 210655 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017118117121785217, "loss": 1.9174, "step": 210660 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017117987302059598, "loss": 1.8992, "step": 210665 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017117857479902348, "loss": 2.1032, "step": 210670 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017117727655313508, "loss": 2.2596, "step": 210675 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017117597828293123, "loss": 2.0641, "step": 210680 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017117467998841238, "loss": 2.1638, "step": 210685 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.000171173381669579, "loss": 2.1338, "step": 210690 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017117208332643146, "loss": 2.0683, "step": 210695 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017117078495897022, "loss": 2.0802, "step": 210700 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.0001711694865671958, "loss": 2.0932, "step": 210705 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017116818815110857, "loss": 2.0653, "step": 210710 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017116688971070902, "loss": 2.0068, "step": 210715 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017116559124599756, "loss": 2.2674, "step": 210720 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017116429275697463, "loss": 1.8102, "step": 210725 }, { "epoch": 0.5, "grad_norm": 1.703125, "learning_rate": 0.00017116299424364072, "loss": 2.0402, "step": 210730 }, { "epoch": 0.5, "grad_norm": 2.5625, "learning_rate": 0.00017116169570599617, "loss": 2.1257, "step": 210735 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017116039714404154, "loss": 2.1383, "step": 210740 }, { "epoch": 0.5, "grad_norm": 1.9296875, "learning_rate": 0.00017115909855777722, "loss": 1.9063, "step": 210745 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00017115779994720368, "loss": 2.1238, "step": 210750 }, { "epoch": 0.5, "grad_norm": 2.5, "learning_rate": 0.00017115650131232135, "loss": 1.924, "step": 210755 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017115520265313064, "loss": 2.0564, "step": 210760 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.000171153903969632, "loss": 1.9502, "step": 210765 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017115260526182597, "loss": 2.1035, "step": 210770 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017115130652971288, "loss": 1.9513, "step": 210775 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.0001711500077732932, "loss": 1.8852, "step": 210780 }, { "epoch": 0.5, "grad_norm": 1.84375, "learning_rate": 0.0001711487089925674, "loss": 1.9772, "step": 210785 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.0001711474101875359, "loss": 2.135, "step": 210790 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.00017114611135819917, "loss": 2.2787, "step": 210795 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017114481250455764, "loss": 2.2634, "step": 210800 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017114351362661174, "loss": 1.9975, "step": 210805 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017114221472436193, "loss": 2.1272, "step": 210810 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017114091579780864, "loss": 2.2368, "step": 210815 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017113961684695234, "loss": 2.1106, "step": 210820 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017113831787179344, "loss": 2.1119, "step": 210825 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017113701887233243, "loss": 2.1022, "step": 210830 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.0001711357198485697, "loss": 1.9604, "step": 210835 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.0001711344208005057, "loss": 2.2082, "step": 210840 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.00017113312172814093, "loss": 2.1611, "step": 210845 }, { "epoch": 0.5, "grad_norm": 1.7890625, "learning_rate": 0.00017113182263147575, "loss": 2.2003, "step": 210850 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017113052351051068, "loss": 2.1284, "step": 210855 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017112922436524617, "loss": 2.1097, "step": 210860 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017112792519568257, "loss": 2.0453, "step": 210865 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017112662600182038, "loss": 2.1454, "step": 210870 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017112532678366005, "loss": 2.0583, "step": 210875 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017112402754120205, "loss": 2.229, "step": 210880 }, { "epoch": 0.5, "grad_norm": 1.7109375, "learning_rate": 0.0001711227282744468, "loss": 2.0567, "step": 210885 }, { "epoch": 0.5, "grad_norm": 2.921875, "learning_rate": 0.0001711214289833947, "loss": 1.9794, "step": 210890 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.0001711201296680462, "loss": 2.2323, "step": 210895 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017111883032840184, "loss": 2.4499, "step": 210900 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017111753096446197, "loss": 2.2127, "step": 210905 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017111623157622707, "loss": 2.1253, "step": 210910 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017111493216369757, "loss": 2.3145, "step": 210915 }, { "epoch": 0.5, "grad_norm": 1.6875, "learning_rate": 0.0001711136327268739, "loss": 2.055, "step": 210920 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017111233326575656, "loss": 2.077, "step": 210925 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.00017111103378034595, "loss": 2.0564, "step": 210930 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001711097342706425, "loss": 2.2119, "step": 210935 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.0001711084347366467, "loss": 2.2541, "step": 210940 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017110713517835894, "loss": 2.1292, "step": 210945 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001711058355957797, "loss": 2.2752, "step": 210950 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017110453598890942, "loss": 2.2918, "step": 210955 }, { "epoch": 0.5, "grad_norm": 2.53125, "learning_rate": 0.00017110323635774854, "loss": 2.0303, "step": 210960 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017110193670229751, "loss": 2.2001, "step": 210965 }, { "epoch": 0.5, "grad_norm": 1.9609375, "learning_rate": 0.00017110063702255679, "loss": 2.0544, "step": 210970 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017109933731852679, "loss": 2.1432, "step": 210975 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017109803759020794, "loss": 1.9462, "step": 210980 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017109673783760073, "loss": 2.2371, "step": 210985 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.0001710954380607056, "loss": 2.0467, "step": 210990 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.00017109413825952294, "loss": 2.0325, "step": 210995 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017109283843405327, "loss": 2.1065, "step": 211000 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017109153858429696, "loss": 2.1372, "step": 211005 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017109023871025454, "loss": 2.0908, "step": 211010 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017108893881192638, "loss": 2.1257, "step": 211015 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017108763888931295, "loss": 2.0528, "step": 211020 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.0001710863389424147, "loss": 2.1167, "step": 211025 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.00017108503897123205, "loss": 2.0644, "step": 211030 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017108373897576547, "loss": 2.2239, "step": 211035 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001710824389560154, "loss": 2.0164, "step": 211040 }, { "epoch": 0.5, "grad_norm": 1.859375, "learning_rate": 0.00017108113891198227, "loss": 2.2945, "step": 211045 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017107983884366655, "loss": 2.1946, "step": 211050 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017107853875106865, "loss": 1.9628, "step": 211055 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017107723863418904, "loss": 2.016, "step": 211060 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017107593849302816, "loss": 2.1622, "step": 211065 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017107463832758644, "loss": 2.2847, "step": 211070 }, { "epoch": 0.5, "grad_norm": 1.9296875, "learning_rate": 0.00017107333813786433, "loss": 2.1331, "step": 211075 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.0001710720379238623, "loss": 2.2219, "step": 211080 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017107073768558078, "loss": 2.0793, "step": 211085 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017106943742302018, "loss": 2.0963, "step": 211090 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017106813713618096, "loss": 2.1255, "step": 211095 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.0001710668368250636, "loss": 2.1882, "step": 211100 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017106553648966856, "loss": 2.0355, "step": 211105 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017106423612999619, "loss": 2.0321, "step": 211110 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017106293574604698, "loss": 2.1159, "step": 211115 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001710616353378214, "loss": 2.1428, "step": 211120 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017106033490531987, "loss": 2.1345, "step": 211125 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017105903444854286, "loss": 2.093, "step": 211130 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017105773396749077, "loss": 2.2365, "step": 211135 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017105643346216408, "loss": 2.227, "step": 211140 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017105513293256324, "loss": 2.0397, "step": 211145 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017105383237868866, "loss": 2.0814, "step": 211150 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.00017105253180054082, "loss": 2.1723, "step": 211155 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017105123119812014, "loss": 1.9885, "step": 211160 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017104993057142704, "loss": 1.9869, "step": 211165 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.00017104862992046202, "loss": 1.8868, "step": 211170 }, { "epoch": 0.5, "grad_norm": 1.828125, "learning_rate": 0.0001710473292452255, "loss": 1.9582, "step": 211175 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017104602854571793, "loss": 2.1527, "step": 211180 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017104472782193973, "loss": 2.0707, "step": 211185 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.00017104342707389137, "loss": 2.2688, "step": 211190 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001710421263015733, "loss": 2.1435, "step": 211195 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017104082550498594, "loss": 1.919, "step": 211200 }, { "epoch": 0.5, "grad_norm": 1.6796875, "learning_rate": 0.00017103952468412975, "loss": 2.0735, "step": 211205 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017103822383900518, "loss": 2.1346, "step": 211210 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017103692296961264, "loss": 2.1005, "step": 211215 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.00017103562207595262, "loss": 2.0014, "step": 211220 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017103432115802552, "loss": 2.1706, "step": 211225 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017103302021583182, "loss": 2.0074, "step": 211230 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017103171924937196, "loss": 2.0355, "step": 211235 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017103041825864635, "loss": 2.1864, "step": 211240 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017102911724365552, "loss": 1.9928, "step": 211245 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001710278162043998, "loss": 2.1772, "step": 211250 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017102651514087967, "loss": 2.2055, "step": 211255 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017102521405309567, "loss": 2.0469, "step": 211260 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.0001710239129410481, "loss": 2.0582, "step": 211265 }, { "epoch": 0.5, "grad_norm": 1.890625, "learning_rate": 0.00017102261180473754, "loss": 2.1687, "step": 211270 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017102131064416432, "loss": 2.1591, "step": 211275 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.00017102000945932894, "loss": 2.1054, "step": 211280 }, { "epoch": 0.5, "grad_norm": 1.9140625, "learning_rate": 0.00017101870825023184, "loss": 2.0822, "step": 211285 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017101740701687344, "loss": 1.9646, "step": 211290 }, { "epoch": 0.5, "grad_norm": 2.578125, "learning_rate": 0.00017101610575925424, "loss": 2.1888, "step": 211295 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017101480447737462, "loss": 2.2377, "step": 211300 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001710135031712351, "loss": 2.2233, "step": 211305 }, { "epoch": 0.5, "grad_norm": 2.71875, "learning_rate": 0.00017101220184083605, "loss": 2.1072, "step": 211310 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017101090048617795, "loss": 2.2092, "step": 211315 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.0001710095991072612, "loss": 2.1699, "step": 211320 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017100829770408632, "loss": 2.0943, "step": 211325 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017100699627665373, "loss": 2.0163, "step": 211330 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017100569482496385, "loss": 2.1384, "step": 211335 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001710043933490171, "loss": 2.1395, "step": 211340 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.000171003091848814, "loss": 2.19, "step": 211345 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017100179032435498, "loss": 2.1022, "step": 211350 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017100048877564041, "loss": 1.9905, "step": 211355 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017099918720267084, "loss": 2.0425, "step": 211360 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.0001709978856054466, "loss": 2.0333, "step": 211365 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017099658398396823, "loss": 2.1273, "step": 211370 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017099528233823614, "loss": 2.0433, "step": 211375 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017099398066825074, "loss": 2.0246, "step": 211380 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017099267897401256, "loss": 2.0645, "step": 211385 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017099137725552194, "loss": 2.1062, "step": 211390 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.00017099007551277945, "loss": 2.0719, "step": 211395 }, { "epoch": 0.5, "grad_norm": 1.84375, "learning_rate": 0.0001709887737457854, "loss": 1.9411, "step": 211400 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.0001709874719545403, "loss": 2.0394, "step": 211405 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017098617013904462, "loss": 2.2844, "step": 211410 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017098486829929876, "loss": 2.1347, "step": 211415 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.0001709835664353032, "loss": 2.3377, "step": 211420 }, { "epoch": 0.5, "grad_norm": 2.578125, "learning_rate": 0.00017098226454705833, "loss": 2.2717, "step": 211425 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017098096263456464, "loss": 2.1435, "step": 211430 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017097966069782257, "loss": 2.1532, "step": 211435 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001709783587368326, "loss": 2.1674, "step": 211440 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.0001709770567515951, "loss": 2.1474, "step": 211445 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017097575474211057, "loss": 2.0656, "step": 211450 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.0001709744527083794, "loss": 1.9566, "step": 211455 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017097315065040206, "loss": 2.1668, "step": 211460 }, { "epoch": 0.5, "grad_norm": 1.9609375, "learning_rate": 0.00017097184856817906, "loss": 2.1592, "step": 211465 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017097054646171076, "loss": 2.1326, "step": 211470 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00017096924433099764, "loss": 2.0686, "step": 211475 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017096794217604013, "loss": 2.1965, "step": 211480 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017096663999683872, "loss": 2.1962, "step": 211485 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.0001709653377933938, "loss": 2.2317, "step": 211490 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.0001709640355657058, "loss": 2.158, "step": 211495 }, { "epoch": 0.5, "grad_norm": 3.015625, "learning_rate": 0.00017096273331377522, "loss": 2.2524, "step": 211500 }, { "epoch": 0.5, "grad_norm": 2.59375, "learning_rate": 0.00017096143103760247, "loss": 2.0433, "step": 211505 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017096012873718804, "loss": 2.2771, "step": 211510 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017095882641253232, "loss": 2.2104, "step": 211515 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017095752406363579, "loss": 2.0837, "step": 211520 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017095622169049888, "loss": 2.0315, "step": 211525 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.00017095491929312203, "loss": 2.1124, "step": 211530 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.0001709536168715057, "loss": 1.9583, "step": 211535 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017095231442565032, "loss": 2.1784, "step": 211540 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017095101195555636, "loss": 1.94, "step": 211545 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017094970946122422, "loss": 2.1028, "step": 211550 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017094840694265438, "loss": 2.078, "step": 211555 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017094710439984732, "loss": 2.2479, "step": 211560 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.0001709458018328034, "loss": 2.2166, "step": 211565 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.0001709444992415231, "loss": 2.2264, "step": 211570 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.00017094319662600688, "loss": 2.273, "step": 211575 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017094189398625517, "loss": 2.1636, "step": 211580 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017094059132226845, "loss": 2.1631, "step": 211585 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017093928863404713, "loss": 2.0906, "step": 211590 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017093798592159165, "loss": 2.1409, "step": 211595 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.0001709366831849025, "loss": 2.2256, "step": 211600 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017093538042398004, "loss": 1.9437, "step": 211605 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.0001709340776388248, "loss": 2.2026, "step": 211610 }, { "epoch": 0.5, "grad_norm": 2.5625, "learning_rate": 0.00017093277482943718, "loss": 2.1221, "step": 211615 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017093147199581768, "loss": 2.3203, "step": 211620 }, { "epoch": 0.5, "grad_norm": 1.9609375, "learning_rate": 0.00017093016913796664, "loss": 2.1512, "step": 211625 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.0001709288662558846, "loss": 2.0633, "step": 211630 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.000170927563349572, "loss": 2.0192, "step": 211635 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.0001709262604190292, "loss": 2.1888, "step": 211640 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017092495746425673, "loss": 2.0337, "step": 211645 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.000170923654485255, "loss": 2.16, "step": 211650 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001709223514820245, "loss": 2.0195, "step": 211655 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017092104845456557, "loss": 1.9944, "step": 211660 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017091974540287878, "loss": 2.0191, "step": 211665 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.0001709184423269645, "loss": 2.1416, "step": 211670 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.0001709171392268232, "loss": 2.1924, "step": 211675 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017091583610245534, "loss": 2.1845, "step": 211680 }, { "epoch": 0.5, "grad_norm": 2.671875, "learning_rate": 0.00017091453295386128, "loss": 2.1171, "step": 211685 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017091322978104157, "loss": 2.322, "step": 211690 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017091192658399664, "loss": 1.9716, "step": 211695 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017091062336272686, "loss": 2.1494, "step": 211700 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017090932011723276, "loss": 1.9956, "step": 211705 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017090801684751473, "loss": 1.8992, "step": 211710 }, { "epoch": 0.5, "grad_norm": 2.625, "learning_rate": 0.00017090671355357326, "loss": 2.1066, "step": 211715 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017090541023540877, "loss": 2.2923, "step": 211720 }, { "epoch": 0.5, "grad_norm": 1.796875, "learning_rate": 0.00017090410689302169, "loss": 2.1772, "step": 211725 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017090280352641247, "loss": 2.1073, "step": 211730 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.0001709015001355816, "loss": 2.1719, "step": 211735 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017090019672052944, "loss": 2.1041, "step": 211740 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.00017089889328125653, "loss": 2.071, "step": 211745 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.00017089758981776325, "loss": 1.9622, "step": 211750 }, { "epoch": 0.5, "grad_norm": 2.921875, "learning_rate": 0.00017089628633005005, "loss": 2.0226, "step": 211755 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017089498281811744, "loss": 1.9888, "step": 211760 }, { "epoch": 0.5, "grad_norm": 1.671875, "learning_rate": 0.0001708936792819658, "loss": 1.9881, "step": 211765 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.0001708923757215956, "loss": 2.1498, "step": 211770 }, { "epoch": 0.5, "grad_norm": 1.84375, "learning_rate": 0.00017089107213700725, "loss": 2.1852, "step": 211775 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017088976852820128, "loss": 2.2673, "step": 211780 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.00017088846489517803, "loss": 2.2409, "step": 211785 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.000170887161237938, "loss": 2.1077, "step": 211790 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017088585755648165, "loss": 1.9867, "step": 211795 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.0001708845538508094, "loss": 2.33, "step": 211800 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017088325012092168, "loss": 2.1142, "step": 211805 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017088194636681898, "loss": 2.1467, "step": 211810 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017088064258850171, "loss": 2.0503, "step": 211815 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.0001708793387859703, "loss": 2.1819, "step": 211820 }, { "epoch": 0.5, "grad_norm": 2.640625, "learning_rate": 0.00017087803495922528, "loss": 2.1468, "step": 211825 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017087673110826698, "loss": 2.159, "step": 211830 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017087542723309595, "loss": 2.1779, "step": 211835 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001708741233337126, "loss": 2.1692, "step": 211840 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.00017087281941011731, "loss": 2.1224, "step": 211845 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017087151546231062, "loss": 2.1386, "step": 211850 }, { "epoch": 0.5, "grad_norm": 2.8125, "learning_rate": 0.0001708702114902929, "loss": 2.1073, "step": 211855 }, { "epoch": 0.5, "grad_norm": 2.765625, "learning_rate": 0.00017086890749406467, "loss": 2.046, "step": 211860 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017086760347362632, "loss": 2.1847, "step": 211865 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.0001708662994289783, "loss": 2.1, "step": 211870 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017086499536012108, "loss": 2.0811, "step": 211875 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.0001708636912670551, "loss": 2.134, "step": 211880 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00017086238714978074, "loss": 1.9839, "step": 211885 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017086108300829857, "loss": 2.1849, "step": 211890 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017085977884260896, "loss": 2.1911, "step": 211895 }, { "epoch": 0.5, "grad_norm": 1.796875, "learning_rate": 0.00017085847465271234, "loss": 2.0099, "step": 211900 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017085717043860917, "loss": 2.0406, "step": 211905 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017085586620029997, "loss": 2.3046, "step": 211910 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017085456193778506, "loss": 2.097, "step": 211915 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017085325765106498, "loss": 2.0618, "step": 211920 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017085195334014012, "loss": 2.0317, "step": 211925 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017085064900501095, "loss": 2.0478, "step": 211930 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017084934464567793, "loss": 1.9412, "step": 211935 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017084804026214147, "loss": 2.1406, "step": 211940 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017084673585440204, "loss": 2.0349, "step": 211945 }, { "epoch": 0.5, "grad_norm": 1.9296875, "learning_rate": 0.00017084543142246008, "loss": 2.1137, "step": 211950 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017084412696631603, "loss": 2.2309, "step": 211955 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017084282248597035, "loss": 2.0425, "step": 211960 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017084151798142346, "loss": 2.0809, "step": 211965 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017084021345267587, "loss": 2.0954, "step": 211970 }, { "epoch": 0.5, "grad_norm": 1.90625, "learning_rate": 0.00017083890889972792, "loss": 2.171, "step": 211975 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017083760432258017, "loss": 1.9663, "step": 211980 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.000170836299721233, "loss": 1.9993, "step": 211985 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.0001708349950956868, "loss": 2.0342, "step": 211990 }, { "epoch": 0.5, "grad_norm": 1.8515625, "learning_rate": 0.00017083369044594216, "loss": 1.8536, "step": 211995 }, { "epoch": 0.5, "grad_norm": 2.75, "learning_rate": 0.0001708323857719994, "loss": 1.9912, "step": 212000 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.000170831081073859, "loss": 2.1749, "step": 212005 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017082977635152145, "loss": 2.0728, "step": 212010 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017082847160498717, "loss": 1.9178, "step": 212015 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017082716683425658, "loss": 2.1515, "step": 212020 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017082586203933016, "loss": 2.2343, "step": 212025 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017082455722020835, "loss": 2.1819, "step": 212030 }, { "epoch": 0.5, "grad_norm": 1.6796875, "learning_rate": 0.00017082325237689156, "loss": 2.0653, "step": 212035 }, { "epoch": 0.5, "grad_norm": 1.8359375, "learning_rate": 0.00017082194750938026, "loss": 1.8001, "step": 212040 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017082064261767491, "loss": 2.0441, "step": 212045 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017081933770177597, "loss": 2.2177, "step": 212050 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017081803276168382, "loss": 1.8949, "step": 212055 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017081672779739897, "loss": 2.1383, "step": 212060 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017081542280892185, "loss": 2.1144, "step": 212065 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.00017081411779625288, "loss": 2.1328, "step": 212070 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.0001708128127593925, "loss": 2.0357, "step": 212075 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001708115076983412, "loss": 2.1139, "step": 212080 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017081020261309942, "loss": 2.0469, "step": 212085 }, { "epoch": 0.5, "grad_norm": 1.7421875, "learning_rate": 0.00017080889750366757, "loss": 2.1113, "step": 212090 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017080759237004614, "loss": 2.1491, "step": 212095 }, { "epoch": 0.5, "grad_norm": 1.890625, "learning_rate": 0.00017080628721223554, "loss": 1.9946, "step": 212100 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.0001708049820302362, "loss": 2.026, "step": 212105 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017080367682404863, "loss": 1.9887, "step": 212110 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017080237159367324, "loss": 2.1126, "step": 212115 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017080106633911047, "loss": 2.0842, "step": 212120 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017079976106036074, "loss": 2.0773, "step": 212125 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017079845575742456, "loss": 2.1653, "step": 212130 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017079715043030237, "loss": 2.1702, "step": 212135 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.0001707958450789945, "loss": 2.1231, "step": 212140 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017079453970350155, "loss": 2.0574, "step": 212145 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017079323430382393, "loss": 2.1504, "step": 212150 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017079192887996201, "loss": 2.0954, "step": 212155 }, { "epoch": 0.5, "grad_norm": 2.796875, "learning_rate": 0.00017079062343191626, "loss": 2.0159, "step": 212160 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.0001707893179596872, "loss": 2.0076, "step": 212165 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.0001707880124632752, "loss": 2.1897, "step": 212170 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017078670694268075, "loss": 2.2648, "step": 212175 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017078540139790427, "loss": 2.2777, "step": 212180 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.0001707840958289462, "loss": 2.1133, "step": 212185 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.000170782790235807, "loss": 2.1818, "step": 212190 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017078148461848716, "loss": 2.3211, "step": 212195 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017078017897698703, "loss": 2.0826, "step": 212200 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001707788733113071, "loss": 2.165, "step": 212205 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017077756762144786, "loss": 2.0669, "step": 212210 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017077626190740972, "loss": 2.1294, "step": 212215 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017077495616919311, "loss": 1.8663, "step": 212220 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001707736504067985, "loss": 2.0089, "step": 212225 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.0001707723446202263, "loss": 2.2202, "step": 212230 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.000170771038809477, "loss": 2.0058, "step": 212235 }, { "epoch": 0.5, "grad_norm": 1.9609375, "learning_rate": 0.00017076973297455104, "loss": 2.115, "step": 212240 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017076842711544882, "loss": 2.0376, "step": 212245 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017076712123217087, "loss": 2.1311, "step": 212250 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.00017076581532471757, "loss": 2.1289, "step": 212255 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017076450939308938, "loss": 2.2838, "step": 212260 }, { "epoch": 0.5, "grad_norm": 1.5703125, "learning_rate": 0.00017076320343728676, "loss": 2.0587, "step": 212265 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017076189745731011, "loss": 1.8114, "step": 212270 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017076059145315994, "loss": 2.1162, "step": 212275 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.0001707592854248367, "loss": 2.3221, "step": 212280 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017075797937234075, "loss": 2.145, "step": 212285 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017075667329567262, "loss": 2.2705, "step": 212290 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017075536719483273, "loss": 2.1603, "step": 212295 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.0001707540610698215, "loss": 2.1738, "step": 212300 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017075275492063944, "loss": 2.3195, "step": 212305 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.0001707514487472869, "loss": 2.1623, "step": 212310 }, { "epoch": 0.5, "grad_norm": 2.71875, "learning_rate": 0.00017075014254976442, "loss": 2.3005, "step": 212315 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.0001707488363280724, "loss": 2.1422, "step": 212320 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017074753008221132, "loss": 2.0314, "step": 212325 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017074622381218156, "loss": 1.9965, "step": 212330 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017074491751798362, "loss": 2.1581, "step": 212335 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017074361119961794, "loss": 2.2546, "step": 212340 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017074230485708496, "loss": 1.9983, "step": 212345 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001707409984903851, "loss": 2.198, "step": 212350 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017073969209951888, "loss": 2.0813, "step": 212355 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017073838568448668, "loss": 2.012, "step": 212360 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017073707924528892, "loss": 2.0148, "step": 212365 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017073577278192614, "loss": 2.1766, "step": 212370 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017073446629439873, "loss": 2.189, "step": 212375 }, { "epoch": 0.5, "grad_norm": 2.53125, "learning_rate": 0.00017073315978270715, "loss": 2.1932, "step": 212380 }, { "epoch": 0.5, "grad_norm": 2.734375, "learning_rate": 0.0001707318532468518, "loss": 2.1101, "step": 212385 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017073054668683319, "loss": 1.9918, "step": 212390 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017072924010265176, "loss": 1.9396, "step": 212395 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017072793349430794, "loss": 1.9908, "step": 212400 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017072662686180214, "loss": 2.0823, "step": 212405 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.00017072532020513488, "loss": 2.177, "step": 212410 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017072401352430654, "loss": 2.151, "step": 212415 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001707227068193176, "loss": 2.0757, "step": 212420 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.0001707214000901685, "loss": 2.1503, "step": 212425 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001707200933368597, "loss": 2.1497, "step": 212430 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.0001707187865593916, "loss": 2.1111, "step": 212435 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017071747975776474, "loss": 2.0747, "step": 212440 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017071617293197946, "loss": 2.1598, "step": 212445 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017071486608203625, "loss": 2.0371, "step": 212450 }, { "epoch": 0.5, "grad_norm": 2.9375, "learning_rate": 0.0001707135592079356, "loss": 2.168, "step": 212455 }, { "epoch": 0.5, "grad_norm": 1.8671875, "learning_rate": 0.00017071225230967787, "loss": 2.1536, "step": 212460 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.0001707109453872636, "loss": 2.0967, "step": 212465 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017070963844069312, "loss": 2.0103, "step": 212470 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.000170708331469967, "loss": 2.0382, "step": 212475 }, { "epoch": 0.5, "grad_norm": 1.703125, "learning_rate": 0.0001707070244750856, "loss": 2.0781, "step": 212480 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017070571745604943, "loss": 2.2983, "step": 212485 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017070441041285887, "loss": 2.0765, "step": 212490 }, { "epoch": 0.5, "grad_norm": 2.703125, "learning_rate": 0.00017070310334551446, "loss": 1.9755, "step": 212495 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017070179625401654, "loss": 2.0782, "step": 212500 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.0001707004891383656, "loss": 2.2848, "step": 212505 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001706991819985621, "loss": 2.0496, "step": 212510 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017069787483460649, "loss": 2.0704, "step": 212515 }, { "epoch": 0.5, "grad_norm": 1.734375, "learning_rate": 0.0001706965676464992, "loss": 2.2172, "step": 212520 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017069526043424067, "loss": 2.1462, "step": 212525 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017069395319783138, "loss": 2.1781, "step": 212530 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.0001706926459372717, "loss": 2.1247, "step": 212535 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017069133865256218, "loss": 2.136, "step": 212540 }, { "epoch": 0.5, "grad_norm": 2.5, "learning_rate": 0.0001706900313437032, "loss": 2.1836, "step": 212545 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.00017068872401069523, "loss": 2.1303, "step": 212550 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.0001706874166535387, "loss": 2.0994, "step": 212555 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017068610927223405, "loss": 2.2654, "step": 212560 }, { "epoch": 0.5, "grad_norm": 1.7421875, "learning_rate": 0.00017068480186678176, "loss": 2.0525, "step": 212565 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017068349443718228, "loss": 2.0892, "step": 212570 }, { "epoch": 0.5, "grad_norm": 1.9609375, "learning_rate": 0.00017068218698343602, "loss": 1.9751, "step": 212575 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017068087950554347, "loss": 1.8063, "step": 212580 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.000170679572003505, "loss": 2.1645, "step": 212585 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017067826447732113, "loss": 2.0017, "step": 212590 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.0001706769569269923, "loss": 1.999, "step": 212595 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00017067564935251893, "loss": 2.0088, "step": 212600 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017067434175390144, "loss": 2.0576, "step": 212605 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017067303413114036, "loss": 2.0366, "step": 212610 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017067172648423603, "loss": 2.1109, "step": 212615 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.000170670418813189, "loss": 2.0661, "step": 212620 }, { "epoch": 0.5, "grad_norm": 1.9609375, "learning_rate": 0.0001706691111179997, "loss": 1.9986, "step": 212625 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017066780339866853, "loss": 2.2407, "step": 212630 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017066649565519596, "loss": 2.045, "step": 212635 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001706651878875824, "loss": 2.2229, "step": 212640 }, { "epoch": 0.5, "grad_norm": 1.8515625, "learning_rate": 0.00017066388009582834, "loss": 2.109, "step": 212645 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017066257227993424, "loss": 1.917, "step": 212650 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017066126443990052, "loss": 2.1738, "step": 212655 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.0001706599565757276, "loss": 1.895, "step": 212660 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.000170658648687416, "loss": 2.0241, "step": 212665 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017065734077496608, "loss": 2.0526, "step": 212670 }, { "epoch": 0.5, "grad_norm": 8.25, "learning_rate": 0.00017065603283837835, "loss": 2.1667, "step": 212675 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017065472487765327, "loss": 2.0951, "step": 212680 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017065341689279122, "loss": 2.0506, "step": 212685 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.0001706521088837927, "loss": 2.102, "step": 212690 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017065080085065812, "loss": 2.2337, "step": 212695 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017064949279338792, "loss": 2.3075, "step": 212700 }, { "epoch": 0.5, "grad_norm": 1.8984375, "learning_rate": 0.00017064818471198263, "loss": 2.1753, "step": 212705 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017064687660644262, "loss": 2.0315, "step": 212710 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017064556847676835, "loss": 2.1457, "step": 212715 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017064426032296028, "loss": 2.1576, "step": 212720 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017064295214501882, "loss": 2.1847, "step": 212725 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017064164394294448, "loss": 2.1103, "step": 212730 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017064033571673765, "loss": 1.9384, "step": 212735 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017063902746639883, "loss": 2.0998, "step": 212740 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001706377191919284, "loss": 2.1215, "step": 212745 }, { "epoch": 0.5, "grad_norm": 1.75, "learning_rate": 0.00017063641089332685, "loss": 1.9996, "step": 212750 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017063510257059466, "loss": 2.1888, "step": 212755 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.0001706337942237322, "loss": 2.1262, "step": 212760 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017063248585273997, "loss": 2.1141, "step": 212765 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.0001706311774576184, "loss": 2.0257, "step": 212770 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.00017062986903836793, "loss": 2.2492, "step": 212775 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017062856059498903, "loss": 2.0199, "step": 212780 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.0001706272521274821, "loss": 2.0553, "step": 212785 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017062594363584767, "loss": 2.1034, "step": 212790 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001706246351200861, "loss": 1.9083, "step": 212795 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017062332658019787, "loss": 2.1208, "step": 212800 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017062201801618346, "loss": 2.2827, "step": 212805 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.0001706207094280433, "loss": 2.1766, "step": 212810 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017061940081577779, "loss": 2.1761, "step": 212815 }, { "epoch": 0.5, "grad_norm": 1.90625, "learning_rate": 0.0001706180921793874, "loss": 2.1226, "step": 212820 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017061678351887265, "loss": 2.179, "step": 212825 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017061547483423385, "loss": 2.2393, "step": 212830 }, { "epoch": 0.5, "grad_norm": 1.8984375, "learning_rate": 0.00017061416612547156, "loss": 2.0837, "step": 212835 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001706128573925862, "loss": 1.9983, "step": 212840 }, { "epoch": 0.5, "grad_norm": 2.6875, "learning_rate": 0.0001706115486355782, "loss": 2.2647, "step": 212845 }, { "epoch": 0.5, "grad_norm": 1.8359375, "learning_rate": 0.000170610239854448, "loss": 2.018, "step": 212850 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017060893104919606, "loss": 2.0728, "step": 212855 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017060762221982287, "loss": 2.1002, "step": 212860 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.0001706063133663288, "loss": 2.0411, "step": 212865 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017060500448871433, "loss": 1.9859, "step": 212870 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017060369558697994, "loss": 1.986, "step": 212875 }, { "epoch": 0.5, "grad_norm": 2.6875, "learning_rate": 0.00017060238666112603, "loss": 1.9153, "step": 212880 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017060107771115304, "loss": 2.1068, "step": 212885 }, { "epoch": 0.5, "grad_norm": 1.7890625, "learning_rate": 0.00017059976873706146, "loss": 2.1402, "step": 212890 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017059845973885175, "loss": 2.0158, "step": 212895 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017059715071652426, "loss": 2.0699, "step": 212900 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017059584167007955, "loss": 2.0409, "step": 212905 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.000170594532599518, "loss": 2.1016, "step": 212910 }, { "epoch": 0.5, "grad_norm": 1.765625, "learning_rate": 0.0001705932235048401, "loss": 2.1234, "step": 212915 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017059191438604626, "loss": 2.0938, "step": 212920 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017059060524313694, "loss": 1.9953, "step": 212925 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.0001705892960761126, "loss": 2.174, "step": 212930 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.00017058798688497366, "loss": 2.1755, "step": 212935 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001705866776697206, "loss": 2.0863, "step": 212940 }, { "epoch": 0.5, "grad_norm": 2.53125, "learning_rate": 0.00017058536843035383, "loss": 2.0779, "step": 212945 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017058405916687384, "loss": 2.1701, "step": 212950 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017058274987928105, "loss": 2.2527, "step": 212955 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001705814405675759, "loss": 2.0856, "step": 212960 }, { "epoch": 0.5, "grad_norm": 2.734375, "learning_rate": 0.00017058013123175887, "loss": 1.9969, "step": 212965 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017057882187183036, "loss": 2.0921, "step": 212970 }, { "epoch": 0.5, "grad_norm": 1.796875, "learning_rate": 0.00017057751248779088, "loss": 2.0753, "step": 212975 }, { "epoch": 0.5, "grad_norm": 1.8125, "learning_rate": 0.0001705762030796408, "loss": 2.0427, "step": 212980 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017057489364738065, "loss": 1.8621, "step": 212985 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017057358419101081, "loss": 2.3013, "step": 212990 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017057227471053177, "loss": 2.3091, "step": 212995 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017057096520594397, "loss": 2.0171, "step": 213000 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017056965567724782, "loss": 2.244, "step": 213005 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.0001705683461244438, "loss": 2.2714, "step": 213010 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017056703654753236, "loss": 2.149, "step": 213015 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.00017056572694651393, "loss": 2.1541, "step": 213020 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.00017056441732138898, "loss": 2.0867, "step": 213025 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017056310767215796, "loss": 1.9444, "step": 213030 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.00017056179799882127, "loss": 1.9849, "step": 213035 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017056048830137944, "loss": 2.0563, "step": 213040 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017055917857983278, "loss": 2.0495, "step": 213045 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001705578688341819, "loss": 1.9995, "step": 213050 }, { "epoch": 0.5, "grad_norm": 1.546875, "learning_rate": 0.00017055655906442717, "loss": 1.8844, "step": 213055 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.000170555249270569, "loss": 2.0689, "step": 213060 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.0001705539394526079, "loss": 2.0923, "step": 213065 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.0001705526296105443, "loss": 2.055, "step": 213070 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017055131974437864, "loss": 2.1178, "step": 213075 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017055000985411137, "loss": 2.051, "step": 213080 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017054869993974293, "loss": 2.101, "step": 213085 }, { "epoch": 0.5, "grad_norm": 1.8359375, "learning_rate": 0.00017054739000127377, "loss": 2.0903, "step": 213090 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017054608003870433, "loss": 2.1961, "step": 213095 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017054477005203512, "loss": 2.115, "step": 213100 }, { "epoch": 0.5, "grad_norm": 1.7890625, "learning_rate": 0.0001705434600412665, "loss": 2.0858, "step": 213105 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017054215000639894, "loss": 2.0391, "step": 213110 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017054083994743293, "loss": 2.1076, "step": 213115 }, { "epoch": 0.5, "grad_norm": 1.8515625, "learning_rate": 0.0001705395298643689, "loss": 2.1253, "step": 213120 }, { "epoch": 0.5, "grad_norm": 2.515625, "learning_rate": 0.00017053821975720722, "loss": 1.9508, "step": 213125 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017053690962594848, "loss": 1.9803, "step": 213130 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017053559947059304, "loss": 2.2591, "step": 213135 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017053428929114133, "loss": 1.9546, "step": 213140 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017053297908759384, "loss": 2.1467, "step": 213145 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.000170531668859951, "loss": 2.0682, "step": 213150 }, { "epoch": 0.5, "grad_norm": 2.5, "learning_rate": 0.00017053035860821328, "loss": 1.9766, "step": 213155 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.0001705290483323811, "loss": 2.1548, "step": 213160 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017052773803245494, "loss": 1.9484, "step": 213165 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.0001705264277084352, "loss": 2.0667, "step": 213170 }, { "epoch": 0.5, "grad_norm": 2.734375, "learning_rate": 0.00017052511736032235, "loss": 2.1381, "step": 213175 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017052380698811685, "loss": 2.1324, "step": 213180 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017052249659181917, "loss": 2.0487, "step": 213185 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017052118617142967, "loss": 1.9956, "step": 213190 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017051987572694888, "loss": 2.007, "step": 213195 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017051856525837725, "loss": 1.9768, "step": 213200 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017051725476571514, "loss": 2.188, "step": 213205 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017051594424896308, "loss": 2.0732, "step": 213210 }, { "epoch": 0.5, "grad_norm": 1.8515625, "learning_rate": 0.00017051463370812151, "loss": 2.1525, "step": 213215 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017051332314319087, "loss": 2.0498, "step": 213220 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017051201255417157, "loss": 2.006, "step": 213225 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.0001705107019410641, "loss": 2.1897, "step": 213230 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001705093913038689, "loss": 2.01, "step": 213235 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.0001705080806425864, "loss": 2.0376, "step": 213240 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017050676995721708, "loss": 2.0191, "step": 213245 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017050545924776137, "loss": 2.0697, "step": 213250 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017050414851421967, "loss": 2.0467, "step": 213255 }, { "epoch": 0.5, "grad_norm": 3.390625, "learning_rate": 0.00017050283775659253, "loss": 1.9746, "step": 213260 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017050152697488033, "loss": 2.1571, "step": 213265 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017050021616908353, "loss": 2.0752, "step": 213270 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017049890533920255, "loss": 2.1346, "step": 213275 }, { "epoch": 0.5, "grad_norm": 1.8359375, "learning_rate": 0.00017049759448523787, "loss": 2.109, "step": 213280 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017049628360718994, "loss": 1.9217, "step": 213285 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017049497270505922, "loss": 1.9571, "step": 213290 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017049366177884613, "loss": 2.0225, "step": 213295 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017049235082855114, "loss": 2.0245, "step": 213300 }, { "epoch": 0.5, "grad_norm": 2.6875, "learning_rate": 0.00017049103985417467, "loss": 2.1776, "step": 213305 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017048972885571717, "loss": 1.9844, "step": 213310 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.0001704884178331791, "loss": 2.1817, "step": 213315 }, { "epoch": 0.5, "grad_norm": 1.8671875, "learning_rate": 0.0001704871067865609, "loss": 2.1073, "step": 213320 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017048579571586308, "loss": 2.1148, "step": 213325 }, { "epoch": 0.5, "grad_norm": 1.796875, "learning_rate": 0.00017048448462108598, "loss": 2.1646, "step": 213330 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017048317350223013, "loss": 2.1422, "step": 213335 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017048186235929595, "loss": 1.833, "step": 213340 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017048055119228388, "loss": 2.1183, "step": 213345 }, { "epoch": 0.5, "grad_norm": 1.7578125, "learning_rate": 0.00017047924000119437, "loss": 2.0705, "step": 213350 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017047792878602787, "loss": 2.0569, "step": 213355 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017047661754678487, "loss": 2.3018, "step": 213360 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017047530628346572, "loss": 2.19, "step": 213365 }, { "epoch": 0.5, "grad_norm": 1.71875, "learning_rate": 0.00017047399499607095, "loss": 2.1004, "step": 213370 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017047268368460097, "loss": 1.9643, "step": 213375 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.0001704713723490563, "loss": 2.133, "step": 213380 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017047006098943728, "loss": 2.2934, "step": 213385 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017046874960574443, "loss": 2.2087, "step": 213390 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017046743819797814, "loss": 2.1968, "step": 213395 }, { "epoch": 0.5, "grad_norm": 1.8125, "learning_rate": 0.00017046612676613893, "loss": 2.0494, "step": 213400 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017046481531022724, "loss": 1.8871, "step": 213405 }, { "epoch": 0.5, "grad_norm": 1.75, "learning_rate": 0.00017046350383024346, "loss": 2.0708, "step": 213410 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017046219232618807, "loss": 2.0727, "step": 213415 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017046088079806148, "loss": 2.1918, "step": 213420 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017045956924586423, "loss": 2.168, "step": 213425 }, { "epoch": 0.5, "grad_norm": 1.890625, "learning_rate": 0.0001704582576695967, "loss": 1.9708, "step": 213430 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017045694606925935, "loss": 2.0819, "step": 213435 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017045563444485263, "loss": 2.1074, "step": 213440 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017045432279637698, "loss": 2.0697, "step": 213445 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017045301112383286, "loss": 2.1524, "step": 213450 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.0001704516994272207, "loss": 2.2267, "step": 213455 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.000170450387706541, "loss": 2.2235, "step": 213460 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00017044907596179413, "loss": 1.9196, "step": 213465 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017044776419298057, "loss": 2.0646, "step": 213470 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017044645240010082, "loss": 2.0723, "step": 213475 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017044514058315525, "loss": 2.114, "step": 213480 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017044382874214438, "loss": 2.2355, "step": 213485 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.00017044251687706858, "loss": 2.0483, "step": 213490 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017044120498792834, "loss": 2.3023, "step": 213495 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017043989307472413, "loss": 2.1512, "step": 213500 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017043858113745638, "loss": 1.8837, "step": 213505 }, { "epoch": 0.5, "grad_norm": 1.9140625, "learning_rate": 0.0001704372691761255, "loss": 2.2597, "step": 213510 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.000170435957190732, "loss": 2.1807, "step": 213515 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017043464518127629, "loss": 2.3339, "step": 213520 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.0001704333331477588, "loss": 2.1417, "step": 213525 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017043202109018007, "loss": 2.1049, "step": 213530 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017043070900854044, "loss": 2.3818, "step": 213535 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017042939690284042, "loss": 2.0056, "step": 213540 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.0001704280847730804, "loss": 2.0638, "step": 213545 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.0001704267726192609, "loss": 2.0419, "step": 213550 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017042546044138235, "loss": 2.2079, "step": 213555 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017042414823944518, "loss": 1.979, "step": 213560 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017042283601344984, "loss": 2.0245, "step": 213565 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017042152376339678, "loss": 2.2197, "step": 213570 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017042021148928646, "loss": 2.0703, "step": 213575 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017041889919111928, "loss": 2.0651, "step": 213580 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017041758686889576, "loss": 1.783, "step": 213585 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017041627452261634, "loss": 2.136, "step": 213590 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017041496215228138, "loss": 2.2608, "step": 213595 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017041364975789144, "loss": 2.1309, "step": 213600 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017041233733944687, "loss": 2.1587, "step": 213605 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017041102489694822, "loss": 2.2658, "step": 213610 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017040971243039587, "loss": 2.0957, "step": 213615 }, { "epoch": 0.5, "grad_norm": 2.71875, "learning_rate": 0.0001704083999397903, "loss": 2.1669, "step": 213620 }, { "epoch": 0.5, "grad_norm": 1.765625, "learning_rate": 0.0001704070874251319, "loss": 2.0523, "step": 213625 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017040577488642122, "loss": 2.1807, "step": 213630 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001704044623236586, "loss": 2.1473, "step": 213635 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017040314973684457, "loss": 2.1016, "step": 213640 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017040183712597955, "loss": 1.8929, "step": 213645 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.000170400524491064, "loss": 2.092, "step": 213650 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.00017039921183209834, "loss": 2.2426, "step": 213655 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017039789914908296, "loss": 1.9811, "step": 213660 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017039658644201847, "loss": 2.039, "step": 213665 }, { "epoch": 0.5, "grad_norm": 1.859375, "learning_rate": 0.00017039527371090518, "loss": 2.0492, "step": 213670 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017039396095574363, "loss": 2.0766, "step": 213675 }, { "epoch": 0.5, "grad_norm": 1.7734375, "learning_rate": 0.0001703926481765342, "loss": 2.1863, "step": 213680 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017039133537327737, "loss": 2.0214, "step": 213685 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017039002254597358, "loss": 2.1798, "step": 213690 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.0001703887096946233, "loss": 2.0937, "step": 213695 }, { "epoch": 0.5, "grad_norm": 1.9921875, "learning_rate": 0.00017038739681922693, "loss": 2.0847, "step": 213700 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017038608391978496, "loss": 2.0691, "step": 213705 }, { "epoch": 0.5, "grad_norm": 2.453125, "learning_rate": 0.00017038477099629783, "loss": 2.2018, "step": 213710 }, { "epoch": 0.5, "grad_norm": 1.8203125, "learning_rate": 0.000170383458048766, "loss": 2.148, "step": 213715 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017038214507718988, "loss": 2.1546, "step": 213720 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017038083208156994, "loss": 1.9719, "step": 213725 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017037951906190665, "loss": 2.254, "step": 213730 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017037820601820044, "loss": 2.2616, "step": 213735 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017037689295045175, "loss": 2.0981, "step": 213740 }, { "epoch": 0.5, "grad_norm": 1.9453125, "learning_rate": 0.00017037557985866104, "loss": 2.1174, "step": 213745 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017037426674282874, "loss": 2.2659, "step": 213750 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.00017037295360295535, "loss": 1.9724, "step": 213755 }, { "epoch": 0.5, "grad_norm": 1.6953125, "learning_rate": 0.0001703716404390413, "loss": 2.1503, "step": 213760 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017037032725108694, "loss": 2.0776, "step": 213765 }, { "epoch": 0.5, "grad_norm": 1.8984375, "learning_rate": 0.00017036901403909285, "loss": 2.0878, "step": 213770 }, { "epoch": 0.5, "grad_norm": 2.59375, "learning_rate": 0.00017036770080305943, "loss": 2.207, "step": 213775 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.0001703663875429871, "loss": 2.1559, "step": 213780 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017036507425887638, "loss": 2.1528, "step": 213785 }, { "epoch": 0.5, "grad_norm": 2.53125, "learning_rate": 0.00017036376095072762, "loss": 2.1492, "step": 213790 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017036244761854137, "loss": 2.1508, "step": 213795 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.000170361134262318, "loss": 2.2746, "step": 213800 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017035982088205805, "loss": 2.0411, "step": 213805 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017035850747776184, "loss": 2.0589, "step": 213810 }, { "epoch": 0.5, "grad_norm": 2.859375, "learning_rate": 0.00017035719404942992, "loss": 2.1218, "step": 213815 }, { "epoch": 0.5, "grad_norm": 1.984375, "learning_rate": 0.00017035588059706268, "loss": 2.0559, "step": 213820 }, { "epoch": 0.5, "grad_norm": 2.421875, "learning_rate": 0.00017035456712066063, "loss": 2.1645, "step": 213825 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.00017035325362022417, "loss": 2.1812, "step": 213830 }, { "epoch": 0.5, "grad_norm": 2.578125, "learning_rate": 0.00017035194009575375, "loss": 2.2096, "step": 213835 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017035062654724984, "loss": 1.989, "step": 213840 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001703493129747129, "loss": 2.2494, "step": 213845 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017034799937814334, "loss": 2.0531, "step": 213850 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.0001703466857575416, "loss": 2.083, "step": 213855 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001703453721129082, "loss": 2.1627, "step": 213860 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017034405844424354, "loss": 2.001, "step": 213865 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.0001703427447515481, "loss": 1.892, "step": 213870 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017034143103482224, "loss": 2.0244, "step": 213875 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001703401172940665, "loss": 2.2773, "step": 213880 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.00017033880352928127, "loss": 2.2218, "step": 213885 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017033748974046707, "loss": 1.9995, "step": 213890 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.0001703361759276243, "loss": 2.2148, "step": 213895 }, { "epoch": 0.5, "grad_norm": 1.828125, "learning_rate": 0.0001703348620907534, "loss": 2.1054, "step": 213900 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017033354822985485, "loss": 2.0654, "step": 213905 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017033223434492908, "loss": 1.9213, "step": 213910 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017033092043597657, "loss": 2.1097, "step": 213915 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.0001703296065029977, "loss": 2.2124, "step": 213920 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.000170328292545993, "loss": 2.0211, "step": 213925 }, { "epoch": 0.5, "grad_norm": 2.25, "learning_rate": 0.00017032697856496283, "loss": 2.089, "step": 213930 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017032566455990773, "loss": 2.1282, "step": 213935 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.0001703243505308281, "loss": 1.96, "step": 213940 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017032303647772435, "loss": 2.0945, "step": 213945 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017032172240059705, "loss": 2.2507, "step": 213950 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017032040829944654, "loss": 1.9944, "step": 213955 }, { "epoch": 0.5, "grad_norm": 2.84375, "learning_rate": 0.0001703190941742733, "loss": 2.0877, "step": 213960 }, { "epoch": 0.5, "grad_norm": 1.875, "learning_rate": 0.00017031778002507778, "loss": 1.9616, "step": 213965 }, { "epoch": 0.5, "grad_norm": 1.7890625, "learning_rate": 0.00017031646585186042, "loss": 2.0749, "step": 213970 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.0001703151516546217, "loss": 1.9554, "step": 213975 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.00017031383743336206, "loss": 2.0062, "step": 213980 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001703125231880819, "loss": 1.9187, "step": 213985 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017031120891878177, "loss": 2.2336, "step": 213990 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.000170309894625462, "loss": 2.2085, "step": 213995 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.0001703085803081231, "loss": 2.2929, "step": 214000 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017030726596676553, "loss": 2.0833, "step": 214005 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017030595160138972, "loss": 2.0109, "step": 214010 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017030463721199611, "loss": 2.2535, "step": 214015 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.00017030332279858518, "loss": 2.1315, "step": 214020 }, { "epoch": 0.5, "grad_norm": 2.5, "learning_rate": 0.00017030200836115736, "loss": 2.1187, "step": 214025 }, { "epoch": 0.5, "grad_norm": 1.9375, "learning_rate": 0.0001703006938997131, "loss": 2.0955, "step": 214030 }, { "epoch": 0.5, "grad_norm": 2.84375, "learning_rate": 0.00017029937941425281, "loss": 2.1932, "step": 214035 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.000170298064904777, "loss": 2.2454, "step": 214040 }, { "epoch": 0.5, "grad_norm": 2.140625, "learning_rate": 0.00017029675037128611, "loss": 2.2412, "step": 214045 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.00017029543581378057, "loss": 2.0661, "step": 214050 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.00017029412123226086, "loss": 2.2676, "step": 214055 }, { "epoch": 0.5, "grad_norm": 1.953125, "learning_rate": 0.00017029280662672736, "loss": 2.1226, "step": 214060 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017029149199718056, "loss": 2.3067, "step": 214065 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.0001702901773436209, "loss": 2.1034, "step": 214070 }, { "epoch": 0.5, "grad_norm": 2.515625, "learning_rate": 0.0001702888626660489, "loss": 2.001, "step": 214075 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.0001702875479644649, "loss": 1.9598, "step": 214080 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017028623323886945, "loss": 2.2645, "step": 214085 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.0001702849184892629, "loss": 2.226, "step": 214090 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017028360371564575, "loss": 2.0363, "step": 214095 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.00017028228891801845, "loss": 2.2423, "step": 214100 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017028097409638147, "loss": 2.0248, "step": 214105 }, { "epoch": 0.5, "grad_norm": 2.75, "learning_rate": 0.00017027965925073523, "loss": 2.212, "step": 214110 }, { "epoch": 0.5, "grad_norm": 1.8515625, "learning_rate": 0.00017027834438108016, "loss": 2.0551, "step": 214115 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017027702948741675, "loss": 2.1582, "step": 214120 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017027571456974542, "loss": 2.116, "step": 214125 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017027439962806663, "loss": 2.2865, "step": 214130 }, { "epoch": 0.5, "grad_norm": 1.65625, "learning_rate": 0.00017027308466238087, "loss": 2.2608, "step": 214135 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001702717696726885, "loss": 2.0412, "step": 214140 }, { "epoch": 0.5, "grad_norm": 2.21875, "learning_rate": 0.00017027045465899004, "loss": 2.1614, "step": 214145 }, { "epoch": 0.5, "grad_norm": 1.8125, "learning_rate": 0.00017026913962128593, "loss": 2.3357, "step": 214150 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017026782455957658, "loss": 2.1353, "step": 214155 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.00017026650947386246, "loss": 2.239, "step": 214160 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017026519436414407, "loss": 2.0352, "step": 214165 }, { "epoch": 0.5, "grad_norm": 2.734375, "learning_rate": 0.00017026387923042176, "loss": 2.0681, "step": 214170 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017026256407269607, "loss": 2.1977, "step": 214175 }, { "epoch": 0.5, "grad_norm": 2.71875, "learning_rate": 0.00017026124889096744, "loss": 2.1142, "step": 214180 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017025993368523627, "loss": 2.1269, "step": 214185 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017025861845550302, "loss": 2.1729, "step": 214190 }, { "epoch": 0.5, "grad_norm": 2.3125, "learning_rate": 0.00017025730320176816, "loss": 2.1664, "step": 214195 }, { "epoch": 0.5, "grad_norm": 2.890625, "learning_rate": 0.0001702559879240321, "loss": 1.9556, "step": 214200 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017025467262229537, "loss": 2.229, "step": 214205 }, { "epoch": 0.5, "grad_norm": 2.375, "learning_rate": 0.00017025335729655835, "loss": 2.2172, "step": 214210 }, { "epoch": 0.5, "grad_norm": 2.484375, "learning_rate": 0.00017025204194682153, "loss": 2.0123, "step": 214215 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017025072657308527, "loss": 2.1189, "step": 214220 }, { "epoch": 0.5, "grad_norm": 1.75, "learning_rate": 0.00017024941117535015, "loss": 2.0774, "step": 214225 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017024809575361656, "loss": 2.0339, "step": 214230 }, { "epoch": 0.5, "grad_norm": 2.390625, "learning_rate": 0.0001702467803078849, "loss": 2.1252, "step": 214235 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.0001702454648381557, "loss": 2.0611, "step": 214240 }, { "epoch": 0.5, "grad_norm": 1.9765625, "learning_rate": 0.00017024414934442938, "loss": 2.0617, "step": 214245 }, { "epoch": 0.5, "grad_norm": 1.859375, "learning_rate": 0.00017024283382670636, "loss": 2.1665, "step": 214250 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017024151828498712, "loss": 2.1557, "step": 214255 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001702402027192721, "loss": 2.2194, "step": 214260 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.0001702388871295618, "loss": 2.1343, "step": 214265 }, { "epoch": 0.5, "grad_norm": 2.46875, "learning_rate": 0.0001702375715158566, "loss": 2.1357, "step": 214270 }, { "epoch": 0.5, "grad_norm": 3.3125, "learning_rate": 0.00017023625587815692, "loss": 2.0584, "step": 214275 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.00017023494021646332, "loss": 1.9922, "step": 214280 }, { "epoch": 0.5, "grad_norm": 2.75, "learning_rate": 0.00017023362453077616, "loss": 2.1535, "step": 214285 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017023230882109593, "loss": 1.9683, "step": 214290 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017023099308742306, "loss": 2.1464, "step": 214295 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.00017022967732975802, "loss": 2.0111, "step": 214300 }, { "epoch": 0.5, "grad_norm": 2.5625, "learning_rate": 0.00017022836154810124, "loss": 2.1589, "step": 214305 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.0001702270457424532, "loss": 2.0338, "step": 214310 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.0001702257299128143, "loss": 1.9835, "step": 214315 }, { "epoch": 0.5, "grad_norm": 2.65625, "learning_rate": 0.00017022441405918504, "loss": 2.0677, "step": 214320 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017022309818156582, "loss": 2.2355, "step": 214325 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017022178227995714, "loss": 2.0965, "step": 214330 }, { "epoch": 0.5, "grad_norm": 2.28125, "learning_rate": 0.00017022046635435943, "loss": 2.0938, "step": 214335 }, { "epoch": 0.5, "grad_norm": 2.4375, "learning_rate": 0.00017021915040477313, "loss": 2.1108, "step": 214340 }, { "epoch": 0.5, "grad_norm": 2.40625, "learning_rate": 0.00017021783443119868, "loss": 2.0952, "step": 214345 }, { "epoch": 0.5, "grad_norm": 1.8359375, "learning_rate": 0.00017021651843363654, "loss": 2.0714, "step": 214350 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017021520241208718, "loss": 2.0207, "step": 214355 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017021388636655102, "loss": 2.1994, "step": 214360 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.00017021257029702853, "loss": 2.0025, "step": 214365 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017021125420352019, "loss": 2.0269, "step": 214370 }, { "epoch": 0.5, "grad_norm": 1.8828125, "learning_rate": 0.00017020993808602634, "loss": 2.1733, "step": 214375 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017020862194454757, "loss": 2.1546, "step": 214380 }, { "epoch": 0.5, "grad_norm": 1.640625, "learning_rate": 0.0001702073057790842, "loss": 2.0198, "step": 214385 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.00017020598958963678, "loss": 1.9067, "step": 214390 }, { "epoch": 0.5, "grad_norm": 2.046875, "learning_rate": 0.0001702046733762057, "loss": 2.1813, "step": 214395 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.00017020335713879145, "loss": 2.0305, "step": 214400 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017020204087739445, "loss": 2.0681, "step": 214405 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017020072459201515, "loss": 2.0586, "step": 214410 }, { "epoch": 0.5, "grad_norm": 1.71875, "learning_rate": 0.00017019940828265405, "loss": 2.0799, "step": 214415 }, { "epoch": 0.5, "grad_norm": 2.328125, "learning_rate": 0.0001701980919493115, "loss": 2.1025, "step": 214420 }, { "epoch": 0.5, "grad_norm": 2.359375, "learning_rate": 0.00017019677559198805, "loss": 2.1254, "step": 214425 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017019545921068412, "loss": 2.2561, "step": 214430 }, { "epoch": 0.5, "grad_norm": 2.03125, "learning_rate": 0.0001701941428054001, "loss": 2.1054, "step": 214435 }, { "epoch": 0.5, "grad_norm": 1.8984375, "learning_rate": 0.00017019282637613652, "loss": 2.1703, "step": 214440 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001701915099228938, "loss": 2.1322, "step": 214445 }, { "epoch": 0.5, "grad_norm": 2.5625, "learning_rate": 0.00017019019344567237, "loss": 2.0952, "step": 214450 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.0001701888769444727, "loss": 1.9541, "step": 214455 }, { "epoch": 0.5, "grad_norm": 2.34375, "learning_rate": 0.00017018756041929526, "loss": 2.219, "step": 214460 }, { "epoch": 0.5, "grad_norm": 2.09375, "learning_rate": 0.00017018624387014043, "loss": 2.1085, "step": 214465 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017018492729700875, "loss": 1.9217, "step": 214470 }, { "epoch": 0.5, "grad_norm": 2.1875, "learning_rate": 0.0001701836106999006, "loss": 2.3588, "step": 214475 }, { "epoch": 0.5, "grad_norm": 1.96875, "learning_rate": 0.00017018229407881648, "loss": 1.9755, "step": 214480 }, { "epoch": 0.5, "grad_norm": 2.203125, "learning_rate": 0.00017018097743375682, "loss": 2.1358, "step": 214485 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.000170179660764722, "loss": 2.05, "step": 214490 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.0001701783440717126, "loss": 2.0721, "step": 214495 }, { "epoch": 0.5, "grad_norm": 2.234375, "learning_rate": 0.000170177027354729, "loss": 2.0024, "step": 214500 }, { "epoch": 0.5, "grad_norm": 2.546875, "learning_rate": 0.00017017571061377163, "loss": 1.9612, "step": 214505 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.00017017439384884098, "loss": 2.0693, "step": 214510 }, { "epoch": 0.5, "grad_norm": 2.15625, "learning_rate": 0.00017017307705993748, "loss": 2.0565, "step": 214515 }, { "epoch": 0.5, "grad_norm": 2.0625, "learning_rate": 0.00017017176024706156, "loss": 2.3087, "step": 214520 }, { "epoch": 0.5, "grad_norm": 2.125, "learning_rate": 0.00017017044341021374, "loss": 2.0219, "step": 214525 }, { "epoch": 0.5, "grad_norm": 1.78125, "learning_rate": 0.0001701691265493944, "loss": 2.1136, "step": 214530 }, { "epoch": 0.5, "grad_norm": 1.59375, "learning_rate": 0.000170167809664604, "loss": 2.1154, "step": 214535 }, { "epoch": 0.5, "grad_norm": 1.921875, "learning_rate": 0.00017016649275584304, "loss": 2.2081, "step": 214540 }, { "epoch": 0.5, "grad_norm": 2.078125, "learning_rate": 0.0001701651758231119, "loss": 2.0915, "step": 214545 }, { "epoch": 0.5, "grad_norm": 2.0, "learning_rate": 0.00017016385886641107, "loss": 2.1155, "step": 214550 }, { "epoch": 0.5, "grad_norm": 2.265625, "learning_rate": 0.00017016254188574102, "loss": 2.1462, "step": 214555 }, { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.00017016122488110216, "loss": 1.9838, "step": 214560 }, { "epoch": 0.5, "grad_norm": 2.5625, "learning_rate": 0.00017015990785249497, "loss": 2.0408, "step": 214565 }, { "epoch": 0.5, "grad_norm": 2.296875, "learning_rate": 0.00017015859079991982, "loss": 2.2332, "step": 214570 }, { "epoch": 0.5, "grad_norm": 2.171875, "learning_rate": 0.00017015727372337726, "loss": 1.9233, "step": 214575 }, { "epoch": 0.5, "grad_norm": 2.109375, "learning_rate": 0.0001701559566228677, "loss": 2.0741, "step": 214580 }, { "epoch": 0.5, "grad_norm": 1.8984375, "learning_rate": 0.00017015463949839163, "loss": 2.0559, "step": 214585 }, { "epoch": 0.5, "grad_norm": 3.265625, "learning_rate": 0.0001701533223499494, "loss": 2.307, "step": 214590 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.0001701520051775416, "loss": 2.0116, "step": 214595 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00017015068798116852, "loss": 1.9953, "step": 214600 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00017014937076083073, "loss": 2.2671, "step": 214605 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00017014805351652865, "loss": 2.2679, "step": 214610 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00017014673624826272, "loss": 2.0204, "step": 214615 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00017014541895603338, "loss": 2.2639, "step": 214620 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.0001701441016398411, "loss": 2.1894, "step": 214625 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.00017014278429968634, "loss": 1.8763, "step": 214630 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.0001701414669355695, "loss": 2.0486, "step": 214635 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00017014014954749108, "loss": 2.1037, "step": 214640 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.00017013883213545152, "loss": 1.9854, "step": 214645 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00017013751469945122, "loss": 2.0505, "step": 214650 }, { "epoch": 0.51, "grad_norm": 2.515625, "learning_rate": 0.00017013619723949073, "loss": 2.3705, "step": 214655 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00017013487975557043, "loss": 2.0573, "step": 214660 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.00017013356224769078, "loss": 2.2139, "step": 214665 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00017013224471585221, "loss": 1.9802, "step": 214670 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00017013092716005522, "loss": 2.0547, "step": 214675 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00017012960958030023, "loss": 2.2434, "step": 214680 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00017012829197658768, "loss": 2.1862, "step": 214685 }, { "epoch": 0.51, "grad_norm": 1.78125, "learning_rate": 0.00017012697434891805, "loss": 2.1967, "step": 214690 }, { "epoch": 0.51, "grad_norm": 2.5625, "learning_rate": 0.00017012565669729173, "loss": 2.077, "step": 214695 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00017012433902170926, "loss": 2.1048, "step": 214700 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00017012302132217105, "loss": 2.0682, "step": 214705 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00017012170359867752, "loss": 2.1073, "step": 214710 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.00017012038585122915, "loss": 2.0588, "step": 214715 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.0001701190680798264, "loss": 2.1551, "step": 214720 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00017011775028446968, "loss": 2.1068, "step": 214725 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00017011643246515946, "loss": 2.1864, "step": 214730 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.0001701151146218962, "loss": 1.9978, "step": 214735 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00017011379675468038, "loss": 2.1736, "step": 214740 }, { "epoch": 0.51, "grad_norm": 2.78125, "learning_rate": 0.0001701124788635124, "loss": 2.3925, "step": 214745 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.00017011116094839271, "loss": 2.1727, "step": 214750 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00017010984300932177, "loss": 2.221, "step": 214755 }, { "epoch": 0.51, "grad_norm": 2.59375, "learning_rate": 0.00017010852504630003, "loss": 2.1393, "step": 214760 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.00017010720705932798, "loss": 2.2346, "step": 214765 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.00017010588904840603, "loss": 2.1809, "step": 214770 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00017010457101353463, "loss": 2.0988, "step": 214775 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00017010325295471423, "loss": 2.1099, "step": 214780 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001701019348719453, "loss": 1.9314, "step": 214785 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.00017010061676522826, "loss": 2.0764, "step": 214790 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00017009929863456358, "loss": 2.1401, "step": 214795 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.00017009798047995173, "loss": 1.9531, "step": 214800 }, { "epoch": 0.51, "grad_norm": 1.9375, "learning_rate": 0.00017009666230139315, "loss": 2.2965, "step": 214805 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.0001700953440988882, "loss": 1.9714, "step": 214810 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00017009402587243747, "loss": 2.1179, "step": 214815 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00017009270762204136, "loss": 2.0569, "step": 214820 }, { "epoch": 0.51, "grad_norm": 1.859375, "learning_rate": 0.0001700913893477003, "loss": 2.057, "step": 214825 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00017009007104941474, "loss": 2.0569, "step": 214830 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00017008875272718515, "loss": 2.2793, "step": 214835 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00017008743438101195, "loss": 2.0637, "step": 214840 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.00017008611601089562, "loss": 2.3059, "step": 214845 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00017008479761683663, "loss": 2.1547, "step": 214850 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00017008347919883536, "loss": 1.9749, "step": 214855 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00017008216075689234, "loss": 2.0119, "step": 214860 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00017008084229100793, "loss": 2.2314, "step": 214865 }, { "epoch": 0.51, "grad_norm": 5.03125, "learning_rate": 0.0001700795238011827, "loss": 2.1123, "step": 214870 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00017007820528741697, "loss": 2.1258, "step": 214875 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00017007688674971133, "loss": 1.9188, "step": 214880 }, { "epoch": 0.51, "grad_norm": 2.515625, "learning_rate": 0.0001700755681880661, "loss": 2.1782, "step": 214885 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.0001700742496024818, "loss": 2.0252, "step": 214890 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.0001700729309929588, "loss": 2.1619, "step": 214895 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00017007161235949774, "loss": 2.0126, "step": 214900 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00017007029370209886, "loss": 2.1317, "step": 214905 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00017006897502076272, "loss": 1.9749, "step": 214910 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00017006765631548976, "loss": 1.9975, "step": 214915 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001700663375862804, "loss": 2.1904, "step": 214920 }, { "epoch": 0.51, "grad_norm": 3.6875, "learning_rate": 0.00017006501883313512, "loss": 2.0263, "step": 214925 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00017006370005605433, "loss": 2.0495, "step": 214930 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00017006238125503854, "loss": 2.2242, "step": 214935 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00017006106243008817, "loss": 2.0553, "step": 214940 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00017005974358120366, "loss": 2.1683, "step": 214945 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00017005842470838546, "loss": 2.1779, "step": 214950 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00017005710581163407, "loss": 1.926, "step": 214955 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00017005578689094984, "loss": 2.0003, "step": 214960 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00017005446794633334, "loss": 2.2003, "step": 214965 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00017005314897778498, "loss": 2.1091, "step": 214970 }, { "epoch": 0.51, "grad_norm": 1.9375, "learning_rate": 0.00017005182998530512, "loss": 2.0303, "step": 214975 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00017005051096889433, "loss": 2.1983, "step": 214980 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.000170049191928553, "loss": 2.3392, "step": 214985 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00017004787286428158, "loss": 2.1284, "step": 214990 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00017004655377608058, "loss": 1.9553, "step": 214995 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00017004523466395037, "loss": 2.191, "step": 215000 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00017004391552789144, "loss": 2.0102, "step": 215005 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00017004259636790427, "loss": 2.0783, "step": 215010 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00017004127718398928, "loss": 2.1034, "step": 215015 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.0001700399579761469, "loss": 2.0188, "step": 215020 }, { "epoch": 0.51, "grad_norm": 1.8828125, "learning_rate": 0.00017003863874437758, "loss": 2.1013, "step": 215025 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00017003731948868182, "loss": 2.2274, "step": 215030 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00017003600020906002, "loss": 2.0884, "step": 215035 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00017003468090551267, "loss": 2.1069, "step": 215040 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.0001700333615780402, "loss": 2.0198, "step": 215045 }, { "epoch": 0.51, "grad_norm": 1.8515625, "learning_rate": 0.00017003204222664305, "loss": 2.252, "step": 215050 }, { "epoch": 0.51, "grad_norm": 1.890625, "learning_rate": 0.00017003072285132168, "loss": 2.0464, "step": 215055 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00017002940345207658, "loss": 2.2524, "step": 215060 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00017002808402890812, "loss": 2.0937, "step": 215065 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00017002676458181685, "loss": 2.1796, "step": 215070 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.00017002544511080314, "loss": 1.9808, "step": 215075 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00017002412561586744, "loss": 2.0939, "step": 215080 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00017002280609701027, "loss": 2.302, "step": 215085 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00017002148655423203, "loss": 2.179, "step": 215090 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00017002016698753318, "loss": 2.1169, "step": 215095 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00017001884739691417, "loss": 2.1146, "step": 215100 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00017001752778237543, "loss": 2.0, "step": 215105 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00017001620814391746, "loss": 2.1144, "step": 215110 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.0001700148884815407, "loss": 2.2261, "step": 215115 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.00017001356879524555, "loss": 2.1179, "step": 215120 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00017001224908503248, "loss": 2.1648, "step": 215125 }, { "epoch": 0.51, "grad_norm": 2.734375, "learning_rate": 0.000170010929350902, "loss": 2.0655, "step": 215130 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.00017000960959285448, "loss": 2.2085, "step": 215135 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00017000828981089043, "loss": 2.2547, "step": 215140 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.00017000697000501026, "loss": 2.1137, "step": 215145 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00017000565017521446, "loss": 2.0826, "step": 215150 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00017000433032150343, "loss": 2.2665, "step": 215155 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00017000301044387767, "loss": 1.8654, "step": 215160 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00017000169054233764, "loss": 2.0871, "step": 215165 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00017000037061688374, "loss": 2.1421, "step": 215170 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016999905066751646, "loss": 1.9633, "step": 215175 }, { "epoch": 0.51, "grad_norm": 1.734375, "learning_rate": 0.0001699977306942362, "loss": 1.9801, "step": 215180 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016999641069704343, "loss": 2.0584, "step": 215185 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016999509067593867, "loss": 2.2454, "step": 215190 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.0001699937706309223, "loss": 2.2276, "step": 215195 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001699924505619948, "loss": 2.1103, "step": 215200 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.0001699911304691566, "loss": 2.2343, "step": 215205 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016998981035240814, "loss": 2.0995, "step": 215210 }, { "epoch": 0.51, "grad_norm": 1.8125, "learning_rate": 0.0001699884902117499, "loss": 2.0163, "step": 215215 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016998717004718233, "loss": 2.1414, "step": 215220 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016998584985870586, "loss": 2.1415, "step": 215225 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.000169984529646321, "loss": 2.1916, "step": 215230 }, { "epoch": 0.51, "grad_norm": 1.8046875, "learning_rate": 0.00016998320941002812, "loss": 2.0606, "step": 215235 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016998188914982772, "loss": 2.3022, "step": 215240 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.0001699805688657202, "loss": 1.8866, "step": 215245 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016997924855770608, "loss": 2.0387, "step": 215250 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.0001699779282257858, "loss": 1.8618, "step": 215255 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.00016997660786995976, "loss": 1.9761, "step": 215260 }, { "epoch": 0.51, "grad_norm": 2.515625, "learning_rate": 0.00016997528749022845, "loss": 2.1008, "step": 215265 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016997396708659228, "loss": 2.1188, "step": 215270 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016997264665905178, "loss": 2.035, "step": 215275 }, { "epoch": 0.51, "grad_norm": 1.7890625, "learning_rate": 0.00016997132620760733, "loss": 2.017, "step": 215280 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016997000573225942, "loss": 2.1496, "step": 215285 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001699686852330085, "loss": 2.1202, "step": 215290 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016996736470985496, "loss": 2.1084, "step": 215295 }, { "epoch": 0.51, "grad_norm": 1.8203125, "learning_rate": 0.00016996604416279933, "loss": 1.911, "step": 215300 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.00016996472359184203, "loss": 2.0766, "step": 215305 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016996340299698352, "loss": 2.0816, "step": 215310 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016996208237822423, "loss": 2.0397, "step": 215315 }, { "epoch": 0.51, "grad_norm": 2.53125, "learning_rate": 0.0001699607617355646, "loss": 2.0271, "step": 215320 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016995944106900518, "loss": 2.0407, "step": 215325 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016995812037854627, "loss": 2.2162, "step": 215330 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016995679966418843, "loss": 2.1545, "step": 215335 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016995547892593207, "loss": 2.1094, "step": 215340 }, { "epoch": 0.51, "grad_norm": 2.453125, "learning_rate": 0.00016995415816377767, "loss": 1.9766, "step": 215345 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016995283737772563, "loss": 2.0222, "step": 215350 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016995151656777643, "loss": 1.9712, "step": 215355 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016995019573393057, "loss": 2.1417, "step": 215360 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016994887487618843, "loss": 2.42, "step": 215365 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016994755399455047, "loss": 2.118, "step": 215370 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016994623308901717, "loss": 2.2084, "step": 215375 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016994491215958893, "loss": 1.914, "step": 215380 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.0001699435912062663, "loss": 2.1622, "step": 215385 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016994227022904966, "loss": 2.001, "step": 215390 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016994094922793943, "loss": 1.9638, "step": 215395 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016993962820293615, "loss": 2.1149, "step": 215400 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.0001699383071540402, "loss": 2.1304, "step": 215405 }, { "epoch": 0.51, "grad_norm": 1.8671875, "learning_rate": 0.0001699369860812521, "loss": 1.9824, "step": 215410 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016993566498457217, "loss": 2.0295, "step": 215415 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.000169934343864001, "loss": 2.107, "step": 215420 }, { "epoch": 0.51, "grad_norm": 2.453125, "learning_rate": 0.000169933022719539, "loss": 2.167, "step": 215425 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.0001699317015511866, "loss": 2.0023, "step": 215430 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016993038035894426, "loss": 2.1431, "step": 215435 }, { "epoch": 0.51, "grad_norm": 1.9375, "learning_rate": 0.00016992905914281243, "loss": 1.8906, "step": 215440 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016992773790279158, "loss": 1.9158, "step": 215445 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016992641663888213, "loss": 2.0675, "step": 215450 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016992509535108456, "loss": 2.1323, "step": 215455 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.0001699237740393993, "loss": 2.2745, "step": 215460 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.0001699224527038268, "loss": 2.1943, "step": 215465 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016992113134436755, "loss": 2.2373, "step": 215470 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016991980996102198, "loss": 2.0866, "step": 215475 }, { "epoch": 0.51, "grad_norm": 2.890625, "learning_rate": 0.0001699184885537905, "loss": 2.1152, "step": 215480 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.0001699171671226736, "loss": 2.0057, "step": 215485 }, { "epoch": 0.51, "grad_norm": 1.8515625, "learning_rate": 0.00016991584566767177, "loss": 2.1537, "step": 215490 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016991452418878535, "loss": 2.1137, "step": 215495 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.0001699132026860149, "loss": 2.027, "step": 215500 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016991188115936083, "loss": 2.0558, "step": 215505 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.0001699105596088236, "loss": 1.9094, "step": 215510 }, { "epoch": 0.51, "grad_norm": 2.609375, "learning_rate": 0.00016990923803440367, "loss": 2.1296, "step": 215515 }, { "epoch": 0.51, "grad_norm": 1.8671875, "learning_rate": 0.00016990791643610144, "loss": 2.1313, "step": 215520 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.0001699065948139174, "loss": 2.1637, "step": 215525 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.000169905273167852, "loss": 2.2834, "step": 215530 }, { "epoch": 0.51, "grad_norm": 2.71875, "learning_rate": 0.00016990395149790572, "loss": 2.0549, "step": 215535 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016990262980407897, "loss": 2.208, "step": 215540 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016990130808637222, "loss": 2.0776, "step": 215545 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.0001698999863447859, "loss": 2.1781, "step": 215550 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016989866457932048, "loss": 2.1686, "step": 215555 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.0001698973427899764, "loss": 1.9819, "step": 215560 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016989602097675413, "loss": 2.1651, "step": 215565 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016989469913965413, "loss": 1.9824, "step": 215570 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001698933772786768, "loss": 2.1266, "step": 215575 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016989205539382263, "loss": 1.8915, "step": 215580 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.0001698907334850921, "loss": 2.1728, "step": 215585 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016988941155248558, "loss": 1.8946, "step": 215590 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.0001698880895960036, "loss": 2.0143, "step": 215595 }, { "epoch": 0.51, "grad_norm": 2.703125, "learning_rate": 0.0001698867676156466, "loss": 2.0942, "step": 215600 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016988544561141497, "loss": 2.0645, "step": 215605 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001698841235833092, "loss": 2.0408, "step": 215610 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016988280153132978, "loss": 2.1454, "step": 215615 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.00016988147945547712, "loss": 2.1764, "step": 215620 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016988015735575166, "loss": 2.1182, "step": 215625 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.0001698788352321539, "loss": 2.1579, "step": 215630 }, { "epoch": 0.51, "grad_norm": 1.84375, "learning_rate": 0.00016987751308468425, "loss": 2.0837, "step": 215635 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.0001698761909133432, "loss": 2.0532, "step": 215640 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016987486871813113, "loss": 2.0634, "step": 215645 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016987354649904853, "loss": 2.0899, "step": 215650 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.0001698722242560959, "loss": 1.9585, "step": 215655 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016987090198927363, "loss": 2.1363, "step": 215660 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016986957969858222, "loss": 2.1064, "step": 215665 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016986825738402205, "loss": 2.2703, "step": 215670 }, { "epoch": 0.51, "grad_norm": 2.5625, "learning_rate": 0.00016986693504559365, "loss": 1.8601, "step": 215675 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.0001698656126832974, "loss": 2.1685, "step": 215680 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00016986429029713384, "loss": 2.0638, "step": 215685 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016986296788710337, "loss": 2.1621, "step": 215690 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.0001698616454532064, "loss": 2.2071, "step": 215695 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016986032299544346, "loss": 2.0698, "step": 215700 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016985900051381494, "loss": 2.2653, "step": 215705 }, { "epoch": 0.51, "grad_norm": 2.5625, "learning_rate": 0.00016985767800832132, "loss": 2.0421, "step": 215710 }, { "epoch": 0.51, "grad_norm": 1.8359375, "learning_rate": 0.00016985635547896307, "loss": 2.0724, "step": 215715 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016985503292574064, "loss": 2.2388, "step": 215720 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.0001698537103486544, "loss": 2.1901, "step": 215725 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016985238774770492, "loss": 1.8755, "step": 215730 }, { "epoch": 0.51, "grad_norm": 2.484375, "learning_rate": 0.00016985106512289256, "loss": 2.0995, "step": 215735 }, { "epoch": 0.51, "grad_norm": 1.890625, "learning_rate": 0.00016984974247421784, "loss": 1.9841, "step": 215740 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016984841980168121, "loss": 2.1104, "step": 215745 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.000169847097105283, "loss": 2.1506, "step": 215750 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016984577438502384, "loss": 2.1327, "step": 215755 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016984445164090406, "loss": 1.9099, "step": 215760 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016984312887292417, "loss": 2.0697, "step": 215765 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.0001698418060810846, "loss": 2.1137, "step": 215770 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016984048326538576, "loss": 2.1841, "step": 215775 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016983916042582816, "loss": 2.0055, "step": 215780 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016983783756241224, "loss": 1.987, "step": 215785 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016983651467513848, "loss": 2.059, "step": 215790 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016983519176400725, "loss": 2.1445, "step": 215795 }, { "epoch": 0.51, "grad_norm": 2.984375, "learning_rate": 0.0001698338688290191, "loss": 2.1985, "step": 215800 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016983254587017442, "loss": 2.1364, "step": 215805 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016983122288747365, "loss": 2.0318, "step": 215810 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016982989988091732, "loss": 2.1743, "step": 215815 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016982857685050577, "loss": 1.9544, "step": 215820 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.00016982725379623953, "loss": 2.2785, "step": 215825 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016982593071811905, "loss": 1.7642, "step": 215830 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016982460761614477, "loss": 2.1054, "step": 215835 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.0001698232844903171, "loss": 2.2789, "step": 215840 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.00016982196134063656, "loss": 2.1944, "step": 215845 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016982063816710355, "loss": 2.1812, "step": 215850 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.00016981931496971856, "loss": 2.197, "step": 215855 }, { "epoch": 0.51, "grad_norm": 1.828125, "learning_rate": 0.00016981799174848203, "loss": 2.1158, "step": 215860 }, { "epoch": 0.51, "grad_norm": 1.890625, "learning_rate": 0.00016981666850339442, "loss": 2.124, "step": 215865 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016981534523445615, "loss": 2.1367, "step": 215870 }, { "epoch": 0.51, "grad_norm": 1.59375, "learning_rate": 0.00016981402194166768, "loss": 2.0856, "step": 215875 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.0001698126986250295, "loss": 1.9889, "step": 215880 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016981137528454204, "loss": 2.2632, "step": 215885 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.0001698100519202057, "loss": 2.1023, "step": 215890 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016980872853202103, "loss": 2.0542, "step": 215895 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016980740511998843, "loss": 2.0489, "step": 215900 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.0001698060816841083, "loss": 1.8353, "step": 215905 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016980475822438122, "loss": 2.1858, "step": 215910 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.0001698034347408075, "loss": 2.2576, "step": 215915 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016980211123338772, "loss": 2.1759, "step": 215920 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016980078770212223, "loss": 2.1527, "step": 215925 }, { "epoch": 0.51, "grad_norm": 1.859375, "learning_rate": 0.0001697994641470116, "loss": 2.17, "step": 215930 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.0001697981405680561, "loss": 2.0268, "step": 215935 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016979681696525635, "loss": 2.0935, "step": 215940 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016979549333861273, "loss": 2.0057, "step": 215945 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.00016979416968812571, "loss": 2.113, "step": 215950 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016979284601379574, "loss": 2.1183, "step": 215955 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016979152231562326, "loss": 2.1575, "step": 215960 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016979019859360872, "loss": 2.2808, "step": 215965 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.0001697888748477526, "loss": 1.9137, "step": 215970 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016978755107805533, "loss": 1.9355, "step": 215975 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016978622728451736, "loss": 2.0441, "step": 215980 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016978490346713915, "loss": 2.1054, "step": 215985 }, { "epoch": 0.51, "grad_norm": 1.8203125, "learning_rate": 0.00016978357962592116, "loss": 1.9595, "step": 215990 }, { "epoch": 0.51, "grad_norm": 1.8515625, "learning_rate": 0.00016978225576086382, "loss": 1.9482, "step": 215995 }, { "epoch": 0.51, "grad_norm": 1.84375, "learning_rate": 0.0001697809318719676, "loss": 1.8617, "step": 216000 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016977960795923295, "loss": 2.1644, "step": 216005 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.0001697782840226603, "loss": 1.9698, "step": 216010 }, { "epoch": 0.51, "grad_norm": 1.8203125, "learning_rate": 0.00016977696006225017, "loss": 1.9082, "step": 216015 }, { "epoch": 0.51, "grad_norm": 1.640625, "learning_rate": 0.00016977563607800293, "loss": 2.0556, "step": 216020 }, { "epoch": 0.51, "grad_norm": 1.8125, "learning_rate": 0.00016977431206991906, "loss": 2.0123, "step": 216025 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016977298803799905, "loss": 2.0587, "step": 216030 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.0001697716639822433, "loss": 2.0275, "step": 216035 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016977033990265233, "loss": 2.1128, "step": 216040 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.0001697690157992265, "loss": 2.138, "step": 216045 }, { "epoch": 0.51, "grad_norm": 2.65625, "learning_rate": 0.0001697676916719663, "loss": 2.1094, "step": 216050 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016976636752087223, "loss": 2.2021, "step": 216055 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016976504334594467, "loss": 2.1216, "step": 216060 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016976371914718413, "loss": 2.0372, "step": 216065 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.000169762394924591, "loss": 2.0483, "step": 216070 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.0001697610706781658, "loss": 2.1132, "step": 216075 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016975974640790896, "loss": 2.0076, "step": 216080 }, { "epoch": 0.51, "grad_norm": 2.671875, "learning_rate": 0.00016975842211382092, "loss": 2.1323, "step": 216085 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.0001697570977959021, "loss": 2.1225, "step": 216090 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016975577345415305, "loss": 2.1481, "step": 216095 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00016975444908857415, "loss": 2.0326, "step": 216100 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016975312469916584, "loss": 2.0434, "step": 216105 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016975180028592864, "loss": 2.3693, "step": 216110 }, { "epoch": 0.51, "grad_norm": 2.6875, "learning_rate": 0.00016975047584886292, "loss": 2.1139, "step": 216115 }, { "epoch": 0.51, "grad_norm": 1.8828125, "learning_rate": 0.00016974915138796915, "loss": 2.0417, "step": 216120 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016974782690324785, "loss": 1.9578, "step": 216125 }, { "epoch": 0.51, "grad_norm": 1.8125, "learning_rate": 0.0001697465023946994, "loss": 2.001, "step": 216130 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016974517786232432, "loss": 2.0533, "step": 216135 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.000169743853306123, "loss": 2.2179, "step": 216140 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.0001697425287260959, "loss": 1.889, "step": 216145 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016974120412224347, "loss": 2.0234, "step": 216150 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016973987949456625, "loss": 1.9866, "step": 216155 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016973855484306457, "loss": 2.0126, "step": 216160 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016973723016773894, "loss": 2.0884, "step": 216165 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016973590546858982, "loss": 2.1147, "step": 216170 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016973458074561766, "loss": 2.1828, "step": 216175 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016973325599882287, "loss": 2.2076, "step": 216180 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016973193122820593, "loss": 1.9676, "step": 216185 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016973060643376731, "loss": 2.3144, "step": 216190 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016972928161550747, "loss": 2.1351, "step": 216195 }, { "epoch": 0.51, "grad_norm": 2.796875, "learning_rate": 0.00016972795677342687, "loss": 2.0531, "step": 216200 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016972663190752588, "loss": 2.005, "step": 216205 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.000169725307017805, "loss": 2.1349, "step": 216210 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016972398210426474, "loss": 2.1389, "step": 216215 }, { "epoch": 0.51, "grad_norm": 1.8671875, "learning_rate": 0.00016972265716690547, "loss": 2.1746, "step": 216220 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.0001697213322057277, "loss": 2.2507, "step": 216225 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016972000722073182, "loss": 1.8945, "step": 216230 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016971868221191832, "loss": 2.0882, "step": 216235 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.0001697173571792877, "loss": 1.972, "step": 216240 }, { "epoch": 0.51, "grad_norm": 1.8359375, "learning_rate": 0.00016971603212284034, "loss": 2.2713, "step": 216245 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016971470704257676, "loss": 2.036, "step": 216250 }, { "epoch": 0.51, "grad_norm": 1.8671875, "learning_rate": 0.00016971338193849734, "loss": 2.0114, "step": 216255 }, { "epoch": 0.51, "grad_norm": 2.640625, "learning_rate": 0.00016971205681060253, "loss": 2.0842, "step": 216260 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.00016971073165889284, "loss": 2.1723, "step": 216265 }, { "epoch": 0.51, "grad_norm": 1.8359375, "learning_rate": 0.00016970940648336873, "loss": 2.286, "step": 216270 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.0001697080812840306, "loss": 2.2039, "step": 216275 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.0001697067560608789, "loss": 1.9374, "step": 216280 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016970543081391414, "loss": 1.8674, "step": 216285 }, { "epoch": 0.51, "grad_norm": 1.5703125, "learning_rate": 0.00016970410554313672, "loss": 1.8896, "step": 216290 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016970278024854712, "loss": 2.205, "step": 216295 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001697014549301458, "loss": 2.089, "step": 216300 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.0001697001295879332, "loss": 2.3214, "step": 216305 }, { "epoch": 0.51, "grad_norm": 1.8828125, "learning_rate": 0.00016969880422190977, "loss": 1.9508, "step": 216310 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016969747883207594, "loss": 1.9734, "step": 216315 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016969615341843222, "loss": 2.049, "step": 216320 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.000169694827980979, "loss": 2.1541, "step": 216325 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001696935025197168, "loss": 2.2696, "step": 216330 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016969217703464602, "loss": 2.046, "step": 216335 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.0001696908515257671, "loss": 1.8676, "step": 216340 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016968952599308055, "loss": 2.1729, "step": 216345 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016968820043658677, "loss": 2.1891, "step": 216350 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016968687485628627, "loss": 2.1887, "step": 216355 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016968554925217942, "loss": 1.9895, "step": 216360 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016968422362426678, "loss": 2.0248, "step": 216365 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016968289797254872, "loss": 1.9381, "step": 216370 }, { "epoch": 0.51, "grad_norm": 2.59375, "learning_rate": 0.0001696815722970257, "loss": 2.1642, "step": 216375 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016968024659769822, "loss": 2.118, "step": 216380 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.0001696789208745667, "loss": 1.9259, "step": 216385 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.0001696775951276316, "loss": 2.2098, "step": 216390 }, { "epoch": 0.51, "grad_norm": 2.6875, "learning_rate": 0.00016967626935689335, "loss": 2.1923, "step": 216395 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016967494356235244, "loss": 1.9738, "step": 216400 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001696736177440093, "loss": 2.1229, "step": 216405 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016967229190186439, "loss": 2.0249, "step": 216410 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016967096603591814, "loss": 2.2126, "step": 216415 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016966964014617105, "loss": 2.1893, "step": 216420 }, { "epoch": 0.51, "grad_norm": 8.125, "learning_rate": 0.00016966831423262355, "loss": 2.1896, "step": 216425 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016966698829527609, "loss": 1.9228, "step": 216430 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.0001696656623341291, "loss": 2.0869, "step": 216435 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016966433634918308, "loss": 2.3109, "step": 216440 }, { "epoch": 0.51, "grad_norm": 2.515625, "learning_rate": 0.00016966301034043846, "loss": 2.1865, "step": 216445 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016966168430789565, "loss": 2.1025, "step": 216450 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016966035825155518, "loss": 2.064, "step": 216455 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.0001696590321714175, "loss": 1.964, "step": 216460 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016965770606748297, "loss": 2.0019, "step": 216465 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016965637993975215, "loss": 2.0204, "step": 216470 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016965505378822542, "loss": 1.9426, "step": 216475 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016965372761290327, "loss": 1.9902, "step": 216480 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00016965240141378614, "loss": 2.1095, "step": 216485 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016965107519087452, "loss": 2.0337, "step": 216490 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016964974894416877, "loss": 2.0019, "step": 216495 }, { "epoch": 0.51, "grad_norm": 1.890625, "learning_rate": 0.00016964842267366942, "loss": 2.174, "step": 216500 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016964709637937693, "loss": 2.2261, "step": 216505 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016964577006129173, "loss": 2.0674, "step": 216510 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016964444371941423, "loss": 2.3296, "step": 216515 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.000169643117353745, "loss": 2.0705, "step": 216520 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016964179096428433, "loss": 1.906, "step": 216525 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016964046455103282, "loss": 2.2966, "step": 216530 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016963913811399083, "loss": 2.1953, "step": 216535 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016963781165315887, "loss": 2.3029, "step": 216540 }, { "epoch": 0.51, "grad_norm": 2.53125, "learning_rate": 0.00016963648516853737, "loss": 1.8796, "step": 216545 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00016963515866012677, "loss": 1.804, "step": 216550 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016963383212792755, "loss": 2.0242, "step": 216555 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016963250557194015, "loss": 2.0323, "step": 216560 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.000169631178992165, "loss": 2.1647, "step": 216565 }, { "epoch": 0.51, "grad_norm": 2.578125, "learning_rate": 0.00016962985238860262, "loss": 2.1345, "step": 216570 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.0001696285257612534, "loss": 2.0794, "step": 216575 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001696271991101178, "loss": 2.0807, "step": 216580 }, { "epoch": 0.51, "grad_norm": 2.53125, "learning_rate": 0.0001696258724351963, "loss": 1.9418, "step": 216585 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.0001696245457364893, "loss": 2.1786, "step": 216590 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016962321901399733, "loss": 2.1723, "step": 216595 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016962189226772082, "loss": 2.1824, "step": 216600 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.0001696205654976602, "loss": 2.0319, "step": 216605 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016961923870381589, "loss": 2.1811, "step": 216610 }, { "epoch": 0.51, "grad_norm": 1.8828125, "learning_rate": 0.00016961791188618844, "loss": 2.1188, "step": 216615 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016961658504477823, "loss": 1.9343, "step": 216620 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016961525817958572, "loss": 2.1917, "step": 216625 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016961393129061138, "loss": 1.996, "step": 216630 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016961260437785565, "loss": 2.0518, "step": 216635 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.00016961127744131898, "loss": 2.0229, "step": 216640 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.0001696099504810019, "loss": 2.0374, "step": 216645 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016960862349690476, "loss": 2.1873, "step": 216650 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016960729648902805, "loss": 2.0547, "step": 216655 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.0001696059694573722, "loss": 1.8514, "step": 216660 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016960464240193773, "loss": 2.128, "step": 216665 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016960331532272504, "loss": 2.2444, "step": 216670 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016960198821973458, "loss": 2.1775, "step": 216675 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016960066109296683, "loss": 2.0245, "step": 216680 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016959933394242223, "loss": 2.0279, "step": 216685 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016959800676810124, "loss": 2.0352, "step": 216690 }, { "epoch": 0.51, "grad_norm": 2.609375, "learning_rate": 0.0001695966795700043, "loss": 2.178, "step": 216695 }, { "epoch": 0.51, "grad_norm": 1.7734375, "learning_rate": 0.00016959535234813188, "loss": 2.2267, "step": 216700 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016959402510248443, "loss": 2.0849, "step": 216705 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016959269783306237, "loss": 2.1725, "step": 216710 }, { "epoch": 0.51, "grad_norm": 1.75, "learning_rate": 0.0001695913705398662, "loss": 1.9119, "step": 216715 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016959004322289638, "loss": 1.9353, "step": 216720 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016958871588215328, "loss": 2.2089, "step": 216725 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.00016958738851763746, "loss": 1.949, "step": 216730 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016958606112934933, "loss": 1.9027, "step": 216735 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016958473371728934, "loss": 2.2963, "step": 216740 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.0001695834062814579, "loss": 1.9086, "step": 216745 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016958207882185555, "loss": 2.1097, "step": 216750 }, { "epoch": 0.51, "grad_norm": 2.5625, "learning_rate": 0.00016958075133848268, "loss": 2.0432, "step": 216755 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.00016957942383133979, "loss": 2.01, "step": 216760 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016957809630042728, "loss": 2.0366, "step": 216765 }, { "epoch": 0.51, "grad_norm": 1.7421875, "learning_rate": 0.0001695767687457456, "loss": 2.0835, "step": 216770 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.00016957544116729528, "loss": 2.2218, "step": 216775 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016957411356507672, "loss": 2.2031, "step": 216780 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016957278593909042, "loss": 2.2503, "step": 216785 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016957145828933672, "loss": 1.9508, "step": 216790 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016957013061581617, "loss": 2.0899, "step": 216795 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001695688029185292, "loss": 1.9664, "step": 216800 }, { "epoch": 0.51, "grad_norm": 2.578125, "learning_rate": 0.0001695674751974763, "loss": 2.0975, "step": 216805 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.0001695661474526579, "loss": 1.9241, "step": 216810 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.0001695648196840744, "loss": 2.0801, "step": 216815 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.0001695634918917263, "loss": 2.1822, "step": 216820 }, { "epoch": 0.51, "grad_norm": 2.78125, "learning_rate": 0.00016956216407561402, "loss": 2.1962, "step": 216825 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.0001695608362357381, "loss": 1.9577, "step": 216830 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.0001695595083720989, "loss": 2.0775, "step": 216835 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016955818048469695, "loss": 2.1014, "step": 216840 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016955685257353262, "loss": 2.166, "step": 216845 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016955552463860643, "loss": 2.0923, "step": 216850 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.0001695541966799188, "loss": 2.1307, "step": 216855 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.0001695528686974702, "loss": 2.1786, "step": 216860 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016955154069126112, "loss": 2.1242, "step": 216865 }, { "epoch": 0.51, "grad_norm": 2.625, "learning_rate": 0.00016955021266129192, "loss": 1.8247, "step": 216870 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016954888460756311, "loss": 2.2676, "step": 216875 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.00016954755653007512, "loss": 1.993, "step": 216880 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016954622842882847, "loss": 2.0787, "step": 216885 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016954490030382356, "loss": 1.8873, "step": 216890 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016954357215506085, "loss": 2.2422, "step": 216895 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016954224398254078, "loss": 2.1585, "step": 216900 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.0001695409157862638, "loss": 1.9456, "step": 216905 }, { "epoch": 0.51, "grad_norm": 2.765625, "learning_rate": 0.00016953958756623042, "loss": 2.0951, "step": 216910 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016953825932244105, "loss": 2.1681, "step": 216915 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00016953693105489613, "loss": 2.1704, "step": 216920 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016953560276359615, "loss": 2.0819, "step": 216925 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.00016953427444854152, "loss": 2.1633, "step": 216930 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.00016953294610973276, "loss": 2.1735, "step": 216935 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016953161774717028, "loss": 2.2414, "step": 216940 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016953028936085454, "loss": 2.0103, "step": 216945 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016952896095078597, "loss": 2.105, "step": 216950 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.000169527632516965, "loss": 2.0169, "step": 216955 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016952630405939222, "loss": 2.152, "step": 216960 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016952497557806795, "loss": 2.1744, "step": 216965 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016952364707299272, "loss": 2.1239, "step": 216970 }, { "epoch": 0.51, "grad_norm": 2.6875, "learning_rate": 0.00016952231854416692, "loss": 2.1627, "step": 216975 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.000169520989991591, "loss": 2.079, "step": 216980 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016951966141526552, "loss": 2.0754, "step": 216985 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.0001695183328151908, "loss": 2.0468, "step": 216990 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.0001695170041913674, "loss": 2.1531, "step": 216995 }, { "epoch": 0.51, "grad_norm": 1.859375, "learning_rate": 0.00016951567554379575, "loss": 2.1265, "step": 217000 }, { "epoch": 0.51, "grad_norm": 1.8203125, "learning_rate": 0.00016951434687247625, "loss": 1.8009, "step": 217005 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016951301817740938, "loss": 2.0423, "step": 217010 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.0001695116894585956, "loss": 2.2227, "step": 217015 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00016951036071603538, "loss": 2.2113, "step": 217020 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.0001695090319497292, "loss": 1.9789, "step": 217025 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.0001695077031596774, "loss": 2.0996, "step": 217030 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016950637434588056, "loss": 2.0663, "step": 217035 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016950504550833904, "loss": 2.2598, "step": 217040 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016950371664705338, "loss": 2.11, "step": 217045 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016950238776202396, "loss": 2.1247, "step": 217050 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.00016950105885325128, "loss": 2.2012, "step": 217055 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.0001694997299207358, "loss": 2.0348, "step": 217060 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.0001694984009644779, "loss": 2.1336, "step": 217065 }, { "epoch": 0.51, "grad_norm": 2.703125, "learning_rate": 0.00016949707198447812, "loss": 2.151, "step": 217070 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016949574298073686, "loss": 2.0799, "step": 217075 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.0001694944139532546, "loss": 2.1234, "step": 217080 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.0001694930849020318, "loss": 2.0259, "step": 217085 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016949175582706887, "loss": 2.1428, "step": 217090 }, { "epoch": 0.51, "grad_norm": 1.9375, "learning_rate": 0.00016949042672836633, "loss": 1.9444, "step": 217095 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.0001694890976059246, "loss": 2.0894, "step": 217100 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016948776845974412, "loss": 1.9592, "step": 217105 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00016948643928982535, "loss": 2.0857, "step": 217110 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016948511009616876, "loss": 2.1427, "step": 217115 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.0001694837808787748, "loss": 2.0832, "step": 217120 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016948245163764394, "loss": 2.1967, "step": 217125 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016948112237277657, "loss": 2.1433, "step": 217130 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.0001694797930841732, "loss": 2.0206, "step": 217135 }, { "epoch": 0.51, "grad_norm": 1.7421875, "learning_rate": 0.0001694784637718343, "loss": 1.9985, "step": 217140 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016947713443576028, "loss": 2.0344, "step": 217145 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.0001694758050759516, "loss": 2.0774, "step": 217150 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016947447569240875, "loss": 2.2191, "step": 217155 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016947314628513218, "loss": 2.1336, "step": 217160 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.00016947181685412227, "loss": 2.1087, "step": 217165 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00016947048739937955, "loss": 2.2429, "step": 217170 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016946915792090446, "loss": 1.9651, "step": 217175 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001694678284186974, "loss": 2.0567, "step": 217180 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.00016946649889275893, "loss": 2.0748, "step": 217185 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.0001694651693430894, "loss": 2.1967, "step": 217190 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016946383976968935, "loss": 2.2612, "step": 217195 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016946251017255914, "loss": 2.1411, "step": 217200 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016946118055169933, "loss": 2.1002, "step": 217205 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.0001694598509071103, "loss": 2.2033, "step": 217210 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016945852123879254, "loss": 2.1519, "step": 217215 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016945719154674646, "loss": 2.2379, "step": 217220 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016945586183097255, "loss": 2.1216, "step": 217225 }, { "epoch": 0.51, "grad_norm": 2.578125, "learning_rate": 0.00016945453209147126, "loss": 2.1282, "step": 217230 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00016945320232824305, "loss": 2.1528, "step": 217235 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016945187254128837, "loss": 2.0567, "step": 217240 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.0001694505427306077, "loss": 2.0086, "step": 217245 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016944921289620138, "loss": 1.869, "step": 217250 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016944788303807, "loss": 2.0043, "step": 217255 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.000169446553156214, "loss": 2.1159, "step": 217260 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016944522325063372, "loss": 2.0959, "step": 217265 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016944389332132972, "loss": 2.2171, "step": 217270 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016944256336830243, "loss": 2.0252, "step": 217275 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016944123339155233, "loss": 2.003, "step": 217280 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001694399033910798, "loss": 2.1033, "step": 217285 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016943857336688538, "loss": 2.2007, "step": 217290 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016943724331896948, "loss": 2.0904, "step": 217295 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.00016943591324733255, "loss": 2.0106, "step": 217300 }, { "epoch": 0.51, "grad_norm": 1.8828125, "learning_rate": 0.00016943458315197505, "loss": 2.1295, "step": 217305 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016943325303289743, "loss": 2.0066, "step": 217310 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016943192289010014, "loss": 2.0028, "step": 217315 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.0001694305927235837, "loss": 2.0659, "step": 217320 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016942926253334846, "loss": 2.0545, "step": 217325 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016942793231939493, "loss": 2.1785, "step": 217330 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.0001694266020817236, "loss": 2.1564, "step": 217335 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016942527182033483, "loss": 2.183, "step": 217340 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016942394153522913, "loss": 2.2202, "step": 217345 }, { "epoch": 0.51, "grad_norm": 1.859375, "learning_rate": 0.000169422611226407, "loss": 2.0282, "step": 217350 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.0001694212808938688, "loss": 2.0812, "step": 217355 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.00016941995053761506, "loss": 1.9428, "step": 217360 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001694186201576462, "loss": 2.0243, "step": 217365 }, { "epoch": 0.51, "grad_norm": 1.7734375, "learning_rate": 0.00016941728975396268, "loss": 1.9933, "step": 217370 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016941595932656495, "loss": 2.1114, "step": 217375 }, { "epoch": 0.51, "grad_norm": 1.765625, "learning_rate": 0.00016941462887545348, "loss": 2.2268, "step": 217380 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016941329840062868, "loss": 2.1401, "step": 217385 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00016941196790209108, "loss": 2.0858, "step": 217390 }, { "epoch": 0.51, "grad_norm": 1.7421875, "learning_rate": 0.00016941063737984106, "loss": 2.194, "step": 217395 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016940930683387914, "loss": 2.3453, "step": 217400 }, { "epoch": 0.51, "grad_norm": 1.8828125, "learning_rate": 0.00016940797626420573, "loss": 2.035, "step": 217405 }, { "epoch": 0.51, "grad_norm": 1.859375, "learning_rate": 0.0001694066456708213, "loss": 2.0614, "step": 217410 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016940531505372627, "loss": 2.1217, "step": 217415 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016940398441292115, "loss": 2.0066, "step": 217420 }, { "epoch": 0.51, "grad_norm": 1.71875, "learning_rate": 0.00016940265374840638, "loss": 2.0379, "step": 217425 }, { "epoch": 0.51, "grad_norm": 1.65625, "learning_rate": 0.0001694013230601824, "loss": 2.0603, "step": 217430 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016939999234824967, "loss": 2.1328, "step": 217435 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016939866161260863, "loss": 2.0655, "step": 217440 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016939733085325975, "loss": 2.0304, "step": 217445 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.0001693960000702035, "loss": 2.1237, "step": 217450 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.0001693946692634403, "loss": 2.0476, "step": 217455 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016939333843297064, "loss": 1.9298, "step": 217460 }, { "epoch": 0.51, "grad_norm": 1.78125, "learning_rate": 0.0001693920075787949, "loss": 2.2538, "step": 217465 }, { "epoch": 0.51, "grad_norm": 1.796875, "learning_rate": 0.00016939067670091368, "loss": 1.9063, "step": 217470 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016938934579932728, "loss": 2.3347, "step": 217475 }, { "epoch": 0.51, "grad_norm": 2.640625, "learning_rate": 0.00016938801487403627, "loss": 2.1887, "step": 217480 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016938668392504103, "loss": 2.0339, "step": 217485 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.000169385352952342, "loss": 2.0239, "step": 217490 }, { "epoch": 0.51, "grad_norm": 2.484375, "learning_rate": 0.00016938402195593974, "loss": 2.1835, "step": 217495 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016938269093583463, "loss": 2.1042, "step": 217500 }, { "epoch": 0.51, "grad_norm": 2.484375, "learning_rate": 0.00016938135989202712, "loss": 2.0985, "step": 217505 }, { "epoch": 0.51, "grad_norm": 2.640625, "learning_rate": 0.00016938002882451768, "loss": 2.1589, "step": 217510 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.00016937869773330676, "loss": 2.0193, "step": 217515 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016937736661839484, "loss": 1.9791, "step": 217520 }, { "epoch": 0.51, "grad_norm": 1.703125, "learning_rate": 0.0001693760354797823, "loss": 2.0655, "step": 217525 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016937470431746968, "loss": 2.1957, "step": 217530 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016937337313145743, "loss": 2.1067, "step": 217535 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016937204192174596, "loss": 2.1542, "step": 217540 }, { "epoch": 0.51, "grad_norm": 1.859375, "learning_rate": 0.00016937071068833572, "loss": 2.0148, "step": 217545 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.0001693693794312272, "loss": 1.9999, "step": 217550 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016936804815042086, "loss": 2.307, "step": 217555 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016936671684591713, "loss": 2.2366, "step": 217560 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016936538551771645, "loss": 2.0499, "step": 217565 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016936405416581932, "loss": 1.8763, "step": 217570 }, { "epoch": 0.51, "grad_norm": 2.53125, "learning_rate": 0.00016936272279022616, "loss": 2.0644, "step": 217575 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016936139139093746, "loss": 1.9228, "step": 217580 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.0001693600599679536, "loss": 2.0913, "step": 217585 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.0001693587285212751, "loss": 1.985, "step": 217590 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016935739705090246, "loss": 2.12, "step": 217595 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.000169356065556836, "loss": 2.2388, "step": 217600 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.0001693547340390763, "loss": 1.9799, "step": 217605 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016935340249762373, "loss": 2.2774, "step": 217610 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.0001693520709324788, "loss": 2.2218, "step": 217615 }, { "epoch": 0.51, "grad_norm": 2.5625, "learning_rate": 0.00016935073934364194, "loss": 1.9726, "step": 217620 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.0001693494077311136, "loss": 2.2285, "step": 217625 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016934807609489424, "loss": 2.0467, "step": 217630 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.00016934674443498435, "loss": 1.8827, "step": 217635 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016934541275138434, "loss": 1.9696, "step": 217640 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.00016934408104409468, "loss": 2.0513, "step": 217645 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.0001693427493131158, "loss": 2.05, "step": 217650 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001693414175584482, "loss": 2.0776, "step": 217655 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016934008578009234, "loss": 1.933, "step": 217660 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.0001693387539780486, "loss": 2.0189, "step": 217665 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016933742215231753, "loss": 1.9349, "step": 217670 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.0001693360903028995, "loss": 2.0779, "step": 217675 }, { "epoch": 0.51, "grad_norm": 2.9375, "learning_rate": 0.00016933475842979503, "loss": 2.0196, "step": 217680 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.00016933342653300453, "loss": 2.0936, "step": 217685 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001693320946125285, "loss": 2.2643, "step": 217690 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016933076266836735, "loss": 1.9949, "step": 217695 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016932943070052157, "loss": 2.0599, "step": 217700 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016932809870899159, "loss": 2.1624, "step": 217705 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016932676669377785, "loss": 1.9792, "step": 217710 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016932543465488084, "loss": 2.2494, "step": 217715 }, { "epoch": 0.51, "grad_norm": 1.9375, "learning_rate": 0.00016932410259230102, "loss": 2.024, "step": 217720 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016932277050603882, "loss": 2.0366, "step": 217725 }, { "epoch": 0.51, "grad_norm": 1.9375, "learning_rate": 0.0001693214383960947, "loss": 2.0108, "step": 217730 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016932010626246916, "loss": 2.204, "step": 217735 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016931877410516257, "loss": 1.8848, "step": 217740 }, { "epoch": 0.51, "grad_norm": 1.7421875, "learning_rate": 0.00016931744192417544, "loss": 1.9023, "step": 217745 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016931610971950822, "loss": 2.1475, "step": 217750 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016931477749116135, "loss": 2.2208, "step": 217755 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.0001693134452391353, "loss": 2.0996, "step": 217760 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016931211296343051, "loss": 2.1361, "step": 217765 }, { "epoch": 0.51, "grad_norm": 1.828125, "learning_rate": 0.00016931078066404747, "loss": 1.713, "step": 217770 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.0001693094483409866, "loss": 2.3094, "step": 217775 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016930811599424837, "loss": 2.1765, "step": 217780 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001693067836238332, "loss": 2.0613, "step": 217785 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.0001693054512297416, "loss": 1.9954, "step": 217790 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016930411881197402, "loss": 2.1018, "step": 217795 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016930278637053087, "loss": 2.2265, "step": 217800 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016930145390541264, "loss": 2.0381, "step": 217805 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016930012141661978, "loss": 2.1572, "step": 217810 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.00016929878890415272, "loss": 2.0986, "step": 217815 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016929745636801196, "loss": 2.0652, "step": 217820 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.0001692961238081979, "loss": 2.1848, "step": 217825 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016929479122471106, "loss": 2.1837, "step": 217830 }, { "epoch": 0.51, "grad_norm": 1.90625, "learning_rate": 0.00016929345861755186, "loss": 2.1089, "step": 217835 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016929212598672076, "loss": 2.1072, "step": 217840 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016929079333221817, "loss": 2.0592, "step": 217845 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016928946065404465, "loss": 2.046, "step": 217850 }, { "epoch": 0.51, "grad_norm": 1.7265625, "learning_rate": 0.00016928812795220056, "loss": 2.1983, "step": 217855 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.0001692867952266864, "loss": 2.1736, "step": 217860 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.0001692854624775026, "loss": 2.0374, "step": 217865 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016928412970464965, "loss": 2.1005, "step": 217870 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00016928279690812796, "loss": 2.0591, "step": 217875 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016928146408793802, "loss": 1.9508, "step": 217880 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.0001692801312440803, "loss": 2.1475, "step": 217885 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016927879837655522, "loss": 1.982, "step": 217890 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016927746548536324, "loss": 1.8413, "step": 217895 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016927613257050478, "loss": 2.0811, "step": 217900 }, { "epoch": 0.51, "grad_norm": 2.453125, "learning_rate": 0.0001692747996319804, "loss": 2.2631, "step": 217905 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.00016927346666979046, "loss": 2.0348, "step": 217910 }, { "epoch": 0.51, "grad_norm": 1.875, "learning_rate": 0.00016927213368393546, "loss": 1.9625, "step": 217915 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016927080067441583, "loss": 2.1365, "step": 217920 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016926946764123205, "loss": 2.2054, "step": 217925 }, { "epoch": 0.51, "grad_norm": 2.421875, "learning_rate": 0.00016926813458438455, "loss": 2.0756, "step": 217930 }, { "epoch": 0.51, "grad_norm": 1.96875, "learning_rate": 0.00016926680150387382, "loss": 1.9563, "step": 217935 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.00016926546839970028, "loss": 2.1045, "step": 217940 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016926413527186437, "loss": 2.2331, "step": 217945 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016926280212036665, "loss": 2.1289, "step": 217950 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016926146894520746, "loss": 2.0103, "step": 217955 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016926013574638726, "loss": 2.1395, "step": 217960 }, { "epoch": 0.51, "grad_norm": 2.828125, "learning_rate": 0.0001692588025239066, "loss": 2.0827, "step": 217965 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016925746927776584, "loss": 1.9735, "step": 217970 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016925613600796548, "loss": 2.1025, "step": 217975 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016925480271450598, "loss": 2.0565, "step": 217980 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.00016925346939738778, "loss": 2.0301, "step": 217985 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016925213605661132, "loss": 2.007, "step": 217990 }, { "epoch": 0.51, "grad_norm": 2.515625, "learning_rate": 0.0001692508026921771, "loss": 2.2068, "step": 217995 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016924946930408553, "loss": 2.0538, "step": 218000 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.0001692481358923371, "loss": 2.1558, "step": 218005 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016924680245693224, "loss": 2.0179, "step": 218010 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016924546899787143, "loss": 2.2469, "step": 218015 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.0001692441355151551, "loss": 2.0963, "step": 218020 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.00016924280200878374, "loss": 2.0032, "step": 218025 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016924146847875775, "loss": 2.1351, "step": 218030 }, { "epoch": 0.51, "grad_norm": 1.9765625, "learning_rate": 0.00016924013492507762, "loss": 2.0286, "step": 218035 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016923880134774383, "loss": 2.1701, "step": 218040 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001692374677467568, "loss": 2.0809, "step": 218045 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.000169236134122117, "loss": 2.1627, "step": 218050 }, { "epoch": 0.51, "grad_norm": 2.375, "learning_rate": 0.00016923480047382485, "loss": 2.0425, "step": 218055 }, { "epoch": 0.51, "grad_norm": 2.609375, "learning_rate": 0.00016923346680188087, "loss": 2.1502, "step": 218060 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.00016923213310628547, "loss": 2.1592, "step": 218065 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016923079938703913, "loss": 2.0813, "step": 218070 }, { "epoch": 0.51, "grad_norm": 1.828125, "learning_rate": 0.00016922946564414227, "loss": 2.1508, "step": 218075 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016922813187759535, "loss": 2.1807, "step": 218080 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.0001692267980873989, "loss": 2.1505, "step": 218085 }, { "epoch": 0.51, "grad_norm": 2.453125, "learning_rate": 0.0001692254642735533, "loss": 2.1553, "step": 218090 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016922413043605903, "loss": 1.9513, "step": 218095 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016922279657491656, "loss": 2.0032, "step": 218100 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016922146269012626, "loss": 2.1071, "step": 218105 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016922012878168872, "loss": 1.9938, "step": 218110 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00016921879484960427, "loss": 2.0217, "step": 218115 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.00016921746089387346, "loss": 2.0244, "step": 218120 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016921612691449673, "loss": 1.9394, "step": 218125 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016921479291147449, "loss": 1.9462, "step": 218130 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016921345888480722, "loss": 1.9327, "step": 218135 }, { "epoch": 0.51, "grad_norm": 1.765625, "learning_rate": 0.00016921212483449538, "loss": 2.1378, "step": 218140 }, { "epoch": 0.51, "grad_norm": 2.53125, "learning_rate": 0.00016921079076053942, "loss": 2.0902, "step": 218145 }, { "epoch": 0.51, "grad_norm": 2.578125, "learning_rate": 0.0001692094566629398, "loss": 1.8942, "step": 218150 }, { "epoch": 0.51, "grad_norm": 2.75, "learning_rate": 0.000169208122541697, "loss": 2.1925, "step": 218155 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.0001692067883968114, "loss": 2.3157, "step": 218160 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016920545422828353, "loss": 2.3131, "step": 218165 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016920412003611386, "loss": 2.0888, "step": 218170 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016920278582030276, "loss": 2.0001, "step": 218175 }, { "epoch": 0.51, "grad_norm": 2.578125, "learning_rate": 0.00016920145158085074, "loss": 2.196, "step": 218180 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016920011731775826, "loss": 2.2233, "step": 218185 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016919878303102578, "loss": 2.0794, "step": 218190 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.0001691974487206537, "loss": 2.0981, "step": 218195 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.00016919611438664253, "loss": 1.904, "step": 218200 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.0001691947800289927, "loss": 2.2055, "step": 218205 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001691934456477047, "loss": 2.0132, "step": 218210 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.000169192111242779, "loss": 2.064, "step": 218215 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016919077681421598, "loss": 2.0306, "step": 218220 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.0001691894423620161, "loss": 2.2093, "step": 218225 }, { "epoch": 0.51, "grad_norm": 2.359375, "learning_rate": 0.0001691881078861799, "loss": 1.991, "step": 218230 }, { "epoch": 0.51, "grad_norm": 2.515625, "learning_rate": 0.0001691867733867078, "loss": 2.1903, "step": 218235 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016918543886360022, "loss": 2.1684, "step": 218240 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016918410431685765, "loss": 2.0653, "step": 218245 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.0001691827697464805, "loss": 2.0696, "step": 218250 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016918143515246928, "loss": 2.2251, "step": 218255 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00016918010053482445, "loss": 1.9384, "step": 218260 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016917876589354642, "loss": 2.0954, "step": 218265 }, { "epoch": 0.51, "grad_norm": 2.578125, "learning_rate": 0.0001691774312286357, "loss": 2.0716, "step": 218270 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016917609654009267, "loss": 2.1846, "step": 218275 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016917476182791785, "loss": 2.2171, "step": 218280 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.0001691734270921117, "loss": 2.0945, "step": 218285 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.0001691720923326746, "loss": 2.1112, "step": 218290 }, { "epoch": 0.51, "grad_norm": 2.234375, "learning_rate": 0.0001691707575496071, "loss": 1.9035, "step": 218295 }, { "epoch": 0.51, "grad_norm": 2.734375, "learning_rate": 0.0001691694227429096, "loss": 2.0346, "step": 218300 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016916808791258257, "loss": 2.0067, "step": 218305 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016916675305862647, "loss": 2.2099, "step": 218310 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016916541818104178, "loss": 2.1392, "step": 218315 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016916408327982888, "loss": 2.035, "step": 218320 }, { "epoch": 0.51, "grad_norm": 1.796875, "learning_rate": 0.00016916274835498832, "loss": 2.0249, "step": 218325 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016916141340652047, "loss": 2.2213, "step": 218330 }, { "epoch": 0.51, "grad_norm": 2.1875, "learning_rate": 0.00016916007843442585, "loss": 2.0895, "step": 218335 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.0001691587434387049, "loss": 1.9074, "step": 218340 }, { "epoch": 0.51, "grad_norm": 1.9609375, "learning_rate": 0.00016915740841935806, "loss": 2.2923, "step": 218345 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016915607337638578, "loss": 1.8545, "step": 218350 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016915473830978855, "loss": 2.0517, "step": 218355 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.0001691534032195668, "loss": 2.1172, "step": 218360 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016915206810572098, "loss": 2.3074, "step": 218365 }, { "epoch": 0.51, "grad_norm": 1.875, "learning_rate": 0.0001691507329682516, "loss": 2.0411, "step": 218370 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016914939780715904, "loss": 2.0004, "step": 218375 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.0001691480626224438, "loss": 2.117, "step": 218380 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016914672741410634, "loss": 2.1447, "step": 218385 }, { "epoch": 0.51, "grad_norm": 1.7890625, "learning_rate": 0.00016914539218214708, "loss": 1.8764, "step": 218390 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016914405692656652, "loss": 2.2453, "step": 218395 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016914272164736508, "loss": 1.9466, "step": 218400 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016914138634454323, "loss": 2.0218, "step": 218405 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016914005101810147, "loss": 2.0648, "step": 218410 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00016913871566804015, "loss": 2.2087, "step": 218415 }, { "epoch": 0.51, "grad_norm": 1.7890625, "learning_rate": 0.00016913738029435984, "loss": 1.9824, "step": 218420 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.0001691360448970609, "loss": 2.0926, "step": 218425 }, { "epoch": 0.51, "grad_norm": 1.984375, "learning_rate": 0.0001691347094761439, "loss": 2.0438, "step": 218430 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.00016913337403160917, "loss": 2.0865, "step": 218435 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016913203856345728, "loss": 2.1651, "step": 218440 }, { "epoch": 0.51, "grad_norm": 1.765625, "learning_rate": 0.0001691307030716886, "loss": 1.974, "step": 218445 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016912936755630363, "loss": 2.1712, "step": 218450 }, { "epoch": 0.51, "grad_norm": 1.84375, "learning_rate": 0.0001691280320173028, "loss": 2.1464, "step": 218455 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016912669645468659, "loss": 2.047, "step": 218460 }, { "epoch": 0.51, "grad_norm": 1.8203125, "learning_rate": 0.00016912536086845543, "loss": 1.9521, "step": 218465 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.0001691240252586098, "loss": 2.2713, "step": 218470 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.00016912268962515016, "loss": 2.1201, "step": 218475 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016912135396807693, "loss": 2.0707, "step": 218480 }, { "epoch": 0.51, "grad_norm": 1.765625, "learning_rate": 0.00016912001828739062, "loss": 2.1612, "step": 218485 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016911868258309164, "loss": 2.0221, "step": 218490 }, { "epoch": 0.51, "grad_norm": 2.171875, "learning_rate": 0.0001691173468551805, "loss": 2.199, "step": 218495 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.0001691160111036576, "loss": 2.0991, "step": 218500 }, { "epoch": 0.51, "grad_norm": 1.7890625, "learning_rate": 0.0001691146753285234, "loss": 2.0117, "step": 218505 }, { "epoch": 0.51, "grad_norm": 1.7734375, "learning_rate": 0.00016911333952977838, "loss": 1.9941, "step": 218510 }, { "epoch": 0.51, "grad_norm": 1.9140625, "learning_rate": 0.000169112003707423, "loss": 1.9108, "step": 218515 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016911066786145772, "loss": 2.1765, "step": 218520 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016910933199188296, "loss": 2.2133, "step": 218525 }, { "epoch": 0.51, "grad_norm": 2.046875, "learning_rate": 0.00016910799609869918, "loss": 2.0072, "step": 218530 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016910666018190687, "loss": 1.9948, "step": 218535 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001691053242415065, "loss": 2.1363, "step": 218540 }, { "epoch": 0.51, "grad_norm": 1.8515625, "learning_rate": 0.00016910398827749848, "loss": 2.1063, "step": 218545 }, { "epoch": 0.51, "grad_norm": 2.109375, "learning_rate": 0.00016910265228988326, "loss": 1.9827, "step": 218550 }, { "epoch": 0.51, "grad_norm": 2.4375, "learning_rate": 0.00016910131627866136, "loss": 2.3136, "step": 218555 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.0001690999802438332, "loss": 2.0126, "step": 218560 }, { "epoch": 0.51, "grad_norm": 2.9375, "learning_rate": 0.00016909864418539918, "loss": 2.0611, "step": 218565 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016909730810335985, "loss": 2.2006, "step": 218570 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016909597199771563, "loss": 2.2399, "step": 218575 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016909463586846696, "loss": 2.0725, "step": 218580 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016909329971561432, "loss": 2.1122, "step": 218585 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.00016909196353915814, "loss": 2.072, "step": 218590 }, { "epoch": 0.51, "grad_norm": 3.21875, "learning_rate": 0.0001690906273390989, "loss": 2.2164, "step": 218595 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.00016908929111543705, "loss": 2.1202, "step": 218600 }, { "epoch": 0.51, "grad_norm": 1.8984375, "learning_rate": 0.00016908795486817303, "loss": 1.9681, "step": 218605 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016908661859730732, "loss": 1.978, "step": 218610 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016908528230284036, "loss": 2.0631, "step": 218615 }, { "epoch": 0.51, "grad_norm": 2.328125, "learning_rate": 0.00016908394598477266, "loss": 2.1375, "step": 218620 }, { "epoch": 0.51, "grad_norm": 1.9453125, "learning_rate": 0.00016908260964310459, "loss": 2.1501, "step": 218625 }, { "epoch": 0.51, "grad_norm": 1.9921875, "learning_rate": 0.00016908127327783668, "loss": 1.9021, "step": 218630 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016907993688896933, "loss": 2.035, "step": 218635 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016907860047650302, "loss": 1.9119, "step": 218640 }, { "epoch": 0.51, "grad_norm": 2.46875, "learning_rate": 0.0001690772640404382, "loss": 2.0531, "step": 218645 }, { "epoch": 0.51, "grad_norm": 2.625, "learning_rate": 0.00016907592758077535, "loss": 2.0843, "step": 218650 }, { "epoch": 0.51, "grad_norm": 1.921875, "learning_rate": 0.0001690745910975149, "loss": 2.126, "step": 218655 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.0001690732545906573, "loss": 2.1278, "step": 218660 }, { "epoch": 0.51, "grad_norm": 2.28125, "learning_rate": 0.00016907191806020305, "loss": 2.172, "step": 218665 }, { "epoch": 0.51, "grad_norm": 2.25, "learning_rate": 0.0001690705815061526, "loss": 2.2034, "step": 218670 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016906924492850634, "loss": 2.2828, "step": 218675 }, { "epoch": 0.51, "grad_norm": 2.0625, "learning_rate": 0.0001690679083272648, "loss": 2.0819, "step": 218680 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016906657170242842, "loss": 2.1413, "step": 218685 }, { "epoch": 0.51, "grad_norm": 2.125, "learning_rate": 0.00016906523505399762, "loss": 2.141, "step": 218690 }, { "epoch": 0.51, "grad_norm": 2.484375, "learning_rate": 0.00016906389838197288, "loss": 2.246, "step": 218695 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016906256168635467, "loss": 2.2167, "step": 218700 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.00016906122496714344, "loss": 2.1404, "step": 218705 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016905988822433964, "loss": 2.0566, "step": 218710 }, { "epoch": 0.51, "grad_norm": 1.953125, "learning_rate": 0.00016905855145794374, "loss": 1.9699, "step": 218715 }, { "epoch": 0.51, "grad_norm": 2.15625, "learning_rate": 0.0001690572146679562, "loss": 2.0825, "step": 218720 }, { "epoch": 0.51, "grad_norm": 2.5, "learning_rate": 0.00016905587785437742, "loss": 2.1954, "step": 218725 }, { "epoch": 0.51, "grad_norm": 2.390625, "learning_rate": 0.00016905454101720792, "loss": 2.0047, "step": 218730 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016905320415644813, "loss": 2.0786, "step": 218735 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016905186727209854, "loss": 2.0927, "step": 218740 }, { "epoch": 0.51, "grad_norm": 2.734375, "learning_rate": 0.00016905053036415953, "loss": 2.0571, "step": 218745 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016904919343263165, "loss": 1.9042, "step": 218750 }, { "epoch": 0.51, "grad_norm": 2.0, "learning_rate": 0.00016904785647751532, "loss": 2.2618, "step": 218755 }, { "epoch": 0.51, "grad_norm": 2.203125, "learning_rate": 0.00016904651949881097, "loss": 2.2233, "step": 218760 }, { "epoch": 0.51, "grad_norm": 2.140625, "learning_rate": 0.00016904518249651908, "loss": 1.9449, "step": 218765 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.0001690438454706401, "loss": 2.0276, "step": 218770 }, { "epoch": 0.51, "grad_norm": 2.265625, "learning_rate": 0.00016904250842117446, "loss": 2.0973, "step": 218775 }, { "epoch": 0.51, "grad_norm": 2.453125, "learning_rate": 0.00016904117134812269, "loss": 2.0517, "step": 218780 }, { "epoch": 0.51, "grad_norm": 1.6953125, "learning_rate": 0.0001690398342514852, "loss": 2.2198, "step": 218785 }, { "epoch": 0.51, "grad_norm": 1.9296875, "learning_rate": 0.00016903849713126244, "loss": 2.0846, "step": 218790 }, { "epoch": 0.51, "grad_norm": 2.3125, "learning_rate": 0.00016903715998745487, "loss": 2.1695, "step": 218795 }, { "epoch": 0.51, "grad_norm": 2.09375, "learning_rate": 0.00016903582282006298, "loss": 2.1369, "step": 218800 }, { "epoch": 0.51, "grad_norm": 2.03125, "learning_rate": 0.00016903448562908716, "loss": 2.0084, "step": 218805 }, { "epoch": 0.51, "grad_norm": 2.40625, "learning_rate": 0.00016903314841452795, "loss": 2.1979, "step": 218810 }, { "epoch": 0.51, "grad_norm": 2.078125, "learning_rate": 0.00016903181117638576, "loss": 2.1349, "step": 218815 }, { "epoch": 0.51, "grad_norm": 2.296875, "learning_rate": 0.00016903047391466104, "loss": 1.9622, "step": 218820 }, { "epoch": 0.51, "grad_norm": 2.015625, "learning_rate": 0.00016902913662935424, "loss": 2.1476, "step": 218825 }, { "epoch": 0.51, "grad_norm": 2.21875, "learning_rate": 0.00016902779932046586, "loss": 2.1675, "step": 218830 }, { "epoch": 0.51, "grad_norm": 2.34375, "learning_rate": 0.00016902646198799632, "loss": 2.0614, "step": 218835 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001690251246319461, "loss": 2.0781, "step": 218840 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.00016902378725231564, "loss": 2.0647, "step": 218845 }, { "epoch": 0.52, "grad_norm": 1.8125, "learning_rate": 0.0001690224498491054, "loss": 1.9295, "step": 218850 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.0001690211124223158, "loss": 2.1664, "step": 218855 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016901977497194742, "loss": 2.1994, "step": 218860 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016901843749800058, "loss": 2.1117, "step": 218865 }, { "epoch": 0.52, "grad_norm": 2.796875, "learning_rate": 0.0001690171000004758, "loss": 2.3291, "step": 218870 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.0001690157624793735, "loss": 2.0518, "step": 218875 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016901442493469422, "loss": 2.1002, "step": 218880 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.0001690130873664383, "loss": 2.0858, "step": 218885 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.0001690117497746063, "loss": 2.1012, "step": 218890 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016901041215919863, "loss": 2.3064, "step": 218895 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016900907452021576, "loss": 2.012, "step": 218900 }, { "epoch": 0.52, "grad_norm": 2.875, "learning_rate": 0.0001690077368576581, "loss": 2.06, "step": 218905 }, { "epoch": 0.52, "grad_norm": 2.8125, "learning_rate": 0.00016900639917152616, "loss": 2.2498, "step": 218910 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.0001690050614618204, "loss": 2.2298, "step": 218915 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016900372372854124, "loss": 2.1103, "step": 218920 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016900238597168917, "loss": 1.8553, "step": 218925 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016900104819126465, "loss": 2.0327, "step": 218930 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.0001689997103872681, "loss": 2.0509, "step": 218935 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016899837255969998, "loss": 1.932, "step": 218940 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.0001689970347085608, "loss": 2.2533, "step": 218945 }, { "epoch": 0.52, "grad_norm": 1.9140625, "learning_rate": 0.00016899569683385097, "loss": 2.1455, "step": 218950 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016899435893557096, "loss": 2.1629, "step": 218955 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.0001689930210137212, "loss": 2.2785, "step": 218960 }, { "epoch": 0.52, "grad_norm": 1.8984375, "learning_rate": 0.0001689916830683022, "loss": 2.1666, "step": 218965 }, { "epoch": 0.52, "grad_norm": 2.625, "learning_rate": 0.00016899034509931438, "loss": 2.2493, "step": 218970 }, { "epoch": 0.52, "grad_norm": 1.8125, "learning_rate": 0.00016898900710675822, "loss": 2.2623, "step": 218975 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016898766909063416, "loss": 2.3972, "step": 218980 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016898633105094267, "loss": 2.0458, "step": 218985 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.00016898499298768416, "loss": 1.9117, "step": 218990 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016898365490085912, "loss": 2.028, "step": 218995 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016898231679046805, "loss": 2.1313, "step": 219000 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016898097865651138, "loss": 2.1608, "step": 219005 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016897964049898952, "loss": 2.0524, "step": 219010 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.000168978302317903, "loss": 1.8688, "step": 219015 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.0001689769641132522, "loss": 1.9024, "step": 219020 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016897562588503765, "loss": 2.0845, "step": 219025 }, { "epoch": 0.52, "grad_norm": 1.8203125, "learning_rate": 0.00016897428763325975, "loss": 2.0137, "step": 219030 }, { "epoch": 0.52, "grad_norm": 2.734375, "learning_rate": 0.000168972949357919, "loss": 2.0012, "step": 219035 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016897161105901585, "loss": 1.9484, "step": 219040 }, { "epoch": 0.52, "grad_norm": 1.8046875, "learning_rate": 0.00016897027273655071, "loss": 2.2417, "step": 219045 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.0001689689343905241, "loss": 1.9247, "step": 219050 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016896759602093645, "loss": 2.2088, "step": 219055 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.0001689662576277882, "loss": 2.2231, "step": 219060 }, { "epoch": 0.52, "grad_norm": 2.609375, "learning_rate": 0.00016896491921107986, "loss": 2.1914, "step": 219065 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016896358077081181, "loss": 2.1674, "step": 219070 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.0001689622423069846, "loss": 2.3123, "step": 219075 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016896090381959863, "loss": 2.3124, "step": 219080 }, { "epoch": 0.52, "grad_norm": 5.0, "learning_rate": 0.00016895956530865435, "loss": 2.1227, "step": 219085 }, { "epoch": 0.52, "grad_norm": 1.6953125, "learning_rate": 0.0001689582267741522, "loss": 2.1441, "step": 219090 }, { "epoch": 0.52, "grad_norm": 1.9375, "learning_rate": 0.0001689568882160927, "loss": 2.0543, "step": 219095 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.0001689555496344763, "loss": 2.0965, "step": 219100 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016895421102930338, "loss": 2.1259, "step": 219105 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.0001689528724005745, "loss": 2.0424, "step": 219110 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016895153374829003, "loss": 2.1366, "step": 219115 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.0001689501950724505, "loss": 2.022, "step": 219120 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001689488563730563, "loss": 1.9931, "step": 219125 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.00016894751765010794, "loss": 2.1019, "step": 219130 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016894617890360586, "loss": 2.0815, "step": 219135 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016894484013355048, "loss": 2.0931, "step": 219140 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016894350133994234, "loss": 2.1281, "step": 219145 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016894216252278182, "loss": 2.0122, "step": 219150 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016894082368206946, "loss": 2.1848, "step": 219155 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.0001689394848178056, "loss": 2.1161, "step": 219160 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.0001689381459299908, "loss": 2.098, "step": 219165 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016893680701862544, "loss": 1.949, "step": 219170 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016893546808371002, "loss": 2.0799, "step": 219175 }, { "epoch": 0.52, "grad_norm": 2.53125, "learning_rate": 0.00016893412912524503, "loss": 1.9143, "step": 219180 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016893279014323088, "loss": 2.1125, "step": 219185 }, { "epoch": 0.52, "grad_norm": 2.59375, "learning_rate": 0.000168931451137668, "loss": 2.0447, "step": 219190 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016893011210855697, "loss": 2.1641, "step": 219195 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.0001689287730558981, "loss": 2.2152, "step": 219200 }, { "epoch": 0.52, "grad_norm": 1.9296875, "learning_rate": 0.0001689274339796919, "loss": 2.0685, "step": 219205 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016892609487993888, "loss": 2.1367, "step": 219210 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016892475575663944, "loss": 2.2756, "step": 219215 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016892341660979405, "loss": 2.1481, "step": 219220 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016892207743940318, "loss": 2.2518, "step": 219225 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.00016892073824546726, "loss": 2.0542, "step": 219230 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016891939902798677, "loss": 2.0861, "step": 219235 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.0001689180597869622, "loss": 2.0462, "step": 219240 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.0001689167205223939, "loss": 1.9873, "step": 219245 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016891538123428246, "loss": 2.0934, "step": 219250 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016891404192262825, "loss": 2.1922, "step": 219255 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016891270258743174, "loss": 2.2326, "step": 219260 }, { "epoch": 0.52, "grad_norm": 1.796875, "learning_rate": 0.00016891136322869343, "loss": 2.1289, "step": 219265 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016891002384641376, "loss": 2.198, "step": 219270 }, { "epoch": 0.52, "grad_norm": 1.8984375, "learning_rate": 0.00016890868444059313, "loss": 2.107, "step": 219275 }, { "epoch": 0.52, "grad_norm": 1.8828125, "learning_rate": 0.00016890734501123208, "loss": 2.0419, "step": 219280 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.000168906005558331, "loss": 1.812, "step": 219285 }, { "epoch": 0.52, "grad_norm": 1.703125, "learning_rate": 0.00016890466608189043, "loss": 1.9171, "step": 219290 }, { "epoch": 0.52, "grad_norm": 1.734375, "learning_rate": 0.00016890332658191072, "loss": 1.8022, "step": 219295 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.0001689019870583924, "loss": 1.9054, "step": 219300 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016890064751133593, "loss": 2.3006, "step": 219305 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016889930794074175, "loss": 2.086, "step": 219310 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001688979683466103, "loss": 2.184, "step": 219315 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016889662872894204, "loss": 2.1613, "step": 219320 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.0001688952890877375, "loss": 2.1615, "step": 219325 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016889394942299705, "loss": 2.009, "step": 219330 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016889260973472115, "loss": 2.1679, "step": 219335 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.0001688912700229103, "loss": 2.0262, "step": 219340 }, { "epoch": 0.52, "grad_norm": 2.984375, "learning_rate": 0.00016888993028756495, "loss": 2.1523, "step": 219345 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016888859052868553, "loss": 2.1213, "step": 219350 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.00016888725074627257, "loss": 2.1726, "step": 219355 }, { "epoch": 0.52, "grad_norm": 1.8046875, "learning_rate": 0.00016888591094032642, "loss": 2.1648, "step": 219360 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.0001688845711108476, "loss": 2.0482, "step": 219365 }, { "epoch": 0.52, "grad_norm": 1.765625, "learning_rate": 0.0001688832312578366, "loss": 2.1489, "step": 219370 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016888189138129382, "loss": 2.0046, "step": 219375 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016888055148121974, "loss": 2.1195, "step": 219380 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.0001688792115576148, "loss": 2.0315, "step": 219385 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.0001688778716104795, "loss": 2.2028, "step": 219390 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016887653163981422, "loss": 2.0304, "step": 219395 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016887519164561948, "loss": 2.1354, "step": 219400 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016887385162789577, "loss": 2.2061, "step": 219405 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016887251158664347, "loss": 2.2179, "step": 219410 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016887117152186308, "loss": 2.0122, "step": 219415 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016886983143355502, "loss": 1.8809, "step": 219420 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016886849132171982, "loss": 2.1351, "step": 219425 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016886715118635785, "loss": 2.166, "step": 219430 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016886581102746966, "loss": 2.016, "step": 219435 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016886447084505567, "loss": 2.1513, "step": 219440 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016886313063911625, "loss": 2.1512, "step": 219445 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016886179040965198, "loss": 2.1089, "step": 219450 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001688604501566633, "loss": 2.011, "step": 219455 }, { "epoch": 0.52, "grad_norm": 1.8515625, "learning_rate": 0.0001688591098801506, "loss": 2.0444, "step": 219460 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016885776958011438, "loss": 2.1827, "step": 219465 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016885642925655513, "loss": 2.1364, "step": 219470 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016885508890947327, "loss": 2.2547, "step": 219475 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.0001688537485388692, "loss": 2.0812, "step": 219480 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016885240814474352, "loss": 1.9918, "step": 219485 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.0001688510677270966, "loss": 2.095, "step": 219490 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016884972728592888, "loss": 2.0554, "step": 219495 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016884838682124084, "loss": 2.2104, "step": 219500 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.00016884704633303294, "loss": 1.9897, "step": 219505 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016884570582130569, "loss": 2.0002, "step": 219510 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.00016884436528605946, "loss": 2.0621, "step": 219515 }, { "epoch": 0.52, "grad_norm": 3.28125, "learning_rate": 0.00016884302472729474, "loss": 1.9828, "step": 219520 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016884168414501197, "loss": 2.2078, "step": 219525 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.0001688403435392117, "loss": 1.9831, "step": 219530 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.00016883900290989428, "loss": 2.2111, "step": 219535 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016883766225706022, "loss": 1.918, "step": 219540 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016883632158070996, "loss": 2.2282, "step": 219545 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016883498088084395, "loss": 2.3076, "step": 219550 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016883364015746267, "loss": 2.1353, "step": 219555 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016883229941056657, "loss": 2.0527, "step": 219560 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016883095864015613, "loss": 2.1, "step": 219565 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016882961784623176, "loss": 2.1476, "step": 219570 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.000168828277028794, "loss": 1.9664, "step": 219575 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.00016882693618784316, "loss": 2.1231, "step": 219580 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016882559532337984, "loss": 1.9186, "step": 219585 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016882425443540445, "loss": 1.995, "step": 219590 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016882291352391743, "loss": 2.1083, "step": 219595 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.00016882157258891927, "loss": 2.1756, "step": 219600 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.0001688202316304104, "loss": 2.1826, "step": 219605 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016881889064839132, "loss": 2.0797, "step": 219610 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016881754964286243, "loss": 2.0884, "step": 219615 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016881620861382423, "loss": 2.0637, "step": 219620 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016881486756127714, "loss": 2.0958, "step": 219625 }, { "epoch": 0.52, "grad_norm": 1.9296875, "learning_rate": 0.00016881352648522168, "loss": 2.1499, "step": 219630 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016881218538565825, "loss": 2.0169, "step": 219635 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016881084426258736, "loss": 2.227, "step": 219640 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016880950311600937, "loss": 2.1201, "step": 219645 }, { "epoch": 0.52, "grad_norm": 1.8828125, "learning_rate": 0.00016880816194592488, "loss": 2.2302, "step": 219650 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016880682075233426, "loss": 2.1726, "step": 219655 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016880547953523794, "loss": 2.1941, "step": 219660 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016880413829463645, "loss": 1.9024, "step": 219665 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016880279703053019, "loss": 2.2725, "step": 219670 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.0001688014557429197, "loss": 2.143, "step": 219675 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016880011443180535, "loss": 2.2307, "step": 219680 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016879877309718765, "loss": 2.0307, "step": 219685 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016879743173906699, "loss": 2.054, "step": 219690 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.0001687960903574439, "loss": 2.1271, "step": 219695 }, { "epoch": 0.52, "grad_norm": 1.859375, "learning_rate": 0.0001687947489523189, "loss": 2.0689, "step": 219700 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.0001687934075236923, "loss": 2.1213, "step": 219705 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.0001687920660715646, "loss": 2.1597, "step": 219710 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016879072459593633, "loss": 2.0359, "step": 219715 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.00016878938309680788, "loss": 2.2278, "step": 219720 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016878804157417974, "loss": 2.0731, "step": 219725 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016878670002805232, "loss": 2.2284, "step": 219730 }, { "epoch": 0.52, "grad_norm": 1.8984375, "learning_rate": 0.00016878535845842615, "loss": 2.0932, "step": 219735 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016878401686530166, "loss": 2.117, "step": 219740 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.0001687826752486793, "loss": 2.0751, "step": 219745 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001687813336085595, "loss": 2.1423, "step": 219750 }, { "epoch": 0.52, "grad_norm": 1.8828125, "learning_rate": 0.00016877999194494275, "loss": 2.069, "step": 219755 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016877865025782956, "loss": 2.1158, "step": 219760 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001687773085472203, "loss": 2.075, "step": 219765 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016877596681311546, "loss": 2.0637, "step": 219770 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.0001687746250555155, "loss": 2.1151, "step": 219775 }, { "epoch": 0.52, "grad_norm": 1.9296875, "learning_rate": 0.00016877328327442087, "loss": 1.965, "step": 219780 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.0001687719414698321, "loss": 2.0432, "step": 219785 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016877059964174953, "loss": 1.9977, "step": 219790 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016876925779017368, "loss": 2.3101, "step": 219795 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016876791591510502, "loss": 2.1402, "step": 219800 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016876657401654398, "loss": 1.9208, "step": 219805 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.00016876523209449106, "loss": 2.1483, "step": 219810 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016876389014894666, "loss": 2.2482, "step": 219815 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016876254817991128, "loss": 2.0346, "step": 219820 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.00016876120618738538, "loss": 2.197, "step": 219825 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016875986417136938, "loss": 2.0876, "step": 219830 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016875852213186376, "loss": 2.1573, "step": 219835 }, { "epoch": 0.52, "grad_norm": 2.453125, "learning_rate": 0.00016875718006886903, "loss": 2.0501, "step": 219840 }, { "epoch": 0.52, "grad_norm": 2.5625, "learning_rate": 0.00016875583798238556, "loss": 2.0652, "step": 219845 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016875449587241384, "loss": 2.2324, "step": 219850 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016875315373895435, "loss": 2.3437, "step": 219855 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.00016875181158200751, "loss": 2.1535, "step": 219860 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016875046940157385, "loss": 2.0996, "step": 219865 }, { "epoch": 0.52, "grad_norm": 1.703125, "learning_rate": 0.00016874912719765377, "loss": 2.156, "step": 219870 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016874778497024773, "loss": 2.1515, "step": 219875 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.0001687464427193562, "loss": 2.141, "step": 219880 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.00016874510044497963, "loss": 2.0937, "step": 219885 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016874375814711852, "loss": 2.1186, "step": 219890 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016874241582577327, "loss": 2.0326, "step": 219895 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.0001687410734809444, "loss": 1.9924, "step": 219900 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.0001687397311126323, "loss": 2.0453, "step": 219905 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016873838872083746, "loss": 2.0991, "step": 219910 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016873704630556032, "loss": 2.1719, "step": 219915 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.0001687357038668014, "loss": 2.0549, "step": 219920 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001687343614045611, "loss": 1.9074, "step": 219925 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016873301891883987, "loss": 2.1515, "step": 219930 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016873167640963822, "loss": 2.0576, "step": 219935 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.0001687303338769566, "loss": 2.2666, "step": 219940 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016872899132079543, "loss": 2.1226, "step": 219945 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.0001687276487411552, "loss": 2.0778, "step": 219950 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016872630613803632, "loss": 2.0998, "step": 219955 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016872496351143934, "loss": 2.1766, "step": 219960 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016872362086136464, "loss": 2.0395, "step": 219965 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.0001687222781878127, "loss": 2.0964, "step": 219970 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016872093549078404, "loss": 2.1967, "step": 219975 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016871959277027897, "loss": 2.2656, "step": 219980 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016871825002629808, "loss": 1.9363, "step": 219985 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016871690725884183, "loss": 2.1579, "step": 219990 }, { "epoch": 0.52, "grad_norm": 1.765625, "learning_rate": 0.0001687155644679106, "loss": 2.1703, "step": 219995 }, { "epoch": 0.52, "grad_norm": 1.734375, "learning_rate": 0.0001687142216535049, "loss": 2.2157, "step": 220000 }, { "epoch": 0.52, "grad_norm": 2.578125, "learning_rate": 0.00016871287881562515, "loss": 1.9357, "step": 220005 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016871153595427184, "loss": 1.9797, "step": 220010 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016871019306944544, "loss": 2.1182, "step": 220015 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.0001687088501611464, "loss": 2.1794, "step": 220020 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016870750722937516, "loss": 2.0355, "step": 220025 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001687061642741322, "loss": 2.0114, "step": 220030 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016870482129541792, "loss": 2.2113, "step": 220035 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016870347829323287, "loss": 2.0366, "step": 220040 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016870213526757745, "loss": 1.9911, "step": 220045 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016870079221845217, "loss": 2.239, "step": 220050 }, { "epoch": 0.52, "grad_norm": 3.0, "learning_rate": 0.00016869944914585743, "loss": 2.07, "step": 220055 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016869810604979368, "loss": 2.2139, "step": 220060 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016869676293026145, "loss": 1.8678, "step": 220065 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.00016869541978726114, "loss": 2.2387, "step": 220070 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.00016869407662079323, "loss": 2.1792, "step": 220075 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016869273343085822, "loss": 2.0286, "step": 220080 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016869139021745647, "loss": 2.1858, "step": 220085 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.0001686900469805885, "loss": 2.1078, "step": 220090 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.0001686887037202548, "loss": 2.0548, "step": 220095 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016868736043645577, "loss": 2.083, "step": 220100 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.0001686860171291919, "loss": 2.1492, "step": 220105 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016868467379846364, "loss": 2.1933, "step": 220110 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016868333044427143, "loss": 2.1978, "step": 220115 }, { "epoch": 0.52, "grad_norm": 2.484375, "learning_rate": 0.0001686819870666158, "loss": 2.1642, "step": 220120 }, { "epoch": 0.52, "grad_norm": 2.734375, "learning_rate": 0.0001686806436654971, "loss": 2.0749, "step": 220125 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016867930024091587, "loss": 2.142, "step": 220130 }, { "epoch": 0.52, "grad_norm": 1.9375, "learning_rate": 0.00016867795679287255, "loss": 2.1676, "step": 220135 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016867661332136758, "loss": 2.0533, "step": 220140 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016867526982640145, "loss": 1.9991, "step": 220145 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.0001686739263079746, "loss": 2.0983, "step": 220150 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016867258276608748, "loss": 1.7897, "step": 220155 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016867123920074058, "loss": 2.0344, "step": 220160 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.0001686698956119343, "loss": 2.06, "step": 220165 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016866855199966916, "loss": 2.0372, "step": 220170 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001686672083639456, "loss": 2.1773, "step": 220175 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.0001686658647047641, "loss": 2.1448, "step": 220180 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016866452102212506, "loss": 2.1171, "step": 220185 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.000168663177316029, "loss": 2.1731, "step": 220190 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016866183358647634, "loss": 2.118, "step": 220195 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016866048983346757, "loss": 2.2118, "step": 220200 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.0001686591460570031, "loss": 2.09, "step": 220205 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016865780225708342, "loss": 2.1485, "step": 220210 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.000168656458433709, "loss": 2.118, "step": 220215 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.0001686551145868803, "loss": 2.0185, "step": 220220 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016865377071659775, "loss": 2.0843, "step": 220225 }, { "epoch": 0.52, "grad_norm": 2.578125, "learning_rate": 0.00016865242682286184, "loss": 2.0243, "step": 220230 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016865108290567302, "loss": 2.2256, "step": 220235 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016864973896503176, "loss": 2.1869, "step": 220240 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016864839500093848, "loss": 2.1987, "step": 220245 }, { "epoch": 0.52, "grad_norm": 2.515625, "learning_rate": 0.00016864705101339364, "loss": 2.0177, "step": 220250 }, { "epoch": 0.52, "grad_norm": 2.984375, "learning_rate": 0.00016864570700239777, "loss": 2.1564, "step": 220255 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016864436296795126, "loss": 2.1144, "step": 220260 }, { "epoch": 0.52, "grad_norm": 2.765625, "learning_rate": 0.00016864301891005457, "loss": 2.1511, "step": 220265 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016864167482870824, "loss": 2.0326, "step": 220270 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.0001686403307239126, "loss": 2.2338, "step": 220275 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.0001686389865956682, "loss": 2.1178, "step": 220280 }, { "epoch": 0.52, "grad_norm": 2.515625, "learning_rate": 0.0001686376424439755, "loss": 2.1967, "step": 220285 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016863629826883493, "loss": 2.0759, "step": 220290 }, { "epoch": 0.52, "grad_norm": 1.859375, "learning_rate": 0.00016863495407024693, "loss": 2.1857, "step": 220295 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.000168633609848212, "loss": 2.0614, "step": 220300 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016863226560273059, "loss": 2.1846, "step": 220305 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.00016863092133380316, "loss": 2.0081, "step": 220310 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016862957704143016, "loss": 2.0415, "step": 220315 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016862823272561204, "loss": 1.9375, "step": 220320 }, { "epoch": 0.52, "grad_norm": 2.484375, "learning_rate": 0.00016862688838634927, "loss": 2.2609, "step": 220325 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.0001686255440236423, "loss": 2.0529, "step": 220330 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016862419963749163, "loss": 2.1151, "step": 220335 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016862285522789767, "loss": 2.1382, "step": 220340 }, { "epoch": 0.52, "grad_norm": 1.7734375, "learning_rate": 0.0001686215107948609, "loss": 2.0227, "step": 220345 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016862016633838178, "loss": 2.3432, "step": 220350 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016861882185846078, "loss": 2.0692, "step": 220355 }, { "epoch": 0.52, "grad_norm": 1.75, "learning_rate": 0.00016861747735509832, "loss": 2.0141, "step": 220360 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016861613282829493, "loss": 2.02, "step": 220365 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016861478827805097, "loss": 2.2039, "step": 220370 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016861344370436697, "loss": 2.1253, "step": 220375 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016861209910724339, "loss": 1.9861, "step": 220380 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016861075448668065, "loss": 2.053, "step": 220385 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.00016860940984267923, "loss": 2.3034, "step": 220390 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.0001686080651752396, "loss": 2.0445, "step": 220395 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016860672048436224, "loss": 2.1587, "step": 220400 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.00016860537577004757, "loss": 1.9772, "step": 220405 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.000168604031032296, "loss": 2.2369, "step": 220410 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016860268627110815, "loss": 2.0268, "step": 220415 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.0001686013414864843, "loss": 2.1446, "step": 220420 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.000168599996678425, "loss": 2.1039, "step": 220425 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016859865184693074, "loss": 2.0439, "step": 220430 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.0001685973069920019, "loss": 2.0779, "step": 220435 }, { "epoch": 0.52, "grad_norm": 1.9375, "learning_rate": 0.00016859596211363896, "loss": 2.2, "step": 220440 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.0001685946172118424, "loss": 1.9191, "step": 220445 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.0001685932722866127, "loss": 2.2482, "step": 220450 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.00016859192733795027, "loss": 2.2232, "step": 220455 }, { "epoch": 0.52, "grad_norm": 1.828125, "learning_rate": 0.0001685905823658556, "loss": 2.0617, "step": 220460 }, { "epoch": 0.52, "grad_norm": 2.671875, "learning_rate": 0.00016858923737032917, "loss": 2.1056, "step": 220465 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.0001685878923513714, "loss": 2.1837, "step": 220470 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.00016858654730898275, "loss": 1.9685, "step": 220475 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.0001685852022431637, "loss": 2.0401, "step": 220480 }, { "epoch": 0.52, "grad_norm": 1.6875, "learning_rate": 0.00016858385715391472, "loss": 2.1046, "step": 220485 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016858251204123617, "loss": 2.0135, "step": 220490 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016858116690512867, "loss": 2.0446, "step": 220495 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016857982174559258, "loss": 2.0366, "step": 220500 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016857847656262837, "loss": 2.1659, "step": 220505 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016857713135623654, "loss": 2.1227, "step": 220510 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016857578612641748, "loss": 2.2252, "step": 220515 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001685744408731717, "loss": 2.2466, "step": 220520 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016857309559649964, "loss": 1.9983, "step": 220525 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016857175029640179, "loss": 2.3306, "step": 220530 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016857040497287858, "loss": 2.2505, "step": 220535 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016856905962593047, "loss": 2.2557, "step": 220540 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.0001685677142555579, "loss": 2.0806, "step": 220545 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.0001685663688617614, "loss": 1.815, "step": 220550 }, { "epoch": 0.52, "grad_norm": 2.65625, "learning_rate": 0.00016856502344454136, "loss": 1.7895, "step": 220555 }, { "epoch": 0.52, "grad_norm": 1.8359375, "learning_rate": 0.00016856367800389824, "loss": 2.1567, "step": 220560 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001685623325398326, "loss": 1.9101, "step": 220565 }, { "epoch": 0.52, "grad_norm": 2.53125, "learning_rate": 0.00016856098705234475, "loss": 2.1777, "step": 220570 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016855964154143527, "loss": 2.0857, "step": 220575 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016855829600710454, "loss": 2.1091, "step": 220580 }, { "epoch": 0.52, "grad_norm": 2.65625, "learning_rate": 0.00016855695044935308, "loss": 2.0718, "step": 220585 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.0001685556048681813, "loss": 2.0448, "step": 220590 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016855425926358967, "loss": 1.9051, "step": 220595 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001685529136355787, "loss": 2.1118, "step": 220600 }, { "epoch": 0.52, "grad_norm": 1.65625, "learning_rate": 0.00016855156798414877, "loss": 2.1044, "step": 220605 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016855022230930042, "loss": 2.1467, "step": 220610 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016854887661103403, "loss": 2.2485, "step": 220615 }, { "epoch": 0.52, "grad_norm": 1.828125, "learning_rate": 0.00016854753088935015, "loss": 2.087, "step": 220620 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016854618514424916, "loss": 2.1894, "step": 220625 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016854483937573156, "loss": 1.874, "step": 220630 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.0001685434935837978, "loss": 1.9028, "step": 220635 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016854214776844834, "loss": 2.1578, "step": 220640 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.00016854080192968364, "loss": 2.1333, "step": 220645 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016853945606750415, "loss": 2.0709, "step": 220650 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001685381101819103, "loss": 2.3681, "step": 220655 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016853676427290267, "loss": 2.202, "step": 220660 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.0001685354183404816, "loss": 1.997, "step": 220665 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.0001685340723846476, "loss": 2.1891, "step": 220670 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016853272640540107, "loss": 2.0846, "step": 220675 }, { "epoch": 0.52, "grad_norm": 2.78125, "learning_rate": 0.00016853138040274257, "loss": 2.2682, "step": 220680 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016853003437667248, "loss": 2.118, "step": 220685 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016852868832719132, "loss": 2.2147, "step": 220690 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.0001685273422542995, "loss": 2.1941, "step": 220695 }, { "epoch": 0.52, "grad_norm": 1.9140625, "learning_rate": 0.00016852599615799747, "loss": 1.9676, "step": 220700 }, { "epoch": 0.52, "grad_norm": 1.8359375, "learning_rate": 0.00016852465003828573, "loss": 2.2501, "step": 220705 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016852330389516474, "loss": 2.1187, "step": 220710 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016852195772863496, "loss": 2.1354, "step": 220715 }, { "epoch": 0.52, "grad_norm": 1.8203125, "learning_rate": 0.00016852061153869682, "loss": 2.0693, "step": 220720 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.0001685192653253508, "loss": 2.1805, "step": 220725 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016851791908859736, "loss": 2.1881, "step": 220730 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016851657282843693, "loss": 2.0819, "step": 220735 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016851522654487, "loss": 2.0882, "step": 220740 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016851388023789708, "loss": 2.1205, "step": 220745 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.0001685125339075185, "loss": 2.2312, "step": 220750 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016851118755373484, "loss": 2.0724, "step": 220755 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016850984117654648, "loss": 2.0907, "step": 220760 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016850849477595395, "loss": 2.0492, "step": 220765 }, { "epoch": 0.52, "grad_norm": 1.71875, "learning_rate": 0.00016850714835195768, "loss": 2.0656, "step": 220770 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016850580190455814, "loss": 2.0269, "step": 220775 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016850445543375574, "loss": 2.1623, "step": 220780 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016850310893955098, "loss": 2.1447, "step": 220785 }, { "epoch": 0.52, "grad_norm": 1.9296875, "learning_rate": 0.0001685017624219443, "loss": 2.1562, "step": 220790 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016850041588093622, "loss": 2.2243, "step": 220795 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016849906931652714, "loss": 2.0833, "step": 220800 }, { "epoch": 0.52, "grad_norm": 2.609375, "learning_rate": 0.00016849772272871754, "loss": 2.177, "step": 220805 }, { "epoch": 0.52, "grad_norm": 2.515625, "learning_rate": 0.00016849637611750785, "loss": 2.0966, "step": 220810 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016849502948289855, "loss": 2.2178, "step": 220815 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.00016849368282489016, "loss": 2.1623, "step": 220820 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016849233614348304, "loss": 2.1061, "step": 220825 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.0001684909894386777, "loss": 2.097, "step": 220830 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.0001684896427104746, "loss": 2.0956, "step": 220835 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001684882959588742, "loss": 2.102, "step": 220840 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016848694918387698, "loss": 2.0448, "step": 220845 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016848560238548333, "loss": 1.915, "step": 220850 }, { "epoch": 0.52, "grad_norm": 2.53125, "learning_rate": 0.0001684842555636938, "loss": 2.1773, "step": 220855 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016848290871850877, "loss": 2.2084, "step": 220860 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016848156184992873, "loss": 2.0244, "step": 220865 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016848021495795417, "loss": 2.3253, "step": 220870 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016847886804258552, "loss": 2.0795, "step": 220875 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016847752110382325, "loss": 2.059, "step": 220880 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016847617414166783, "loss": 2.005, "step": 220885 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016847482715611972, "loss": 2.2081, "step": 220890 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016847348014717932, "loss": 2.1474, "step": 220895 }, { "epoch": 0.52, "grad_norm": 1.5703125, "learning_rate": 0.00016847213311484716, "loss": 1.9549, "step": 220900 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.00016847078605912365, "loss": 2.0015, "step": 220905 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001684694389800093, "loss": 2.0005, "step": 220910 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.0001684680918775046, "loss": 1.9388, "step": 220915 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016846674475160987, "loss": 2.2097, "step": 220920 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.0001684653976023257, "loss": 2.2555, "step": 220925 }, { "epoch": 0.52, "grad_norm": 1.8046875, "learning_rate": 0.0001684640504296525, "loss": 2.0553, "step": 220930 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016846270323359077, "loss": 2.1249, "step": 220935 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.0001684613560141409, "loss": 2.1797, "step": 220940 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.0001684600087713034, "loss": 2.1314, "step": 220945 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.0001684586615050787, "loss": 2.174, "step": 220950 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016845731421546734, "loss": 1.8422, "step": 220955 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016845596690246968, "loss": 1.8631, "step": 220960 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016845461956608624, "loss": 2.0709, "step": 220965 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.0001684532722063174, "loss": 2.1826, "step": 220970 }, { "epoch": 0.52, "grad_norm": 2.59375, "learning_rate": 0.00016845192482316373, "loss": 2.2695, "step": 220975 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.00016845057741662564, "loss": 2.0347, "step": 220980 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016844922998670358, "loss": 2.2583, "step": 220985 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016844788253339801, "loss": 2.1042, "step": 220990 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016844653505670943, "loss": 2.2191, "step": 220995 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016844518755663827, "loss": 2.2694, "step": 221000 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016844384003318502, "loss": 2.1146, "step": 221005 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016844249248635005, "loss": 2.0892, "step": 221010 }, { "epoch": 0.52, "grad_norm": 2.453125, "learning_rate": 0.0001684411449161339, "loss": 2.0432, "step": 221015 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016843979732253703, "loss": 2.0754, "step": 221020 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.00016843844970555988, "loss": 2.1156, "step": 221025 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.0001684371020652029, "loss": 2.1567, "step": 221030 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.0001684357544014666, "loss": 2.2035, "step": 221035 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016843440671435137, "loss": 2.1719, "step": 221040 }, { "epoch": 0.52, "grad_norm": 2.75, "learning_rate": 0.00016843305900385774, "loss": 2.1752, "step": 221045 }, { "epoch": 0.52, "grad_norm": 1.9375, "learning_rate": 0.0001684317112699861, "loss": 2.0567, "step": 221050 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016843036351273696, "loss": 2.0714, "step": 221055 }, { "epoch": 0.52, "grad_norm": 3.5625, "learning_rate": 0.00016842901573211076, "loss": 2.0565, "step": 221060 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016842766792810797, "loss": 2.23, "step": 221065 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016842632010072905, "loss": 1.9545, "step": 221070 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016842497224997448, "loss": 2.0406, "step": 221075 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016842362437584466, "loss": 2.1222, "step": 221080 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016842227647834014, "loss": 2.2086, "step": 221085 }, { "epoch": 0.52, "grad_norm": 1.5859375, "learning_rate": 0.00016842092855746127, "loss": 1.8753, "step": 221090 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.0001684195806132086, "loss": 2.1572, "step": 221095 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016841823264558255, "loss": 2.1882, "step": 221100 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.00016841688465458362, "loss": 1.9822, "step": 221105 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016841553664021221, "loss": 2.0485, "step": 221110 }, { "epoch": 0.52, "grad_norm": 1.8515625, "learning_rate": 0.00016841418860246882, "loss": 2.1545, "step": 221115 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001684128405413539, "loss": 2.1382, "step": 221120 }, { "epoch": 0.52, "grad_norm": 1.7890625, "learning_rate": 0.0001684114924568679, "loss": 1.8668, "step": 221125 }, { "epoch": 0.52, "grad_norm": 2.6875, "learning_rate": 0.00016841014434901134, "loss": 2.3557, "step": 221130 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.0001684087962177846, "loss": 2.2067, "step": 221135 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016840744806318817, "loss": 2.0937, "step": 221140 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016840609988522253, "loss": 2.1242, "step": 221145 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016840475168388813, "loss": 2.1042, "step": 221150 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.0001684034034591854, "loss": 1.9095, "step": 221155 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016840205521111485, "loss": 1.907, "step": 221160 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001684007069396769, "loss": 2.1288, "step": 221165 }, { "epoch": 0.52, "grad_norm": 2.6875, "learning_rate": 0.00016839935864487203, "loss": 2.0597, "step": 221170 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016839801032670073, "loss": 2.0037, "step": 221175 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.0001683966619851634, "loss": 2.1992, "step": 221180 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016839531362026054, "loss": 2.112, "step": 221185 }, { "epoch": 0.52, "grad_norm": 2.65625, "learning_rate": 0.00016839396523199259, "loss": 2.275, "step": 221190 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016839261682036004, "loss": 2.1143, "step": 221195 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016839126838536333, "loss": 2.1684, "step": 221200 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001683899199270029, "loss": 2.1738, "step": 221205 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.00016838857144527925, "loss": 1.9939, "step": 221210 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001683872229401928, "loss": 2.0691, "step": 221215 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016838587441174406, "loss": 2.1625, "step": 221220 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016838452585993344, "loss": 2.2984, "step": 221225 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016838317728476145, "loss": 2.152, "step": 221230 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016838182868622855, "loss": 2.0516, "step": 221235 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.0001683804800643351, "loss": 1.9753, "step": 221240 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.0001683791314190817, "loss": 2.1669, "step": 221245 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016837778275046873, "loss": 2.249, "step": 221250 }, { "epoch": 0.52, "grad_norm": 3.78125, "learning_rate": 0.0001683764340584967, "loss": 1.9771, "step": 221255 }, { "epoch": 0.52, "grad_norm": 2.53125, "learning_rate": 0.000168375085343166, "loss": 1.9944, "step": 221260 }, { "epoch": 0.52, "grad_norm": 2.65625, "learning_rate": 0.00016837373660447716, "loss": 2.254, "step": 221265 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016837238784243056, "loss": 1.8832, "step": 221270 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.00016837103905702677, "loss": 2.1936, "step": 221275 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016836969024826618, "loss": 2.157, "step": 221280 }, { "epoch": 0.52, "grad_norm": 2.578125, "learning_rate": 0.00016836834141614924, "loss": 2.1423, "step": 221285 }, { "epoch": 0.52, "grad_norm": 2.578125, "learning_rate": 0.00016836699256067648, "loss": 2.1701, "step": 221290 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016836564368184824, "loss": 2.089, "step": 221295 }, { "epoch": 0.52, "grad_norm": 1.828125, "learning_rate": 0.00016836429477966512, "loss": 2.0997, "step": 221300 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.0001683629458541275, "loss": 2.2677, "step": 221305 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016836159690523586, "loss": 2.1276, "step": 221310 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016836024793299065, "loss": 2.2898, "step": 221315 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016835889893739231, "loss": 2.1887, "step": 221320 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016835754991844138, "loss": 2.1241, "step": 221325 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016835620087613827, "loss": 2.136, "step": 221330 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016835485181048343, "loss": 2.1574, "step": 221335 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.0001683535027214773, "loss": 2.1648, "step": 221340 }, { "epoch": 0.52, "grad_norm": 2.6875, "learning_rate": 0.0001683521536091204, "loss": 2.2346, "step": 221345 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016835080447341318, "loss": 2.0663, "step": 221350 }, { "epoch": 0.52, "grad_norm": 1.8984375, "learning_rate": 0.00016834945531435608, "loss": 2.1551, "step": 221355 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016834810613194958, "loss": 2.0903, "step": 221360 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.0001683467569261941, "loss": 2.1657, "step": 221365 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016834540769709015, "loss": 2.0239, "step": 221370 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.0001683440584446381, "loss": 2.281, "step": 221375 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001683427091688386, "loss": 2.1653, "step": 221380 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.0001683413598696919, "loss": 2.2316, "step": 221385 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.0001683400105471986, "loss": 2.1933, "step": 221390 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016833866120135907, "loss": 2.0762, "step": 221395 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016833731183217384, "loss": 1.9737, "step": 221400 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016833596243964337, "loss": 2.0825, "step": 221405 }, { "epoch": 0.52, "grad_norm": 1.9296875, "learning_rate": 0.00016833461302376805, "loss": 2.0184, "step": 221410 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.0001683332635845484, "loss": 2.0996, "step": 221415 }, { "epoch": 0.52, "grad_norm": 1.828125, "learning_rate": 0.0001683319141219849, "loss": 2.1049, "step": 221420 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.00016833056463607792, "loss": 2.2302, "step": 221425 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.000168329215126828, "loss": 2.0675, "step": 221430 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.0001683278655942356, "loss": 2.0706, "step": 221435 }, { "epoch": 0.52, "grad_norm": 1.828125, "learning_rate": 0.00016832651603830115, "loss": 2.2538, "step": 221440 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016832516645902512, "loss": 1.8502, "step": 221445 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.000168323816856408, "loss": 2.0403, "step": 221450 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.0001683224672304502, "loss": 2.2442, "step": 221455 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.0001683211175811522, "loss": 2.107, "step": 221460 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.0001683197679085145, "loss": 2.1565, "step": 221465 }, { "epoch": 0.52, "grad_norm": 1.7421875, "learning_rate": 0.0001683184182125375, "loss": 2.0388, "step": 221470 }, { "epoch": 0.52, "grad_norm": 2.453125, "learning_rate": 0.0001683170684932217, "loss": 2.193, "step": 221475 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.00016831571875056754, "loss": 2.2357, "step": 221480 }, { "epoch": 0.52, "grad_norm": 1.8828125, "learning_rate": 0.00016831436898457548, "loss": 1.9913, "step": 221485 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.000168313019195246, "loss": 2.2367, "step": 221490 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.0001683116693825796, "loss": 1.9394, "step": 221495 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016831031954657664, "loss": 2.132, "step": 221500 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016830896968723767, "loss": 2.3004, "step": 221505 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.0001683076198045631, "loss": 2.1302, "step": 221510 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016830626989855341, "loss": 2.0504, "step": 221515 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016830491996920905, "loss": 2.1493, "step": 221520 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016830357001653052, "loss": 2.1239, "step": 221525 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.0001683022200405182, "loss": 2.2371, "step": 221530 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016830087004117265, "loss": 2.2071, "step": 221535 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016829952001849429, "loss": 2.1302, "step": 221540 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016829816997248354, "loss": 2.0903, "step": 221545 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.0001682968199031409, "loss": 1.9532, "step": 221550 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.00016829546981046684, "loss": 1.9052, "step": 221555 }, { "epoch": 0.52, "grad_norm": 1.9296875, "learning_rate": 0.0001682941196944618, "loss": 2.0356, "step": 221560 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016829276955512628, "loss": 2.216, "step": 221565 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.0001682914193924607, "loss": 2.1219, "step": 221570 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.0001682900692064655, "loss": 2.2355, "step": 221575 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.0001682887189971412, "loss": 2.0456, "step": 221580 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016828736876448822, "loss": 2.0663, "step": 221585 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016828601850850706, "loss": 2.1365, "step": 221590 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016828466822919812, "loss": 1.9504, "step": 221595 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016828331792656192, "loss": 2.0407, "step": 221600 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016828196760059893, "loss": 2.0666, "step": 221605 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016828061725130956, "loss": 1.9955, "step": 221610 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001682792668786943, "loss": 2.1623, "step": 221615 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016827791648275358, "loss": 2.0458, "step": 221620 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016827656606348789, "loss": 2.0838, "step": 221625 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.0001682752156208977, "loss": 2.1398, "step": 221630 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016827386515498344, "loss": 2.2046, "step": 221635 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.0001682725146657456, "loss": 2.115, "step": 221640 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016827116415318462, "loss": 2.097, "step": 221645 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.000168269813617301, "loss": 1.9251, "step": 221650 }, { "epoch": 0.52, "grad_norm": 5.03125, "learning_rate": 0.00016826846305809516, "loss": 2.099, "step": 221655 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016826711247556756, "loss": 1.9481, "step": 221660 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016826576186971872, "loss": 2.0714, "step": 221665 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016826441124054902, "loss": 2.1364, "step": 221670 }, { "epoch": 0.52, "grad_norm": 1.8359375, "learning_rate": 0.00016826306058805897, "loss": 2.1021, "step": 221675 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.000168261709912249, "loss": 2.202, "step": 221680 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016826035921311964, "loss": 2.0901, "step": 221685 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016825900849067128, "loss": 2.2134, "step": 221690 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016825765774490436, "loss": 2.0002, "step": 221695 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.00016825630697581942, "loss": 2.1454, "step": 221700 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.0001682549561834169, "loss": 1.8752, "step": 221705 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016825360536769726, "loss": 1.9419, "step": 221710 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016825225452866093, "loss": 2.138, "step": 221715 }, { "epoch": 0.52, "grad_norm": 2.484375, "learning_rate": 0.0001682509036663084, "loss": 2.0939, "step": 221720 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.00016824955278064006, "loss": 2.0782, "step": 221725 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.0001682482018716565, "loss": 2.0026, "step": 221730 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016824685093935814, "loss": 2.1065, "step": 221735 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016824549998374538, "loss": 2.1135, "step": 221740 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.00016824414900481872, "loss": 1.893, "step": 221745 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.00016824279800257863, "loss": 2.1611, "step": 221750 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016824144697702556, "loss": 2.0242, "step": 221755 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.00016824009592815998, "loss": 2.0119, "step": 221760 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016823874485598234, "loss": 2.1809, "step": 221765 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.00016823739376049308, "loss": 1.9554, "step": 221770 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.00016823604264169271, "loss": 2.2274, "step": 221775 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016823469149958168, "loss": 1.9638, "step": 221780 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.00016823334033416046, "loss": 2.0225, "step": 221785 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016823198914542946, "loss": 2.0631, "step": 221790 }, { "epoch": 0.52, "grad_norm": 1.609375, "learning_rate": 0.00016823063793338917, "loss": 1.9719, "step": 221795 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016822928669804008, "loss": 2.2436, "step": 221800 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016822793543938263, "loss": 2.0176, "step": 221805 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.0001682265841574173, "loss": 1.9618, "step": 221810 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016822523285214448, "loss": 2.2165, "step": 221815 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.0001682238815235647, "loss": 2.0604, "step": 221820 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016822253017167839, "loss": 2.0885, "step": 221825 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.00016822117879648605, "loss": 2.0204, "step": 221830 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.00016821982739798812, "loss": 2.0906, "step": 221835 }, { "epoch": 0.52, "grad_norm": 1.6015625, "learning_rate": 0.00016821847597618504, "loss": 1.9523, "step": 221840 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.0001682171245310773, "loss": 2.1682, "step": 221845 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016821577306266536, "loss": 2.0706, "step": 221850 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.0001682144215709497, "loss": 2.0944, "step": 221855 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.0001682130700559307, "loss": 2.0382, "step": 221860 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016821171851760892, "loss": 2.3078, "step": 221865 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016821036695598477, "loss": 2.0684, "step": 221870 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.0001682090153710587, "loss": 2.2048, "step": 221875 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016820766376283124, "loss": 2.0466, "step": 221880 }, { "epoch": 0.52, "grad_norm": 2.703125, "learning_rate": 0.00016820631213130275, "loss": 2.0794, "step": 221885 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016820496047647378, "loss": 2.1181, "step": 221890 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016820360879834476, "loss": 1.9753, "step": 221895 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016820225709691616, "loss": 2.2806, "step": 221900 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.0001682009053721884, "loss": 2.2946, "step": 221905 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016819955362416198, "loss": 2.1175, "step": 221910 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.00016819820185283736, "loss": 2.2581, "step": 221915 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.000168196850058215, "loss": 2.2779, "step": 221920 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.0001681954982402954, "loss": 2.0556, "step": 221925 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.0001681941463990789, "loss": 2.0976, "step": 221930 }, { "epoch": 0.52, "grad_norm": 1.96875, "learning_rate": 0.0001681927945345661, "loss": 2.0251, "step": 221935 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016819144264675737, "loss": 1.8177, "step": 221940 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016819009073565323, "loss": 2.1661, "step": 221945 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016818873880125414, "loss": 2.1297, "step": 221950 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.0001681873868435605, "loss": 2.2281, "step": 221955 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016818603486257284, "loss": 2.1614, "step": 221960 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016818468285829155, "loss": 2.2354, "step": 221965 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016818333083071718, "loss": 2.0529, "step": 221970 }, { "epoch": 0.52, "grad_norm": 3.046875, "learning_rate": 0.00016818197877985013, "loss": 2.0597, "step": 221975 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.0001681806267056909, "loss": 2.2035, "step": 221980 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016817927460823992, "loss": 2.2283, "step": 221985 }, { "epoch": 0.52, "grad_norm": 1.8125, "learning_rate": 0.00016817792248749764, "loss": 2.1387, "step": 221990 }, { "epoch": 0.52, "grad_norm": 2.5625, "learning_rate": 0.00016817657034346457, "loss": 2.2296, "step": 221995 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016817521817614112, "loss": 2.1873, "step": 222000 }, { "epoch": 0.52, "grad_norm": 2.546875, "learning_rate": 0.0001681738659855278, "loss": 2.2225, "step": 222005 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016817251377162506, "loss": 2.2457, "step": 222010 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016817116153443334, "loss": 2.1311, "step": 222015 }, { "epoch": 0.52, "grad_norm": 2.53125, "learning_rate": 0.0001681698092739531, "loss": 2.1213, "step": 222020 }, { "epoch": 0.52, "grad_norm": 2.46875, "learning_rate": 0.00016816845699018488, "loss": 2.0428, "step": 222025 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016816710468312902, "loss": 2.1951, "step": 222030 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016816575235278604, "loss": 1.9558, "step": 222035 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016816439999915644, "loss": 1.9534, "step": 222040 }, { "epoch": 0.52, "grad_norm": 1.9375, "learning_rate": 0.0001681630476222406, "loss": 2.0364, "step": 222045 }, { "epoch": 0.52, "grad_norm": 1.78125, "learning_rate": 0.00016816169522203907, "loss": 1.9543, "step": 222050 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016816034279855223, "loss": 2.0363, "step": 222055 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.0001681589903517806, "loss": 2.1145, "step": 222060 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016815763788172462, "loss": 2.1319, "step": 222065 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016815628538838475, "loss": 2.1404, "step": 222070 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.00016815493287176145, "loss": 2.0848, "step": 222075 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.0001681535803318552, "loss": 2.2702, "step": 222080 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.00016815222776866645, "loss": 2.0567, "step": 222085 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016815087518219567, "loss": 2.0502, "step": 222090 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.00016814952257244333, "loss": 1.9491, "step": 222095 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.00016814816993940984, "loss": 2.3156, "step": 222100 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016814681728309573, "loss": 2.2465, "step": 222105 }, { "epoch": 0.52, "grad_norm": 2.640625, "learning_rate": 0.0001681454646035014, "loss": 2.1176, "step": 222110 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.0001681441119006274, "loss": 2.0005, "step": 222115 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016814275917447408, "loss": 2.0686, "step": 222120 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016814140642504194, "loss": 1.8966, "step": 222125 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.00016814005365233152, "loss": 2.0794, "step": 222130 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016813870085634317, "loss": 2.0369, "step": 222135 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016813734803707746, "loss": 2.3326, "step": 222140 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016813599519453475, "loss": 2.068, "step": 222145 }, { "epoch": 0.52, "grad_norm": 2.75, "learning_rate": 0.00016813464232871557, "loss": 2.1342, "step": 222150 }, { "epoch": 0.52, "grad_norm": 2.515625, "learning_rate": 0.00016813328943962035, "loss": 1.987, "step": 222155 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016813193652724957, "loss": 2.1563, "step": 222160 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.0001681305835916037, "loss": 2.2124, "step": 222165 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.00016812923063268316, "loss": 2.1386, "step": 222170 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016812787765048846, "loss": 2.3145, "step": 222175 }, { "epoch": 0.52, "grad_norm": 2.578125, "learning_rate": 0.00016812652464502002, "loss": 1.9064, "step": 222180 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016812517161627835, "loss": 2.1206, "step": 222185 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016812381856426385, "loss": 2.2275, "step": 222190 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016812246548897702, "loss": 2.0997, "step": 222195 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016812111239041837, "loss": 2.0498, "step": 222200 }, { "epoch": 0.52, "grad_norm": 1.9375, "learning_rate": 0.00016811975926858826, "loss": 2.2522, "step": 222205 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016811840612348725, "loss": 2.0225, "step": 222210 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.0001681170529551157, "loss": 2.0611, "step": 222215 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016811569976347417, "loss": 2.1599, "step": 222220 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.0001681143465485631, "loss": 2.1752, "step": 222225 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016811299331038289, "loss": 1.9649, "step": 222230 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016811164004893408, "loss": 2.3036, "step": 222235 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016811028676421707, "loss": 2.1367, "step": 222240 }, { "epoch": 0.52, "grad_norm": 2.5625, "learning_rate": 0.00016810893345623235, "loss": 2.1266, "step": 222245 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.0001681075801249804, "loss": 2.2431, "step": 222250 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016810622677046165, "loss": 2.2046, "step": 222255 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.0001681048733926766, "loss": 1.9804, "step": 222260 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016810351999162567, "loss": 2.1939, "step": 222265 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.00016810216656730934, "loss": 2.1754, "step": 222270 }, { "epoch": 0.52, "grad_norm": 2.59375, "learning_rate": 0.00016810081311972812, "loss": 2.018, "step": 222275 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016809945964888238, "loss": 2.1155, "step": 222280 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016809810615477265, "loss": 2.3152, "step": 222285 }, { "epoch": 0.52, "grad_norm": 8.6875, "learning_rate": 0.00016809675263739936, "loss": 2.1471, "step": 222290 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016809539909676298, "loss": 2.2697, "step": 222295 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.000168094045532864, "loss": 2.0924, "step": 222300 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016809269194570283, "loss": 2.1211, "step": 222305 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.00016809133833528, "loss": 2.053, "step": 222310 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016808998470159588, "loss": 2.0985, "step": 222315 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016808863104465104, "loss": 2.1643, "step": 222320 }, { "epoch": 0.52, "grad_norm": 2.671875, "learning_rate": 0.00016808727736444587, "loss": 2.2191, "step": 222325 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016808592366098084, "loss": 2.0469, "step": 222330 }, { "epoch": 0.52, "grad_norm": 2.453125, "learning_rate": 0.00016808456993425646, "loss": 1.9821, "step": 222335 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.0001680832161842731, "loss": 2.1371, "step": 222340 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.0001680818624110313, "loss": 2.0287, "step": 222345 }, { "epoch": 0.52, "grad_norm": 1.8671875, "learning_rate": 0.00016808050861453152, "loss": 1.9815, "step": 222350 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016807915479477418, "loss": 2.0145, "step": 222355 }, { "epoch": 0.52, "grad_norm": 1.7109375, "learning_rate": 0.0001680778009517598, "loss": 2.0941, "step": 222360 }, { "epoch": 0.52, "grad_norm": 1.9140625, "learning_rate": 0.00016807644708548874, "loss": 2.121, "step": 222365 }, { "epoch": 0.52, "grad_norm": 1.8046875, "learning_rate": 0.0001680750931959616, "loss": 1.863, "step": 222370 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016807373928317873, "loss": 2.112, "step": 222375 }, { "epoch": 0.52, "grad_norm": 2.359375, "learning_rate": 0.00016807238534714066, "loss": 2.1381, "step": 222380 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016807103138784783, "loss": 2.2116, "step": 222385 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.0001680696774053007, "loss": 1.9442, "step": 222390 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016806832339949973, "loss": 2.1859, "step": 222395 }, { "epoch": 0.52, "grad_norm": 3.3125, "learning_rate": 0.0001680669693704454, "loss": 2.1053, "step": 222400 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016806561531813814, "loss": 2.081, "step": 222405 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.0001680642612425784, "loss": 2.1618, "step": 222410 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016806290714376672, "loss": 1.9586, "step": 222415 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016806155302170352, "loss": 2.0352, "step": 222420 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016806019887638925, "loss": 2.0254, "step": 222425 }, { "epoch": 0.52, "grad_norm": 2.4375, "learning_rate": 0.00016805884470782437, "loss": 2.1614, "step": 222430 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016805749051600937, "loss": 2.0003, "step": 222435 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.00016805613630094468, "loss": 2.0465, "step": 222440 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016805478206263077, "loss": 2.2374, "step": 222445 }, { "epoch": 0.52, "grad_norm": 2.90625, "learning_rate": 0.00016805342780106813, "loss": 2.0764, "step": 222450 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016805207351625723, "loss": 2.1374, "step": 222455 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016805071920819847, "loss": 2.0613, "step": 222460 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016804936487689235, "loss": 2.0432, "step": 222465 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016804801052233936, "loss": 2.1552, "step": 222470 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016804665614453992, "loss": 2.1265, "step": 222475 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.0001680453017434945, "loss": 1.9622, "step": 222480 }, { "epoch": 0.52, "grad_norm": 1.6875, "learning_rate": 0.00016804394731920357, "loss": 2.1285, "step": 222485 }, { "epoch": 0.52, "grad_norm": 1.8515625, "learning_rate": 0.00016804259287166761, "loss": 2.0898, "step": 222490 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016804123840088707, "loss": 2.2116, "step": 222495 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016803988390686237, "loss": 1.9056, "step": 222500 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.00016803852938959406, "loss": 2.1639, "step": 222505 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016803717484908254, "loss": 2.1526, "step": 222510 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016803582028532827, "loss": 1.9691, "step": 222515 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016803446569833174, "loss": 2.0969, "step": 222520 }, { "epoch": 0.52, "grad_norm": 2.625, "learning_rate": 0.00016803311108809342, "loss": 2.2406, "step": 222525 }, { "epoch": 0.52, "grad_norm": 2.671875, "learning_rate": 0.0001680317564546137, "loss": 2.0218, "step": 222530 }, { "epoch": 0.52, "grad_norm": 1.953125, "learning_rate": 0.0001680304017978932, "loss": 2.1754, "step": 222535 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016802904711793218, "loss": 1.988, "step": 222540 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016802769241473127, "loss": 2.1794, "step": 222545 }, { "epoch": 0.52, "grad_norm": 1.8046875, "learning_rate": 0.00016802633768829082, "loss": 2.1679, "step": 222550 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016802498293861135, "loss": 2.0902, "step": 222555 }, { "epoch": 0.52, "grad_norm": 1.78125, "learning_rate": 0.00016802362816569332, "loss": 2.0856, "step": 222560 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016802227336953717, "loss": 1.8999, "step": 222565 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.00016802091855014343, "loss": 2.2822, "step": 222570 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016801956370751246, "loss": 2.0681, "step": 222575 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.0001680182088416448, "loss": 2.078, "step": 222580 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016801685395254084, "loss": 2.0865, "step": 222585 }, { "epoch": 0.52, "grad_norm": 2.578125, "learning_rate": 0.00016801549904020115, "loss": 1.8773, "step": 222590 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.0001680141441046261, "loss": 2.0699, "step": 222595 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016801278914581617, "loss": 2.1268, "step": 222600 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016801143416377188, "loss": 2.0916, "step": 222605 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016801007915849363, "loss": 2.3597, "step": 222610 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016800872412998187, "loss": 2.1115, "step": 222615 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016800736907823715, "loss": 2.2292, "step": 222620 }, { "epoch": 0.52, "grad_norm": 2.59375, "learning_rate": 0.00016800601400325987, "loss": 2.0719, "step": 222625 }, { "epoch": 0.52, "grad_norm": 2.625, "learning_rate": 0.00016800465890505048, "loss": 2.1581, "step": 222630 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016800330378360947, "loss": 2.1371, "step": 222635 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.0001680019486389373, "loss": 2.2594, "step": 222640 }, { "epoch": 0.52, "grad_norm": 1.9453125, "learning_rate": 0.00016800059347103443, "loss": 2.0901, "step": 222645 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.00016799923827990135, "loss": 2.0305, "step": 222650 }, { "epoch": 0.52, "grad_norm": 2.109375, "learning_rate": 0.00016799788306553847, "loss": 2.1371, "step": 222655 }, { "epoch": 0.52, "grad_norm": 2.515625, "learning_rate": 0.00016799652782794626, "loss": 1.9931, "step": 222660 }, { "epoch": 0.52, "grad_norm": 2.6875, "learning_rate": 0.00016799517256712524, "loss": 2.1814, "step": 222665 }, { "epoch": 0.52, "grad_norm": 1.7578125, "learning_rate": 0.0001679938172830758, "loss": 2.009, "step": 222670 }, { "epoch": 0.52, "grad_norm": 2.84375, "learning_rate": 0.00016799246197579846, "loss": 1.9531, "step": 222675 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016799110664529368, "loss": 2.2807, "step": 222680 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001679897512915619, "loss": 1.9804, "step": 222685 }, { "epoch": 0.52, "grad_norm": 1.609375, "learning_rate": 0.00016798839591460358, "loss": 2.3605, "step": 222690 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.0001679870405144192, "loss": 2.2816, "step": 222695 }, { "epoch": 0.52, "grad_norm": 2.1875, "learning_rate": 0.00016798568509100918, "loss": 2.0587, "step": 222700 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016798432964437404, "loss": 1.9409, "step": 222705 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016798297417451423, "loss": 2.2374, "step": 222710 }, { "epoch": 0.52, "grad_norm": 2.8125, "learning_rate": 0.00016798161868143016, "loss": 2.1536, "step": 222715 }, { "epoch": 0.52, "grad_norm": 1.9140625, "learning_rate": 0.0001679802631651224, "loss": 2.0901, "step": 222720 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.0001679789076255913, "loss": 2.2205, "step": 222725 }, { "epoch": 0.52, "grad_norm": 2.171875, "learning_rate": 0.0001679775520628374, "loss": 2.424, "step": 222730 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016797619647686114, "loss": 2.2607, "step": 222735 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016797484086766295, "loss": 2.1719, "step": 222740 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016797348523524332, "loss": 2.1151, "step": 222745 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016797212957960276, "loss": 2.1317, "step": 222750 }, { "epoch": 0.52, "grad_norm": 1.90625, "learning_rate": 0.00016797077390074162, "loss": 1.9037, "step": 222755 }, { "epoch": 0.52, "grad_norm": 2.0, "learning_rate": 0.00016796941819866048, "loss": 2.0836, "step": 222760 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016796806247335977, "loss": 2.0982, "step": 222765 }, { "epoch": 0.52, "grad_norm": 1.7109375, "learning_rate": 0.00016796670672483986, "loss": 2.0239, "step": 222770 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016796535095310136, "loss": 2.0267, "step": 222775 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016796399515814464, "loss": 2.1895, "step": 222780 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.0001679626393399702, "loss": 2.1004, "step": 222785 }, { "epoch": 0.52, "grad_norm": 1.921875, "learning_rate": 0.00016796128349857848, "loss": 2.0257, "step": 222790 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016795992763396996, "loss": 2.1662, "step": 222795 }, { "epoch": 0.52, "grad_norm": 1.7578125, "learning_rate": 0.00016795857174614506, "loss": 1.959, "step": 222800 }, { "epoch": 0.52, "grad_norm": 1.65625, "learning_rate": 0.00016795721583510434, "loss": 1.9245, "step": 222805 }, { "epoch": 0.52, "grad_norm": 2.015625, "learning_rate": 0.00016795585990084816, "loss": 1.8617, "step": 222810 }, { "epoch": 0.52, "grad_norm": 2.046875, "learning_rate": 0.00016795450394337704, "loss": 1.988, "step": 222815 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016795314796269143, "loss": 2.1873, "step": 222820 }, { "epoch": 0.52, "grad_norm": 2.40625, "learning_rate": 0.00016795179195879176, "loss": 2.1057, "step": 222825 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016795043593167858, "loss": 2.158, "step": 222830 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016794907988135227, "loss": 1.9957, "step": 222835 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.00016794772380781332, "loss": 1.9428, "step": 222840 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.0001679463677110622, "loss": 2.1679, "step": 222845 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016794501159109936, "loss": 2.0308, "step": 222850 }, { "epoch": 0.52, "grad_norm": 2.0625, "learning_rate": 0.00016794365544792528, "loss": 2.1353, "step": 222855 }, { "epoch": 0.52, "grad_norm": 1.9765625, "learning_rate": 0.0001679422992815404, "loss": 2.234, "step": 222860 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.00016794094309194524, "loss": 2.206, "step": 222865 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.0001679395868791402, "loss": 2.0404, "step": 222870 }, { "epoch": 0.52, "grad_norm": 2.5, "learning_rate": 0.00016793823064312574, "loss": 2.0102, "step": 222875 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016793687438390237, "loss": 2.244, "step": 222880 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016793551810147052, "loss": 2.1661, "step": 222885 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016793416179583068, "loss": 2.1573, "step": 222890 }, { "epoch": 0.52, "grad_norm": 2.328125, "learning_rate": 0.00016793280546698332, "loss": 2.1171, "step": 222895 }, { "epoch": 0.52, "grad_norm": 2.15625, "learning_rate": 0.00016793144911492883, "loss": 2.1109, "step": 222900 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.00016793009273966775, "loss": 2.393, "step": 222905 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016792873634120055, "loss": 2.0397, "step": 222910 }, { "epoch": 0.52, "grad_norm": 2.25, "learning_rate": 0.0001679273799195276, "loss": 1.7978, "step": 222915 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016792602347464946, "loss": 2.2663, "step": 222920 }, { "epoch": 0.52, "grad_norm": 1.5625, "learning_rate": 0.00016792466700656658, "loss": 2.0169, "step": 222925 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016792331051527937, "loss": 2.1, "step": 222930 }, { "epoch": 0.52, "grad_norm": 2.234375, "learning_rate": 0.00016792195400078833, "loss": 2.1209, "step": 222935 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.0001679205974630939, "loss": 2.0731, "step": 222940 }, { "epoch": 0.52, "grad_norm": 1.5703125, "learning_rate": 0.00016791924090219656, "loss": 2.024, "step": 222945 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016791788431809682, "loss": 2.0769, "step": 222950 }, { "epoch": 0.52, "grad_norm": 2.078125, "learning_rate": 0.00016791652771079506, "loss": 2.2378, "step": 222955 }, { "epoch": 0.52, "grad_norm": 2.375, "learning_rate": 0.0001679151710802918, "loss": 2.0982, "step": 222960 }, { "epoch": 0.52, "grad_norm": 1.8984375, "learning_rate": 0.00016791381442658748, "loss": 1.9632, "step": 222965 }, { "epoch": 0.52, "grad_norm": 2.21875, "learning_rate": 0.00016791245774968257, "loss": 2.153, "step": 222970 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016791110104957756, "loss": 1.9762, "step": 222975 }, { "epoch": 0.52, "grad_norm": 2.6875, "learning_rate": 0.00016790974432627287, "loss": 2.2377, "step": 222980 }, { "epoch": 0.52, "grad_norm": 2.390625, "learning_rate": 0.00016790838757976896, "loss": 1.9416, "step": 222985 }, { "epoch": 0.52, "grad_norm": 2.203125, "learning_rate": 0.00016790703081006632, "loss": 2.2126, "step": 222990 }, { "epoch": 0.52, "grad_norm": 2.421875, "learning_rate": 0.00016790567401716542, "loss": 2.1344, "step": 222995 }, { "epoch": 0.52, "grad_norm": 2.5625, "learning_rate": 0.0001679043172010667, "loss": 2.2264, "step": 223000 }, { "epoch": 0.52, "grad_norm": 2.265625, "learning_rate": 0.00016790296036177065, "loss": 2.1753, "step": 223005 }, { "epoch": 0.52, "grad_norm": 1.8046875, "learning_rate": 0.00016790160349927768, "loss": 2.1248, "step": 223010 }, { "epoch": 0.52, "grad_norm": 2.125, "learning_rate": 0.00016790024661358832, "loss": 2.1434, "step": 223015 }, { "epoch": 0.52, "grad_norm": 2.140625, "learning_rate": 0.000167898889704703, "loss": 2.1934, "step": 223020 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.00016789753277262218, "loss": 2.2087, "step": 223025 }, { "epoch": 0.52, "grad_norm": 2.03125, "learning_rate": 0.00016789617581734634, "loss": 2.1714, "step": 223030 }, { "epoch": 0.52, "grad_norm": 2.3125, "learning_rate": 0.00016789481883887594, "loss": 2.0895, "step": 223035 }, { "epoch": 0.52, "grad_norm": 2.09375, "learning_rate": 0.00016789346183721142, "loss": 2.0732, "step": 223040 }, { "epoch": 0.52, "grad_norm": 1.984375, "learning_rate": 0.00016789210481235327, "loss": 2.1572, "step": 223045 }, { "epoch": 0.52, "grad_norm": 1.9609375, "learning_rate": 0.00016789074776430197, "loss": 2.0495, "step": 223050 }, { "epoch": 0.52, "grad_norm": 1.875, "learning_rate": 0.0001678893906930579, "loss": 2.186, "step": 223055 }, { "epoch": 0.52, "grad_norm": 1.765625, "learning_rate": 0.00016788803359862168, "loss": 2.1776, "step": 223060 }, { "epoch": 0.52, "grad_norm": 1.9921875, "learning_rate": 0.00016788667648099358, "loss": 1.8031, "step": 223065 }, { "epoch": 0.52, "grad_norm": 2.34375, "learning_rate": 0.0001678853193401742, "loss": 2.268, "step": 223070 }, { "epoch": 0.52, "grad_norm": 2.296875, "learning_rate": 0.00016788396217616396, "loss": 2.167, "step": 223075 }, { "epoch": 0.52, "grad_norm": 1.84375, "learning_rate": 0.00016788260498896333, "loss": 2.1838, "step": 223080 }, { "epoch": 0.52, "grad_norm": 2.28125, "learning_rate": 0.00016788124777857277, "loss": 2.2263, "step": 223085 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016787989054499277, "loss": 2.0304, "step": 223090 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016787853328822372, "loss": 2.1399, "step": 223095 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016787717600826615, "loss": 1.9841, "step": 223100 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016787581870512052, "loss": 2.1714, "step": 223105 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016787446137878727, "loss": 2.0265, "step": 223110 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001678731040292669, "loss": 2.1582, "step": 223115 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.0001678717466565598, "loss": 2.2052, "step": 223120 }, { "epoch": 0.53, "grad_norm": 1.8203125, "learning_rate": 0.00016787038926066648, "loss": 2.1918, "step": 223125 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016786903184158743, "loss": 2.0274, "step": 223130 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001678676743993231, "loss": 1.9683, "step": 223135 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016786631693387392, "loss": 2.1685, "step": 223140 }, { "epoch": 0.53, "grad_norm": 2.453125, "learning_rate": 0.00016786495944524034, "loss": 2.1439, "step": 223145 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016786360193342295, "loss": 2.0822, "step": 223150 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016786224439842203, "loss": 2.1923, "step": 223155 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001678608868402382, "loss": 2.1647, "step": 223160 }, { "epoch": 0.53, "grad_norm": 1.875, "learning_rate": 0.00016785952925887183, "loss": 2.115, "step": 223165 }, { "epoch": 0.53, "grad_norm": 2.578125, "learning_rate": 0.00016785817165432344, "loss": 1.9723, "step": 223170 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016785681402659345, "loss": 2.1765, "step": 223175 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016785545637568232, "loss": 2.0891, "step": 223180 }, { "epoch": 0.53, "grad_norm": 3.875, "learning_rate": 0.00016785409870159057, "loss": 1.9802, "step": 223185 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.0001678527410043186, "loss": 2.1674, "step": 223190 }, { "epoch": 0.53, "grad_norm": 3.203125, "learning_rate": 0.00016785138328386694, "loss": 2.0315, "step": 223195 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016785002554023598, "loss": 2.0624, "step": 223200 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016784866777342625, "loss": 2.0614, "step": 223205 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016784730998343816, "loss": 2.115, "step": 223210 }, { "epoch": 0.53, "grad_norm": 2.96875, "learning_rate": 0.00016784595217027222, "loss": 2.0661, "step": 223215 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016784459433392886, "loss": 1.9062, "step": 223220 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016784323647440856, "loss": 2.2478, "step": 223225 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016784187859171176, "loss": 2.152, "step": 223230 }, { "epoch": 0.53, "grad_norm": 1.8984375, "learning_rate": 0.00016784052068583897, "loss": 2.1974, "step": 223235 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.0001678391627567906, "loss": 2.0411, "step": 223240 }, { "epoch": 0.53, "grad_norm": 2.546875, "learning_rate": 0.0001678378048045672, "loss": 2.0948, "step": 223245 }, { "epoch": 0.53, "grad_norm": 2.484375, "learning_rate": 0.0001678364468291691, "loss": 2.1167, "step": 223250 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.0001678350888305969, "loss": 2.1689, "step": 223255 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016783373080885098, "loss": 2.2333, "step": 223260 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016783237276393184, "loss": 2.3798, "step": 223265 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.0001678310146958399, "loss": 1.9692, "step": 223270 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016782965660457565, "loss": 2.1187, "step": 223275 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016782829849013958, "loss": 1.9444, "step": 223280 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016782694035253216, "loss": 2.1884, "step": 223285 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016782558219175382, "loss": 2.014, "step": 223290 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.000167824224007805, "loss": 2.092, "step": 223295 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016782286580068622, "loss": 2.2491, "step": 223300 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001678215075703979, "loss": 2.1327, "step": 223305 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016782014931694054, "loss": 1.889, "step": 223310 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016781879104031458, "loss": 2.0293, "step": 223315 }, { "epoch": 0.53, "grad_norm": 2.59375, "learning_rate": 0.00016781743274052048, "loss": 2.1208, "step": 223320 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016781607441755872, "loss": 2.1021, "step": 223325 }, { "epoch": 0.53, "grad_norm": 1.8515625, "learning_rate": 0.00016781471607142976, "loss": 1.8657, "step": 223330 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016781335770213408, "loss": 2.1163, "step": 223335 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016781199930967212, "loss": 2.1405, "step": 223340 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016781064089404436, "loss": 2.2499, "step": 223345 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.0001678092824552512, "loss": 2.0042, "step": 223350 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016780792399329322, "loss": 2.0092, "step": 223355 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.00016780656550817078, "loss": 2.2444, "step": 223360 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.00016780520699988442, "loss": 2.0866, "step": 223365 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016780384846843454, "loss": 2.298, "step": 223370 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.0001678024899138217, "loss": 2.0394, "step": 223375 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016780113133604626, "loss": 2.1933, "step": 223380 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.0001677997727351087, "loss": 2.0903, "step": 223385 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016779841411100954, "loss": 1.9954, "step": 223390 }, { "epoch": 0.53, "grad_norm": 1.71875, "learning_rate": 0.0001677970554637492, "loss": 2.2318, "step": 223395 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016779569679332816, "loss": 2.034, "step": 223400 }, { "epoch": 0.53, "grad_norm": 1.765625, "learning_rate": 0.00016779433809974688, "loss": 1.9693, "step": 223405 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001677929793830058, "loss": 1.8557, "step": 223410 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016779162064310544, "loss": 2.1375, "step": 223415 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016779026188004626, "loss": 2.1387, "step": 223420 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016778890309382863, "loss": 2.176, "step": 223425 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016778754428445312, "loss": 2.0683, "step": 223430 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016778618545192016, "loss": 2.1118, "step": 223435 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016778482659623016, "loss": 2.1417, "step": 223440 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.0001677834677173837, "loss": 2.0838, "step": 223445 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016778210881538116, "loss": 2.0649, "step": 223450 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016778074989022302, "loss": 1.9166, "step": 223455 }, { "epoch": 0.53, "grad_norm": 3.625, "learning_rate": 0.00016777939094190974, "loss": 2.1928, "step": 223460 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016777803197044175, "loss": 2.0387, "step": 223465 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016777667297581962, "loss": 2.1306, "step": 223470 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.00016777531395804373, "loss": 2.2465, "step": 223475 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016777395491711453, "loss": 2.2141, "step": 223480 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016777259585303256, "loss": 1.9562, "step": 223485 }, { "epoch": 0.53, "grad_norm": 2.734375, "learning_rate": 0.00016777123676579823, "loss": 2.2729, "step": 223490 }, { "epoch": 0.53, "grad_norm": 2.53125, "learning_rate": 0.000167769877655412, "loss": 2.1015, "step": 223495 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001677685185218744, "loss": 2.1217, "step": 223500 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.0001677671593651858, "loss": 2.2734, "step": 223505 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.0001677658001853467, "loss": 2.2404, "step": 223510 }, { "epoch": 0.53, "grad_norm": 1.7578125, "learning_rate": 0.0001677644409823576, "loss": 2.1713, "step": 223515 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016776308175621893, "loss": 2.2013, "step": 223520 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016776172250693115, "loss": 1.9616, "step": 223525 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016776036323449476, "loss": 2.1091, "step": 223530 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.0001677590039389102, "loss": 2.0422, "step": 223535 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016775764462017794, "loss": 2.2032, "step": 223540 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.00016775628527829838, "loss": 1.9506, "step": 223545 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.0001677549259132721, "loss": 2.1739, "step": 223550 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016775356652509952, "loss": 2.1874, "step": 223555 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016775220711378106, "loss": 2.0794, "step": 223560 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016775084767931724, "loss": 2.2806, "step": 223565 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016774948822170847, "loss": 2.1333, "step": 223570 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.0001677481287409553, "loss": 2.1682, "step": 223575 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.0001677467692370581, "loss": 2.029, "step": 223580 }, { "epoch": 0.53, "grad_norm": 3.5625, "learning_rate": 0.00016774540971001736, "loss": 2.0161, "step": 223585 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001677440501598336, "loss": 2.0973, "step": 223590 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016774269058650722, "loss": 2.03, "step": 223595 }, { "epoch": 0.53, "grad_norm": 1.8828125, "learning_rate": 0.00016774133099003871, "loss": 2.074, "step": 223600 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016773997137042854, "loss": 2.1499, "step": 223605 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016773861172767718, "loss": 2.0921, "step": 223610 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016773725206178504, "loss": 2.1176, "step": 223615 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016773589237275267, "loss": 2.0839, "step": 223620 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016773453266058047, "loss": 2.106, "step": 223625 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001677331729252689, "loss": 2.0781, "step": 223630 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.0001677318131668185, "loss": 2.1352, "step": 223635 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016773045338522966, "loss": 1.8356, "step": 223640 }, { "epoch": 0.53, "grad_norm": 1.71875, "learning_rate": 0.0001677290935805029, "loss": 1.7767, "step": 223645 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016772773375263857, "loss": 2.1617, "step": 223650 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.0001677263739016373, "loss": 2.0495, "step": 223655 }, { "epoch": 0.53, "grad_norm": 1.8515625, "learning_rate": 0.00016772501402749943, "loss": 2.1114, "step": 223660 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016772365413022548, "loss": 2.0496, "step": 223665 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001677222942098159, "loss": 1.9153, "step": 223670 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016772093426627117, "loss": 2.2178, "step": 223675 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016771957429959173, "loss": 2.1511, "step": 223680 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016771821430977806, "loss": 2.0338, "step": 223685 }, { "epoch": 0.53, "grad_norm": 2.625, "learning_rate": 0.0001677168542968306, "loss": 2.1014, "step": 223690 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.00016771549426074987, "loss": 2.0127, "step": 223695 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016771413420153626, "loss": 2.1937, "step": 223700 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016771277411919028, "loss": 2.1109, "step": 223705 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.0001677114140137124, "loss": 2.1753, "step": 223710 }, { "epoch": 0.53, "grad_norm": 1.78125, "learning_rate": 0.0001677100538851031, "loss": 2.1652, "step": 223715 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016770869373336278, "loss": 2.1016, "step": 223720 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016770733355849196, "loss": 2.0954, "step": 223725 }, { "epoch": 0.53, "grad_norm": 1.84375, "learning_rate": 0.00016770597336049107, "loss": 2.2484, "step": 223730 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001677046131393606, "loss": 2.0109, "step": 223735 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.000167703252895101, "loss": 2.0007, "step": 223740 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016770189262771274, "loss": 2.0675, "step": 223745 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001677005323371963, "loss": 2.3203, "step": 223750 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016769917202355214, "loss": 2.0814, "step": 223755 }, { "epoch": 0.53, "grad_norm": 2.515625, "learning_rate": 0.00016769781168678069, "loss": 2.0983, "step": 223760 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016769645132688244, "loss": 2.1787, "step": 223765 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.0001676950909438579, "loss": 2.1107, "step": 223770 }, { "epoch": 0.53, "grad_norm": 1.890625, "learning_rate": 0.00016769373053770746, "loss": 2.0695, "step": 223775 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001676923701084316, "loss": 2.189, "step": 223780 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016769100965603078, "loss": 2.1511, "step": 223785 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016768964918050552, "loss": 2.0867, "step": 223790 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.0001676882886818562, "loss": 2.0117, "step": 223795 }, { "epoch": 0.53, "grad_norm": 2.515625, "learning_rate": 0.0001676869281600834, "loss": 2.1335, "step": 223800 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016768556761518748, "loss": 2.0662, "step": 223805 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016768420704716896, "loss": 2.0357, "step": 223810 }, { "epoch": 0.53, "grad_norm": 1.828125, "learning_rate": 0.0001676828464560283, "loss": 2.0148, "step": 223815 }, { "epoch": 0.53, "grad_norm": 3.109375, "learning_rate": 0.00016768148584176592, "loss": 2.0808, "step": 223820 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.0001676801252043823, "loss": 2.2122, "step": 223825 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016767876454387796, "loss": 2.006, "step": 223830 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016767740386025332, "loss": 2.1231, "step": 223835 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016767604315350885, "loss": 2.1647, "step": 223840 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.000167674682423645, "loss": 2.0659, "step": 223845 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016767332167066227, "loss": 2.2394, "step": 223850 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016767196089456112, "loss": 2.1918, "step": 223855 }, { "epoch": 0.53, "grad_norm": 2.546875, "learning_rate": 0.00016767060009534196, "loss": 2.1973, "step": 223860 }, { "epoch": 0.53, "grad_norm": 2.640625, "learning_rate": 0.0001676692392730053, "loss": 2.255, "step": 223865 }, { "epoch": 0.53, "grad_norm": 1.8046875, "learning_rate": 0.00016766787842755162, "loss": 2.1685, "step": 223870 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016766651755898137, "loss": 2.1433, "step": 223875 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.000167665156667295, "loss": 2.1434, "step": 223880 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.000167663795752493, "loss": 2.227, "step": 223885 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016766243481457582, "loss": 2.0986, "step": 223890 }, { "epoch": 0.53, "grad_norm": 2.515625, "learning_rate": 0.0001676610738535439, "loss": 2.1875, "step": 223895 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016765971286939775, "loss": 2.3529, "step": 223900 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016765835186213781, "loss": 2.2211, "step": 223905 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016765699083176453, "loss": 2.105, "step": 223910 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016765562977827844, "loss": 1.9976, "step": 223915 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.0001676542687016799, "loss": 2.1406, "step": 223920 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.0001676529076019695, "loss": 2.0645, "step": 223925 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001676515464791476, "loss": 1.8824, "step": 223930 }, { "epoch": 0.53, "grad_norm": 1.8046875, "learning_rate": 0.0001676501853332147, "loss": 1.7713, "step": 223935 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001676488241641713, "loss": 1.9488, "step": 223940 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.0001676474629720178, "loss": 2.1877, "step": 223945 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016764610175675472, "loss": 2.1536, "step": 223950 }, { "epoch": 0.53, "grad_norm": 2.859375, "learning_rate": 0.00016764474051838253, "loss": 2.213, "step": 223955 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016764337925690163, "loss": 2.0799, "step": 223960 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.00016764201797231254, "loss": 1.962, "step": 223965 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016764065666461574, "loss": 2.1876, "step": 223970 }, { "epoch": 0.53, "grad_norm": 1.7890625, "learning_rate": 0.0001676392953338116, "loss": 2.2039, "step": 223975 }, { "epoch": 0.53, "grad_norm": 1.875, "learning_rate": 0.00016763793397990066, "loss": 2.0506, "step": 223980 }, { "epoch": 0.53, "grad_norm": 2.765625, "learning_rate": 0.0001676365726028834, "loss": 1.9909, "step": 223985 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.0001676352112027603, "loss": 2.0048, "step": 223990 }, { "epoch": 0.53, "grad_norm": 2.5, "learning_rate": 0.00016763384977953172, "loss": 1.9602, "step": 223995 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.0001676324883331982, "loss": 2.2895, "step": 224000 }, { "epoch": 0.53, "grad_norm": 1.9921875, "learning_rate": 0.0001676311268637602, "loss": 2.0123, "step": 224005 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016762976537121823, "loss": 2.1189, "step": 224010 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016762840385557267, "loss": 2.0907, "step": 224015 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016762704231682398, "loss": 1.9527, "step": 224020 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001676256807549727, "loss": 2.0739, "step": 224025 }, { "epoch": 0.53, "grad_norm": 1.78125, "learning_rate": 0.00016762431917001925, "loss": 2.3182, "step": 224030 }, { "epoch": 0.53, "grad_norm": 1.5, "learning_rate": 0.00016762295756196413, "loss": 2.1654, "step": 224035 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016762159593080778, "loss": 2.0564, "step": 224040 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016762023427655066, "loss": 2.0787, "step": 224045 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016761887259919323, "loss": 2.0527, "step": 224050 }, { "epoch": 0.53, "grad_norm": 1.796875, "learning_rate": 0.00016761751089873597, "loss": 2.2896, "step": 224055 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016761614917517936, "loss": 2.1998, "step": 224060 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016761478742852385, "loss": 1.9585, "step": 224065 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016761342565876986, "loss": 1.9621, "step": 224070 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016761206386591793, "loss": 2.1449, "step": 224075 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016761070204996848, "loss": 2.1979, "step": 224080 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.00016760934021092198, "loss": 2.0694, "step": 224085 }, { "epoch": 0.53, "grad_norm": 2.59375, "learning_rate": 0.00016760797834877893, "loss": 2.1454, "step": 224090 }, { "epoch": 0.53, "grad_norm": 1.9375, "learning_rate": 0.00016760661646353974, "loss": 1.9941, "step": 224095 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016760525455520494, "loss": 2.1685, "step": 224100 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016760389262377495, "loss": 2.1148, "step": 224105 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.0001676025306692502, "loss": 2.0929, "step": 224110 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016760116869163124, "loss": 2.0931, "step": 224115 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016759980669091848, "loss": 2.0977, "step": 224120 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016759844466711243, "loss": 2.0319, "step": 224125 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001675970826202135, "loss": 2.1289, "step": 224130 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016759572055022217, "loss": 2.1874, "step": 224135 }, { "epoch": 0.53, "grad_norm": 1.8046875, "learning_rate": 0.00016759435845713894, "loss": 2.2475, "step": 224140 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016759299634096423, "loss": 2.1755, "step": 224145 }, { "epoch": 0.53, "grad_norm": 2.65625, "learning_rate": 0.00016759163420169854, "loss": 2.0827, "step": 224150 }, { "epoch": 0.53, "grad_norm": 2.53125, "learning_rate": 0.00016759027203934232, "loss": 1.9803, "step": 224155 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016758890985389604, "loss": 2.1666, "step": 224160 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016758754764536015, "loss": 2.0215, "step": 224165 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016758618541373515, "loss": 1.9848, "step": 224170 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016758482315902147, "loss": 2.0406, "step": 224175 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.0001675834608812196, "loss": 2.2479, "step": 224180 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016758209858032998, "loss": 2.0684, "step": 224185 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016758073625635308, "loss": 2.281, "step": 224190 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016757937390928938, "loss": 2.0221, "step": 224195 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016757801153913934, "loss": 2.0275, "step": 224200 }, { "epoch": 0.53, "grad_norm": 1.875, "learning_rate": 0.00016757664914590344, "loss": 2.143, "step": 224205 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.0001675752867295821, "loss": 1.945, "step": 224210 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016757392429017585, "loss": 2.0877, "step": 224215 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016757256182768512, "loss": 1.9527, "step": 224220 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016757119934211038, "loss": 1.9235, "step": 224225 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.0001675698368334521, "loss": 2.2098, "step": 224230 }, { "epoch": 0.53, "grad_norm": 1.6328125, "learning_rate": 0.0001675684743017107, "loss": 1.9457, "step": 224235 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.0001675671117468867, "loss": 2.0998, "step": 224240 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016756574916898057, "loss": 2.1693, "step": 224245 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016756438656799274, "loss": 2.0632, "step": 224250 }, { "epoch": 0.53, "grad_norm": 2.5, "learning_rate": 0.0001675630239439237, "loss": 2.2877, "step": 224255 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016756166129677387, "loss": 2.2667, "step": 224260 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001675602986265438, "loss": 2.2282, "step": 224265 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016755893593323385, "loss": 2.189, "step": 224270 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.00016755757321684458, "loss": 1.7126, "step": 224275 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001675562104773764, "loss": 2.1515, "step": 224280 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016755484771482983, "loss": 2.3555, "step": 224285 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016755348492920527, "loss": 2.0678, "step": 224290 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001675521221205032, "loss": 2.1572, "step": 224295 }, { "epoch": 0.53, "grad_norm": 2.515625, "learning_rate": 0.00016755075928872413, "loss": 2.1364, "step": 224300 }, { "epoch": 0.53, "grad_norm": 1.7578125, "learning_rate": 0.0001675493964338685, "loss": 2.0422, "step": 224305 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016754803355593673, "loss": 2.1243, "step": 224310 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016754667065492935, "loss": 2.1393, "step": 224315 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016754530773084684, "loss": 2.1499, "step": 224320 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016754394478368955, "loss": 1.9112, "step": 224325 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016754258181345808, "loss": 1.8714, "step": 224330 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016754121882015283, "loss": 2.0473, "step": 224335 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016753985580377428, "loss": 2.0317, "step": 224340 }, { "epoch": 0.53, "grad_norm": 1.8828125, "learning_rate": 0.00016753849276432287, "loss": 2.0135, "step": 224345 }, { "epoch": 0.53, "grad_norm": 2.453125, "learning_rate": 0.0001675371297017991, "loss": 2.2548, "step": 224350 }, { "epoch": 0.53, "grad_norm": 2.453125, "learning_rate": 0.0001675357666162034, "loss": 2.0043, "step": 224355 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016753440350753628, "loss": 2.1243, "step": 224360 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.0001675330403757982, "loss": 1.865, "step": 224365 }, { "epoch": 0.53, "grad_norm": 1.796875, "learning_rate": 0.00016753167722098958, "loss": 2.1193, "step": 224370 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001675303140431109, "loss": 2.0943, "step": 224375 }, { "epoch": 0.53, "grad_norm": 2.5, "learning_rate": 0.00016752895084216267, "loss": 2.1673, "step": 224380 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.00016752758761814533, "loss": 2.1234, "step": 224385 }, { "epoch": 0.53, "grad_norm": 3.09375, "learning_rate": 0.0001675262243710593, "loss": 2.345, "step": 224390 }, { "epoch": 0.53, "grad_norm": 1.859375, "learning_rate": 0.00016752486110090513, "loss": 2.0154, "step": 224395 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016752349780768325, "loss": 2.0537, "step": 224400 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016752213449139407, "loss": 1.9641, "step": 224405 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016752077115203814, "loss": 2.0838, "step": 224410 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.00016751940778961588, "loss": 2.1554, "step": 224415 }, { "epoch": 0.53, "grad_norm": 1.7265625, "learning_rate": 0.00016751804440412777, "loss": 2.2202, "step": 224420 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016751668099557426, "loss": 2.0321, "step": 224425 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016751531756395584, "loss": 2.1781, "step": 224430 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016751395410927295, "loss": 2.047, "step": 224435 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016751259063152606, "loss": 2.1339, "step": 224440 }, { "epoch": 0.53, "grad_norm": 1.8984375, "learning_rate": 0.00016751122713071567, "loss": 1.9913, "step": 224445 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.00016750986360684222, "loss": 1.8366, "step": 224450 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016750850005990616, "loss": 2.1112, "step": 224455 }, { "epoch": 0.53, "grad_norm": 3.421875, "learning_rate": 0.00016750713648990798, "loss": 2.1041, "step": 224460 }, { "epoch": 0.53, "grad_norm": 1.9296875, "learning_rate": 0.00016750577289684814, "loss": 1.8296, "step": 224465 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016750440928072714, "loss": 2.1356, "step": 224470 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016750304564154536, "loss": 2.0426, "step": 224475 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.00016750168197930335, "loss": 2.1694, "step": 224480 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.0001675003182940015, "loss": 1.8403, "step": 224485 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016749895458564033, "loss": 2.0597, "step": 224490 }, { "epoch": 0.53, "grad_norm": 1.8984375, "learning_rate": 0.00016749759085422027, "loss": 2.0299, "step": 224495 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.00016749622709974187, "loss": 2.1757, "step": 224500 }, { "epoch": 0.53, "grad_norm": 2.578125, "learning_rate": 0.0001674948633222055, "loss": 2.3122, "step": 224505 }, { "epoch": 0.53, "grad_norm": 1.890625, "learning_rate": 0.00016749349952161166, "loss": 2.1141, "step": 224510 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.0001674921356979608, "loss": 2.157, "step": 224515 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016749077185125341, "loss": 2.2004, "step": 224520 }, { "epoch": 0.53, "grad_norm": 1.859375, "learning_rate": 0.00016748940798148998, "loss": 2.1459, "step": 224525 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016748804408867093, "loss": 2.0933, "step": 224530 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016748668017279675, "loss": 2.0403, "step": 224535 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016748531623386785, "loss": 2.2275, "step": 224540 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016748395227188478, "loss": 2.0041, "step": 224545 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016748258828684794, "loss": 2.1924, "step": 224550 }, { "epoch": 0.53, "grad_norm": 2.640625, "learning_rate": 0.00016748122427875787, "loss": 2.1074, "step": 224555 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016747986024761498, "loss": 2.1561, "step": 224560 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.0001674784961934197, "loss": 2.13, "step": 224565 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001674771321161726, "loss": 2.0236, "step": 224570 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016747576801587403, "loss": 2.0618, "step": 224575 }, { "epoch": 0.53, "grad_norm": 2.640625, "learning_rate": 0.00016747440389252457, "loss": 2.0475, "step": 224580 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.0001674730397461246, "loss": 2.236, "step": 224585 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001674716755766746, "loss": 1.9974, "step": 224590 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016747031138417508, "loss": 2.1044, "step": 224595 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016746894716862648, "loss": 2.1788, "step": 224600 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016746758293002925, "loss": 2.2005, "step": 224605 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016746621866838387, "loss": 1.8215, "step": 224610 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016746485438369078, "loss": 2.0811, "step": 224615 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016746349007595052, "loss": 2.0062, "step": 224620 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001674621257451635, "loss": 2.0652, "step": 224625 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.0001674607613913302, "loss": 2.1361, "step": 224630 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016745939701445106, "loss": 2.1326, "step": 224635 }, { "epoch": 0.53, "grad_norm": 1.828125, "learning_rate": 0.00016745803261452658, "loss": 2.1093, "step": 224640 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.0001674566681915572, "loss": 2.1228, "step": 224645 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001674553037455434, "loss": 2.1815, "step": 224650 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016745393927648566, "loss": 1.9667, "step": 224655 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016745257478438438, "loss": 2.2277, "step": 224660 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016745121026924014, "loss": 2.239, "step": 224665 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016744984573105332, "loss": 1.9545, "step": 224670 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.0001674484811698244, "loss": 2.1444, "step": 224675 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016744711658555389, "loss": 2.204, "step": 224680 }, { "epoch": 0.53, "grad_norm": 2.65625, "learning_rate": 0.00016744575197824216, "loss": 1.8763, "step": 224685 }, { "epoch": 0.53, "grad_norm": 1.8828125, "learning_rate": 0.00016744438734788978, "loss": 2.0609, "step": 224690 }, { "epoch": 0.53, "grad_norm": 1.8515625, "learning_rate": 0.00016744302269449716, "loss": 1.9844, "step": 224695 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016744165801806479, "loss": 1.9922, "step": 224700 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016744029331859314, "loss": 2.0278, "step": 224705 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016743892859608263, "loss": 2.2616, "step": 224710 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001674375638505338, "loss": 1.9632, "step": 224715 }, { "epoch": 0.53, "grad_norm": 1.9375, "learning_rate": 0.00016743619908194705, "loss": 2.0238, "step": 224720 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016743483429032289, "loss": 2.0376, "step": 224725 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016743346947566172, "loss": 1.9867, "step": 224730 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.0001674321046379641, "loss": 2.1762, "step": 224735 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016743073977723043, "loss": 1.9296, "step": 224740 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.0001674293748934612, "loss": 1.8124, "step": 224745 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016742800998665686, "loss": 1.9338, "step": 224750 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016742664505681792, "loss": 1.9998, "step": 224755 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016742528010394478, "loss": 2.1223, "step": 224760 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016742391512803798, "loss": 2.1353, "step": 224765 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016742255012909793, "loss": 1.9697, "step": 224770 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.0001674211851071251, "loss": 1.9201, "step": 224775 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.00016741982006212, "loss": 2.1079, "step": 224780 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.000167418454994083, "loss": 2.0327, "step": 224785 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001674170899030147, "loss": 2.0631, "step": 224790 }, { "epoch": 0.53, "grad_norm": 1.8203125, "learning_rate": 0.00016741572478891549, "loss": 2.0843, "step": 224795 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016741435965178584, "loss": 1.9199, "step": 224800 }, { "epoch": 0.53, "grad_norm": 1.625, "learning_rate": 0.00016741299449162623, "loss": 2.0417, "step": 224805 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016741162930843708, "loss": 2.1065, "step": 224810 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016741026410221893, "loss": 1.9607, "step": 224815 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016740889887297222, "loss": 2.0227, "step": 224820 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016740753362069738, "loss": 2.061, "step": 224825 }, { "epoch": 0.53, "grad_norm": 2.796875, "learning_rate": 0.0001674061683453949, "loss": 2.2245, "step": 224830 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016740480304706527, "loss": 2.0349, "step": 224835 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016740343772570895, "loss": 2.096, "step": 224840 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016740207238132636, "loss": 1.968, "step": 224845 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016740070701391804, "loss": 1.8633, "step": 224850 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016739934162348437, "loss": 2.1696, "step": 224855 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016739797621002588, "loss": 2.1427, "step": 224860 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016739661077354304, "loss": 2.023, "step": 224865 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.0001673952453140363, "loss": 2.1294, "step": 224870 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001673938798315061, "loss": 2.1735, "step": 224875 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001673925143259529, "loss": 1.8106, "step": 224880 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.0001673911487973772, "loss": 2.0765, "step": 224885 }, { "epoch": 0.53, "grad_norm": 1.8125, "learning_rate": 0.0001673897832457795, "loss": 2.1711, "step": 224890 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016738841767116022, "loss": 2.1361, "step": 224895 }, { "epoch": 0.53, "grad_norm": 1.8046875, "learning_rate": 0.00016738705207351983, "loss": 2.0692, "step": 224900 }, { "epoch": 0.53, "grad_norm": 2.71875, "learning_rate": 0.00016738568645285881, "loss": 2.2706, "step": 224905 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016738432080917759, "loss": 2.2092, "step": 224910 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016738295514247668, "loss": 2.1034, "step": 224915 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016738158945275652, "loss": 2.0061, "step": 224920 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.0001673802237400176, "loss": 2.1954, "step": 224925 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.0001673788580042604, "loss": 2.1443, "step": 224930 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.0001673774922454853, "loss": 2.163, "step": 224935 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016737612646369288, "loss": 2.1422, "step": 224940 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001673747606588835, "loss": 2.2119, "step": 224945 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016737339483105773, "loss": 2.153, "step": 224950 }, { "epoch": 0.53, "grad_norm": 2.5, "learning_rate": 0.00016737202898021596, "loss": 2.0396, "step": 224955 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001673706631063587, "loss": 2.0548, "step": 224960 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.00016736929720948638, "loss": 2.2874, "step": 224965 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.0001673679312895995, "loss": 1.9928, "step": 224970 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016736656534669853, "loss": 2.0494, "step": 224975 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016736519938078388, "loss": 2.0744, "step": 224980 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016736383339185608, "loss": 2.2727, "step": 224985 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016736246737991553, "loss": 1.9735, "step": 224990 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.0001673611013449628, "loss": 2.1968, "step": 224995 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016735973528699826, "loss": 2.1038, "step": 225000 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016735836920602246, "loss": 2.2032, "step": 225005 }, { "epoch": 0.53, "grad_norm": 1.8125, "learning_rate": 0.00016735700310203578, "loss": 2.0357, "step": 225010 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016735563697503874, "loss": 2.0758, "step": 225015 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016735427082503174, "loss": 2.1736, "step": 225020 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016735290465201536, "loss": 2.1155, "step": 225025 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016735153845599002, "loss": 2.278, "step": 225030 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016735017223695612, "loss": 1.95, "step": 225035 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.0001673488059949142, "loss": 2.0959, "step": 225040 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001673474397298647, "loss": 2.1132, "step": 225045 }, { "epoch": 0.53, "grad_norm": 2.796875, "learning_rate": 0.0001673460734418081, "loss": 2.0646, "step": 225050 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016734470713074487, "loss": 2.1058, "step": 225055 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016734334079667547, "loss": 2.1618, "step": 225060 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.00016734197443960035, "loss": 1.9385, "step": 225065 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016734060805951998, "loss": 2.0609, "step": 225070 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016733924165643487, "loss": 2.1957, "step": 225075 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016733787523034543, "loss": 2.0521, "step": 225080 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016733650878125213, "loss": 2.0092, "step": 225085 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.0001673351423091555, "loss": 2.2573, "step": 225090 }, { "epoch": 0.53, "grad_norm": 1.8046875, "learning_rate": 0.00016733377581405593, "loss": 2.1364, "step": 225095 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016733240929595392, "loss": 2.1083, "step": 225100 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016733104275484995, "loss": 1.735, "step": 225105 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016732967619074448, "loss": 2.0144, "step": 225110 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016732830960363798, "loss": 1.9336, "step": 225115 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016732694299353092, "loss": 2.1483, "step": 225120 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016732557636042373, "loss": 2.1075, "step": 225125 }, { "epoch": 0.53, "grad_norm": 2.75, "learning_rate": 0.0001673242097043169, "loss": 2.0138, "step": 225130 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001673228430252109, "loss": 1.9901, "step": 225135 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.0001673214763231062, "loss": 2.1665, "step": 225140 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016732010959800327, "loss": 2.155, "step": 225145 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016731874284990256, "loss": 2.1294, "step": 225150 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.00016731737607880454, "loss": 2.2497, "step": 225155 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.0001673160092847097, "loss": 2.1812, "step": 225160 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001673146424676185, "loss": 2.03, "step": 225165 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.0001673132756275314, "loss": 2.0007, "step": 225170 }, { "epoch": 0.53, "grad_norm": 1.5546875, "learning_rate": 0.00016731190876444885, "loss": 2.0307, "step": 225175 }, { "epoch": 0.53, "grad_norm": 1.859375, "learning_rate": 0.0001673105418783713, "loss": 2.0284, "step": 225180 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016730917496929928, "loss": 2.1311, "step": 225185 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016730780803723324, "loss": 2.1737, "step": 225190 }, { "epoch": 0.53, "grad_norm": 2.453125, "learning_rate": 0.0001673064410821736, "loss": 2.1671, "step": 225195 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016730507410412087, "loss": 2.2306, "step": 225200 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001673037071030755, "loss": 2.1227, "step": 225205 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016730234007903796, "loss": 2.1294, "step": 225210 }, { "epoch": 0.53, "grad_norm": 2.71875, "learning_rate": 0.00016730097303200873, "loss": 2.1021, "step": 225215 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016729960596198828, "loss": 2.0048, "step": 225220 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016729823886897703, "loss": 1.9097, "step": 225225 }, { "epoch": 0.53, "grad_norm": 2.71875, "learning_rate": 0.00016729687175297552, "loss": 1.8844, "step": 225230 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016729550461398415, "loss": 2.1507, "step": 225235 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016729413745200343, "loss": 2.2852, "step": 225240 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016729277026703381, "loss": 2.0234, "step": 225245 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016729140305907575, "loss": 2.1127, "step": 225250 }, { "epoch": 0.53, "grad_norm": 1.7109375, "learning_rate": 0.00016729003582812975, "loss": 2.0935, "step": 225255 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.0001672886685741962, "loss": 2.1234, "step": 225260 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001672873012972757, "loss": 2.2147, "step": 225265 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016728593399736858, "loss": 2.1102, "step": 225270 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016728456667447538, "loss": 2.1515, "step": 225275 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016728319932859655, "loss": 2.0761, "step": 225280 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016728183195973255, "loss": 1.9824, "step": 225285 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016728046456788387, "loss": 1.993, "step": 225290 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016727909715305095, "loss": 2.0976, "step": 225295 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016727772971523427, "loss": 2.2588, "step": 225300 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016727636225443435, "loss": 2.1587, "step": 225305 }, { "epoch": 0.53, "grad_norm": 2.484375, "learning_rate": 0.00016727499477065157, "loss": 2.0479, "step": 225310 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.0001672736272638864, "loss": 2.136, "step": 225315 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016727225973413935, "loss": 2.0887, "step": 225320 }, { "epoch": 0.53, "grad_norm": 2.859375, "learning_rate": 0.00016727089218141088, "loss": 2.241, "step": 225325 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.0001672695246057015, "loss": 2.3281, "step": 225330 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016726815700701154, "loss": 2.1933, "step": 225335 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016726678938534163, "loss": 2.1865, "step": 225340 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016726542174069214, "loss": 2.0344, "step": 225345 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.00016726405407306358, "loss": 2.2205, "step": 225350 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016726268638245638, "loss": 2.063, "step": 225355 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016726131866887103, "loss": 2.2039, "step": 225360 }, { "epoch": 0.53, "grad_norm": 1.859375, "learning_rate": 0.00016725995093230797, "loss": 2.1378, "step": 225365 }, { "epoch": 0.53, "grad_norm": 1.8203125, "learning_rate": 0.00016725858317276772, "loss": 1.9045, "step": 225370 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016725721539025075, "loss": 1.8199, "step": 225375 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.00016725584758475742, "loss": 2.1212, "step": 225380 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016725447975628833, "loss": 2.2127, "step": 225385 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.0001672531119048439, "loss": 2.258, "step": 225390 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016725174403042455, "loss": 2.1973, "step": 225395 }, { "epoch": 0.53, "grad_norm": 2.8125, "learning_rate": 0.00016725037613303075, "loss": 2.1435, "step": 225400 }, { "epoch": 0.53, "grad_norm": 1.9296875, "learning_rate": 0.00016724900821266306, "loss": 2.1792, "step": 225405 }, { "epoch": 0.53, "grad_norm": 1.7890625, "learning_rate": 0.0001672476402693219, "loss": 2.0665, "step": 225410 }, { "epoch": 0.53, "grad_norm": 2.484375, "learning_rate": 0.00016724627230300772, "loss": 2.2792, "step": 225415 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016724490431372093, "loss": 2.218, "step": 225420 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.0001672435363014621, "loss": 2.0782, "step": 225425 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.0001672421682662317, "loss": 2.1866, "step": 225430 }, { "epoch": 0.53, "grad_norm": 1.7890625, "learning_rate": 0.00016724080020803013, "loss": 1.9457, "step": 225435 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016723943212685786, "loss": 1.8663, "step": 225440 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016723806402271538, "loss": 1.9637, "step": 225445 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016723669589560318, "loss": 2.0389, "step": 225450 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016723532774552167, "loss": 2.1586, "step": 225455 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.0001672339595724714, "loss": 2.0467, "step": 225460 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016723259137645279, "loss": 2.1627, "step": 225465 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.0001672312231574663, "loss": 2.1164, "step": 225470 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016722985491551237, "loss": 2.206, "step": 225475 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016722848665059152, "loss": 2.0797, "step": 225480 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.0001672271183627042, "loss": 2.1059, "step": 225485 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.0001672257500518509, "loss": 2.1175, "step": 225490 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016722438171803202, "loss": 2.0158, "step": 225495 }, { "epoch": 0.53, "grad_norm": 1.875, "learning_rate": 0.0001672230133612481, "loss": 2.0372, "step": 225500 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016722164498149958, "loss": 1.8849, "step": 225505 }, { "epoch": 0.53, "grad_norm": 2.6875, "learning_rate": 0.00016722027657878696, "loss": 2.1389, "step": 225510 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016721890815311064, "loss": 2.2261, "step": 225515 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.0001672175397044711, "loss": 1.9868, "step": 225520 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016721617123286885, "loss": 2.2461, "step": 225525 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016721480273830435, "loss": 1.889, "step": 225530 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016721343422077803, "loss": 2.1215, "step": 225535 }, { "epoch": 0.53, "grad_norm": 1.9921875, "learning_rate": 0.0001672120656802904, "loss": 2.1114, "step": 225540 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001672106971168419, "loss": 2.2292, "step": 225545 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016720932853043303, "loss": 2.2395, "step": 225550 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016720795992106419, "loss": 1.8407, "step": 225555 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016720659128873592, "loss": 2.0796, "step": 225560 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016720522263344867, "loss": 2.0821, "step": 225565 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016720385395520285, "loss": 2.0669, "step": 225570 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016720248525399901, "loss": 2.1795, "step": 225575 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016720111652983758, "loss": 2.1113, "step": 225580 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016719974778271906, "loss": 2.1214, "step": 225585 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016719837901264385, "loss": 2.1333, "step": 225590 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016719701021961246, "loss": 2.0297, "step": 225595 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016719564140362536, "loss": 2.0344, "step": 225600 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.000167194272564683, "loss": 1.9645, "step": 225605 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016719290370278587, "loss": 2.1981, "step": 225610 }, { "epoch": 0.53, "grad_norm": 1.8046875, "learning_rate": 0.00016719153481793442, "loss": 2.1313, "step": 225615 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016719016591012912, "loss": 1.9975, "step": 225620 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016718879697937044, "loss": 2.1146, "step": 225625 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016718742802565884, "loss": 2.2846, "step": 225630 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016718605904899483, "loss": 2.0413, "step": 225635 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001671846900493788, "loss": 1.9648, "step": 225640 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016718332102681128, "loss": 2.0874, "step": 225645 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016718195198129273, "loss": 1.9966, "step": 225650 }, { "epoch": 0.53, "grad_norm": 2.53125, "learning_rate": 0.0001671805829128236, "loss": 2.0166, "step": 225655 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016717921382140437, "loss": 2.1337, "step": 225660 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016717784470703548, "loss": 2.1524, "step": 225665 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016717647556971745, "loss": 2.1875, "step": 225670 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001671751064094507, "loss": 2.0088, "step": 225675 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016717373722623574, "loss": 2.0043, "step": 225680 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016717236802007296, "loss": 2.0339, "step": 225685 }, { "epoch": 0.53, "grad_norm": 1.890625, "learning_rate": 0.00016717099879096293, "loss": 2.2919, "step": 225690 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016716962953890603, "loss": 2.1854, "step": 225695 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016716826026390278, "loss": 1.9591, "step": 225700 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016716689096595366, "loss": 2.0476, "step": 225705 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.0001671655216450591, "loss": 2.2371, "step": 225710 }, { "epoch": 0.53, "grad_norm": 1.7109375, "learning_rate": 0.00016716415230121957, "loss": 2.1439, "step": 225715 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016716278293443554, "loss": 2.1391, "step": 225720 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.0001671614135447075, "loss": 2.0213, "step": 225725 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.0001671600441320359, "loss": 1.9183, "step": 225730 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001671586746964212, "loss": 2.2286, "step": 225735 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016715730523786387, "loss": 2.0003, "step": 225740 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016715593575636443, "loss": 1.9477, "step": 225745 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016715456625192326, "loss": 2.0199, "step": 225750 }, { "epoch": 0.53, "grad_norm": 2.5, "learning_rate": 0.00016715319672454092, "loss": 2.0403, "step": 225755 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.0001671518271742178, "loss": 1.9793, "step": 225760 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016715045760095439, "loss": 2.0004, "step": 225765 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016714908800475117, "loss": 2.0948, "step": 225770 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001671477183856086, "loss": 2.224, "step": 225775 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016714634874352718, "loss": 1.9854, "step": 225780 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016714497907850733, "loss": 1.993, "step": 225785 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016714360939054955, "loss": 1.9187, "step": 225790 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016714223967965425, "loss": 2.224, "step": 225795 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016714086994582198, "loss": 2.1901, "step": 225800 }, { "epoch": 0.53, "grad_norm": 2.5625, "learning_rate": 0.00016713950018905317, "loss": 2.0844, "step": 225805 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.0001671381304093483, "loss": 2.1445, "step": 225810 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.0001671367606067078, "loss": 1.863, "step": 225815 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016713539078113215, "loss": 1.9108, "step": 225820 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.0001671340209326219, "loss": 2.1791, "step": 225825 }, { "epoch": 0.53, "grad_norm": 2.65625, "learning_rate": 0.0001671326510611774, "loss": 2.0383, "step": 225830 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.0001671312811667992, "loss": 2.0632, "step": 225835 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016712991124948772, "loss": 2.1125, "step": 225840 }, { "epoch": 0.53, "grad_norm": 1.9921875, "learning_rate": 0.00016712854130924342, "loss": 2.1094, "step": 225845 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016712717134606683, "loss": 2.0276, "step": 225850 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.00016712580135995835, "loss": 2.2109, "step": 225855 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.00016712443135091852, "loss": 1.9244, "step": 225860 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016712306131894775, "loss": 2.1575, "step": 225865 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.0001671216912640465, "loss": 1.9265, "step": 225870 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016712032118621525, "loss": 1.9335, "step": 225875 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.00016711895108545454, "loss": 2.0357, "step": 225880 }, { "epoch": 0.53, "grad_norm": 1.875, "learning_rate": 0.00016711758096176475, "loss": 2.1281, "step": 225885 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016711621081514638, "loss": 2.2174, "step": 225890 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016711484064559987, "loss": 2.126, "step": 225895 }, { "epoch": 0.53, "grad_norm": 1.828125, "learning_rate": 0.00016711347045312577, "loss": 1.9401, "step": 225900 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016711210023772445, "loss": 2.3341, "step": 225905 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.0001671107299993964, "loss": 2.1614, "step": 225910 }, { "epoch": 0.53, "grad_norm": 2.59375, "learning_rate": 0.00016710935973814214, "loss": 2.2193, "step": 225915 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016710798945396212, "loss": 2.0211, "step": 225920 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016710661914685676, "loss": 2.2786, "step": 225925 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.0001671052488168266, "loss": 2.005, "step": 225930 }, { "epoch": 0.53, "grad_norm": 2.78125, "learning_rate": 0.000167103878463872, "loss": 2.0759, "step": 225935 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016710250808799358, "loss": 2.0978, "step": 225940 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016710113768919168, "loss": 2.1152, "step": 225945 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016709976726746683, "loss": 2.0392, "step": 225950 }, { "epoch": 0.53, "grad_norm": 1.796875, "learning_rate": 0.00016709839682281947, "loss": 2.1068, "step": 225955 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.0001670970263552501, "loss": 1.9014, "step": 225960 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016709565586475912, "loss": 1.9234, "step": 225965 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.0001670942853513471, "loss": 1.982, "step": 225970 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016709291481501444, "loss": 2.1244, "step": 225975 }, { "epoch": 0.53, "grad_norm": 2.65625, "learning_rate": 0.00016709154425576162, "loss": 2.1797, "step": 225980 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016709017367358913, "loss": 1.85, "step": 225985 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016708880306849739, "loss": 2.1188, "step": 225990 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.0001670874324404869, "loss": 2.218, "step": 225995 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016708606178955816, "loss": 2.1935, "step": 226000 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016708469111571158, "loss": 2.2047, "step": 226005 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016708332041894764, "loss": 1.995, "step": 226010 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016708194969926681, "loss": 2.1052, "step": 226015 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.0001670805789566696, "loss": 1.9338, "step": 226020 }, { "epoch": 0.53, "grad_norm": 2.546875, "learning_rate": 0.00016707920819115647, "loss": 2.2151, "step": 226025 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016707783740272785, "loss": 2.1414, "step": 226030 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.0001670764665913842, "loss": 2.2273, "step": 226035 }, { "epoch": 0.53, "grad_norm": 1.9296875, "learning_rate": 0.00016707509575712603, "loss": 2.156, "step": 226040 }, { "epoch": 0.53, "grad_norm": 1.84375, "learning_rate": 0.0001670737248999538, "loss": 2.0739, "step": 226045 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016707235401986793, "loss": 2.1358, "step": 226050 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016707098311686898, "loss": 2.2234, "step": 226055 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016706961219095735, "loss": 1.9028, "step": 226060 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016706824124213348, "loss": 2.2123, "step": 226065 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016706687027039793, "loss": 1.9919, "step": 226070 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016706549927575113, "loss": 2.088, "step": 226075 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.0001670641282581935, "loss": 2.1412, "step": 226080 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016706275721772555, "loss": 1.9737, "step": 226085 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.00016706138615434776, "loss": 2.1445, "step": 226090 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016706001506806059, "loss": 1.9769, "step": 226095 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.0001670586439588645, "loss": 2.0458, "step": 226100 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016705727282675996, "loss": 2.133, "step": 226105 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016705590167174746, "loss": 2.086, "step": 226110 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.0001670545304938274, "loss": 2.0251, "step": 226115 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016705315929300033, "loss": 1.9418, "step": 226120 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016705178806926667, "loss": 2.0869, "step": 226125 }, { "epoch": 0.53, "grad_norm": 1.796875, "learning_rate": 0.00016705041682262692, "loss": 2.0858, "step": 226130 }, { "epoch": 0.53, "grad_norm": 2.515625, "learning_rate": 0.00016704904555308152, "loss": 2.1162, "step": 226135 }, { "epoch": 0.53, "grad_norm": 2.859375, "learning_rate": 0.00016704767426063095, "loss": 2.0443, "step": 226140 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016704630294527565, "loss": 2.1018, "step": 226145 }, { "epoch": 0.53, "grad_norm": 1.9921875, "learning_rate": 0.00016704493160701618, "loss": 2.0169, "step": 226150 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001670435602458529, "loss": 2.1431, "step": 226155 }, { "epoch": 0.53, "grad_norm": 1.8984375, "learning_rate": 0.00016704218886178632, "loss": 2.1997, "step": 226160 }, { "epoch": 0.53, "grad_norm": 2.578125, "learning_rate": 0.00016704081745481694, "loss": 2.1376, "step": 226165 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016703944602494518, "loss": 2.1503, "step": 226170 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016703807457217154, "loss": 2.0526, "step": 226175 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.0001670367030964965, "loss": 2.0479, "step": 226180 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016703533159792046, "loss": 2.2134, "step": 226185 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.000167033960076444, "loss": 1.9799, "step": 226190 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016703258853206744, "loss": 2.1898, "step": 226195 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016703121696479137, "loss": 2.1148, "step": 226200 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016702984537461625, "loss": 2.2088, "step": 226205 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016702847376154248, "loss": 2.2661, "step": 226210 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016702710212557057, "loss": 2.083, "step": 226215 }, { "epoch": 0.53, "grad_norm": 1.9921875, "learning_rate": 0.000167025730466701, "loss": 2.1214, "step": 226220 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016702435878493424, "loss": 2.1803, "step": 226225 }, { "epoch": 0.53, "grad_norm": 2.890625, "learning_rate": 0.0001670229870802707, "loss": 2.0806, "step": 226230 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.00016702161535271092, "loss": 2.1935, "step": 226235 }, { "epoch": 0.53, "grad_norm": 3.578125, "learning_rate": 0.00016702024360225535, "loss": 2.2326, "step": 226240 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016701887182890443, "loss": 2.088, "step": 226245 }, { "epoch": 0.53, "grad_norm": 2.53125, "learning_rate": 0.00016701750003265863, "loss": 2.1104, "step": 226250 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001670161282135185, "loss": 2.1038, "step": 226255 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016701475637148438, "loss": 2.245, "step": 226260 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016701338450655687, "loss": 2.0126, "step": 226265 }, { "epoch": 0.53, "grad_norm": 1.796875, "learning_rate": 0.00016701201261873632, "loss": 2.033, "step": 226270 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016701064070802325, "loss": 2.0731, "step": 226275 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016700926877441817, "loss": 1.9742, "step": 226280 }, { "epoch": 0.53, "grad_norm": 1.859375, "learning_rate": 0.00016700789681792146, "loss": 2.2874, "step": 226285 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016700652483853364, "loss": 1.9707, "step": 226290 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016700515283625523, "loss": 2.0702, "step": 226295 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016700378081108662, "loss": 2.1186, "step": 226300 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016700240876302828, "loss": 2.1816, "step": 226305 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016700103669208073, "loss": 2.0872, "step": 226310 }, { "epoch": 0.53, "grad_norm": 2.6875, "learning_rate": 0.0001669996645982444, "loss": 2.0689, "step": 226315 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016699829248151976, "loss": 2.1493, "step": 226320 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.00016699692034190727, "loss": 2.0269, "step": 226325 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016699554817940743, "loss": 1.9816, "step": 226330 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016699417599402074, "loss": 2.1549, "step": 226335 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016699280378574757, "loss": 1.9599, "step": 226340 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016699143155458847, "loss": 2.0924, "step": 226345 }, { "epoch": 0.53, "grad_norm": 1.8828125, "learning_rate": 0.00016699005930054388, "loss": 2.1541, "step": 226350 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.0001669886870236143, "loss": 2.0595, "step": 226355 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001669873147238001, "loss": 2.0781, "step": 226360 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016698594240110185, "loss": 2.161, "step": 226365 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.00016698457005552003, "loss": 2.2664, "step": 226370 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.000166983197687055, "loss": 2.1661, "step": 226375 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016698182529570734, "loss": 2.0139, "step": 226380 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.00016698045288147744, "loss": 2.0861, "step": 226385 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.00016697908044436584, "loss": 2.1505, "step": 226390 }, { "epoch": 0.53, "grad_norm": 2.484375, "learning_rate": 0.00016697770798437293, "loss": 2.3362, "step": 226395 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016697633550149924, "loss": 2.1799, "step": 226400 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016697496299574523, "loss": 2.0832, "step": 226405 }, { "epoch": 0.53, "grad_norm": 2.6875, "learning_rate": 0.00016697359046711135, "loss": 2.0252, "step": 226410 }, { "epoch": 0.53, "grad_norm": 1.8203125, "learning_rate": 0.00016697221791559807, "loss": 2.0737, "step": 226415 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016697084534120587, "loss": 2.1916, "step": 226420 }, { "epoch": 0.53, "grad_norm": 1.78125, "learning_rate": 0.00016696947274393522, "loss": 1.9643, "step": 226425 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001669681001237866, "loss": 2.0293, "step": 226430 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016696672748076042, "loss": 2.2264, "step": 226435 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016696535481485722, "loss": 2.2, "step": 226440 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001669639821260774, "loss": 1.9523, "step": 226445 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.00016696260941442153, "loss": 2.2412, "step": 226450 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016696123667989002, "loss": 2.1204, "step": 226455 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016695986392248327, "loss": 2.1559, "step": 226460 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016695849114220187, "loss": 1.9523, "step": 226465 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016695711833904618, "loss": 2.1192, "step": 226470 }, { "epoch": 0.53, "grad_norm": 1.8203125, "learning_rate": 0.00016695574551301682, "loss": 2.1241, "step": 226475 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016695437266411406, "loss": 2.2478, "step": 226480 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016695299979233857, "loss": 2.2378, "step": 226485 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016695162689769064, "loss": 2.0636, "step": 226490 }, { "epoch": 0.53, "grad_norm": 1.8515625, "learning_rate": 0.00016695025398017084, "loss": 2.0933, "step": 226495 }, { "epoch": 0.53, "grad_norm": 1.859375, "learning_rate": 0.00016694888103977964, "loss": 2.0811, "step": 226500 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.0001669475080765175, "loss": 2.1952, "step": 226505 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016694613509038483, "loss": 2.1136, "step": 226510 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016694476208138216, "loss": 1.9957, "step": 226515 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016694338904950996, "loss": 1.9777, "step": 226520 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.0001669420159947687, "loss": 2.0851, "step": 226525 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.0001669406429171588, "loss": 2.3595, "step": 226530 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016693926981668078, "loss": 2.0292, "step": 226535 }, { "epoch": 0.53, "grad_norm": 1.8203125, "learning_rate": 0.0001669378966933351, "loss": 1.938, "step": 226540 }, { "epoch": 0.53, "grad_norm": 1.9921875, "learning_rate": 0.00016693652354712218, "loss": 2.0014, "step": 226545 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016693515037804256, "loss": 2.1282, "step": 226550 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.0001669337771860967, "loss": 2.1429, "step": 226555 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.000166932403971285, "loss": 2.1206, "step": 226560 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.000166931030733608, "loss": 2.1515, "step": 226565 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016692965747306616, "loss": 2.0156, "step": 226570 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016692828418965992, "loss": 2.1701, "step": 226575 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016692691088338973, "loss": 2.1524, "step": 226580 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016692553755425616, "loss": 1.9636, "step": 226585 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016692416420225958, "loss": 2.0361, "step": 226590 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016692279082740049, "loss": 2.1252, "step": 226595 }, { "epoch": 0.53, "grad_norm": 1.8125, "learning_rate": 0.00016692141742967936, "loss": 2.1368, "step": 226600 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016692004400909664, "loss": 2.0423, "step": 226605 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016691867056565287, "loss": 2.0441, "step": 226610 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016691729709934845, "loss": 2.0176, "step": 226615 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016691592361018387, "loss": 2.2291, "step": 226620 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.00016691455009815956, "loss": 2.2132, "step": 226625 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.00016691317656327605, "loss": 2.0603, "step": 226630 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016691180300553379, "loss": 2.0693, "step": 226635 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016691042942493323, "loss": 1.9251, "step": 226640 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016690905582147488, "loss": 2.0382, "step": 226645 }, { "epoch": 0.53, "grad_norm": 2.3125, "learning_rate": 0.0001669076821951592, "loss": 2.1923, "step": 226650 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.0001669063085459866, "loss": 2.133, "step": 226655 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016690493487395757, "loss": 2.1227, "step": 226660 }, { "epoch": 0.53, "grad_norm": 1.9296875, "learning_rate": 0.00016690356117907268, "loss": 2.1879, "step": 226665 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016690218746133226, "loss": 2.0603, "step": 226670 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016690081372073686, "loss": 2.127, "step": 226675 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001668994399572869, "loss": 2.0479, "step": 226680 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.0001668980661709829, "loss": 1.9758, "step": 226685 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016689669236182532, "loss": 2.1045, "step": 226690 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.0001668953185298146, "loss": 1.9469, "step": 226695 }, { "epoch": 0.53, "grad_norm": 2.4375, "learning_rate": 0.00016689394467495124, "loss": 2.1006, "step": 226700 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016689257079723565, "loss": 2.1683, "step": 226705 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016689119689666842, "loss": 2.1942, "step": 226710 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016688982297324989, "loss": 2.0801, "step": 226715 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.0001668884490269806, "loss": 1.8688, "step": 226720 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.000166887075057861, "loss": 2.1138, "step": 226725 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016688570106589156, "loss": 1.9386, "step": 226730 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016688432705107276, "loss": 2.2483, "step": 226735 }, { "epoch": 0.53, "grad_norm": 1.8359375, "learning_rate": 0.00016688295301340507, "loss": 2.0032, "step": 226740 }, { "epoch": 0.53, "grad_norm": 2.453125, "learning_rate": 0.00016688157895288893, "loss": 2.0813, "step": 226745 }, { "epoch": 0.53, "grad_norm": 2.484375, "learning_rate": 0.00016688020486952483, "loss": 2.1364, "step": 226750 }, { "epoch": 0.53, "grad_norm": 2.515625, "learning_rate": 0.00016687883076331326, "loss": 2.0994, "step": 226755 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.00016687745663425461, "loss": 2.0584, "step": 226760 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016687608248234948, "loss": 1.9834, "step": 226765 }, { "epoch": 0.53, "grad_norm": 1.921875, "learning_rate": 0.00016687470830759824, "loss": 2.0564, "step": 226770 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016687333411000136, "loss": 2.1603, "step": 226775 }, { "epoch": 0.53, "grad_norm": 1.8671875, "learning_rate": 0.00016687195988955938, "loss": 2.0419, "step": 226780 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.0001668705856462727, "loss": 2.0848, "step": 226785 }, { "epoch": 0.53, "grad_norm": 2.546875, "learning_rate": 0.00016686921138014182, "loss": 2.0189, "step": 226790 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.0001668678370911672, "loss": 2.142, "step": 226795 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016686646277934934, "loss": 2.2631, "step": 226800 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016686508844468868, "loss": 2.1383, "step": 226805 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016686371408718565, "loss": 2.0895, "step": 226810 }, { "epoch": 0.53, "grad_norm": 1.8984375, "learning_rate": 0.00016686233970684077, "loss": 2.2143, "step": 226815 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016686096530365454, "loss": 1.9039, "step": 226820 }, { "epoch": 0.53, "grad_norm": 2.453125, "learning_rate": 0.00016685959087762737, "loss": 2.0736, "step": 226825 }, { "epoch": 0.53, "grad_norm": 2.234375, "learning_rate": 0.00016685821642875977, "loss": 2.0676, "step": 226830 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.00016685684195705218, "loss": 2.0491, "step": 226835 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.00016685546746250505, "loss": 2.0902, "step": 226840 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.0001668540929451189, "loss": 2.0073, "step": 226845 }, { "epoch": 0.53, "grad_norm": 2.765625, "learning_rate": 0.00016685271840489422, "loss": 2.1626, "step": 226850 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.0001668513438418314, "loss": 2.1901, "step": 226855 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016684996925593097, "loss": 1.93, "step": 226860 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016684859464719334, "loss": 2.205, "step": 226865 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016684722001561903, "loss": 2.202, "step": 226870 }, { "epoch": 0.53, "grad_norm": 2.421875, "learning_rate": 0.00016684584536120852, "loss": 2.046, "step": 226875 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016684447068396224, "loss": 1.9277, "step": 226880 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001668430959838807, "loss": 2.119, "step": 226885 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016684172126096432, "loss": 2.079, "step": 226890 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016684034651521357, "loss": 2.1282, "step": 226895 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016683897174662898, "loss": 2.1136, "step": 226900 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.000166837596955211, "loss": 2.1413, "step": 226905 }, { "epoch": 0.53, "grad_norm": 2.609375, "learning_rate": 0.00016683622214096007, "loss": 2.1099, "step": 226910 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016683484730387667, "loss": 2.1065, "step": 226915 }, { "epoch": 0.53, "grad_norm": 1.828125, "learning_rate": 0.00016683347244396125, "loss": 1.987, "step": 226920 }, { "epoch": 0.53, "grad_norm": 1.9140625, "learning_rate": 0.00016683209756121435, "loss": 1.9823, "step": 226925 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.00016683072265563634, "loss": 2.0962, "step": 226930 }, { "epoch": 0.53, "grad_norm": 2.0625, "learning_rate": 0.0001668293477272278, "loss": 2.1155, "step": 226935 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.0001668279727759891, "loss": 2.2511, "step": 226940 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016682659780192078, "loss": 2.0853, "step": 226945 }, { "epoch": 0.53, "grad_norm": 1.7421875, "learning_rate": 0.0001668252228050233, "loss": 2.0691, "step": 226950 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016682384778529707, "loss": 1.9802, "step": 226955 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016682247274274264, "loss": 1.9225, "step": 226960 }, { "epoch": 0.53, "grad_norm": 2.359375, "learning_rate": 0.00016682109767736042, "loss": 1.9457, "step": 226965 }, { "epoch": 0.53, "grad_norm": 1.796875, "learning_rate": 0.00016681972258915088, "loss": 2.122, "step": 226970 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.0001668183474781145, "loss": 2.2601, "step": 226975 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.0001668169723442518, "loss": 2.2112, "step": 226980 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.00016681559718756322, "loss": 2.2145, "step": 226985 }, { "epoch": 0.53, "grad_norm": 1.9453125, "learning_rate": 0.0001668142220080492, "loss": 2.094, "step": 226990 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.00016681284680571023, "loss": 2.0966, "step": 226995 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016681147158054675, "loss": 2.1887, "step": 227000 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016681009633255932, "loss": 2.0379, "step": 227005 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016680872106174832, "loss": 2.1464, "step": 227010 }, { "epoch": 0.53, "grad_norm": 2.265625, "learning_rate": 0.00016680734576811424, "loss": 2.0195, "step": 227015 }, { "epoch": 0.53, "grad_norm": 1.953125, "learning_rate": 0.00016680597045165758, "loss": 2.2449, "step": 227020 }, { "epoch": 0.53, "grad_norm": 2.40625, "learning_rate": 0.00016680459511237878, "loss": 2.1593, "step": 227025 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.0001668032197502783, "loss": 1.9733, "step": 227030 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016680184436535664, "loss": 1.936, "step": 227035 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016680046895761427, "loss": 1.9377, "step": 227040 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016679909352705166, "loss": 2.1622, "step": 227045 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016679771807366923, "loss": 2.2319, "step": 227050 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.0001667963425974675, "loss": 1.9923, "step": 227055 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016679496709844695, "loss": 2.1608, "step": 227060 }, { "epoch": 0.53, "grad_norm": 2.59375, "learning_rate": 0.000166793591576608, "loss": 2.0086, "step": 227065 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.0001667922160319512, "loss": 1.7817, "step": 227070 }, { "epoch": 0.53, "grad_norm": 2.328125, "learning_rate": 0.0001667908404644769, "loss": 1.7801, "step": 227075 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016678946487418567, "loss": 1.953, "step": 227080 }, { "epoch": 0.53, "grad_norm": 2.296875, "learning_rate": 0.00016678808926107793, "loss": 2.1241, "step": 227085 }, { "epoch": 0.53, "grad_norm": 2.578125, "learning_rate": 0.00016678671362515418, "loss": 2.1178, "step": 227090 }, { "epoch": 0.53, "grad_norm": 1.96875, "learning_rate": 0.0001667853379664149, "loss": 2.0946, "step": 227095 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001667839622848605, "loss": 2.177, "step": 227100 }, { "epoch": 0.53, "grad_norm": 2.8125, "learning_rate": 0.0001667825865804915, "loss": 2.1575, "step": 227105 }, { "epoch": 0.53, "grad_norm": 2.28125, "learning_rate": 0.00016678121085330835, "loss": 2.1636, "step": 227110 }, { "epoch": 0.53, "grad_norm": 2.625, "learning_rate": 0.00016677983510331155, "loss": 2.0585, "step": 227115 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016677845933050155, "loss": 2.1594, "step": 227120 }, { "epoch": 0.53, "grad_norm": 2.375, "learning_rate": 0.0001667770835348788, "loss": 2.0439, "step": 227125 }, { "epoch": 0.53, "grad_norm": 2.1875, "learning_rate": 0.0001667757077164438, "loss": 2.2852, "step": 227130 }, { "epoch": 0.53, "grad_norm": 2.015625, "learning_rate": 0.000166774331875197, "loss": 2.1423, "step": 227135 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016677295601113886, "loss": 1.9414, "step": 227140 }, { "epoch": 0.53, "grad_norm": 2.09375, "learning_rate": 0.00016677158012426988, "loss": 1.9752, "step": 227145 }, { "epoch": 0.53, "grad_norm": 2.59375, "learning_rate": 0.00016677020421459053, "loss": 2.1755, "step": 227150 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.00016676882828210124, "loss": 2.1111, "step": 227155 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016676745232680254, "loss": 2.2083, "step": 227160 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.00016676607634869483, "loss": 2.1951, "step": 227165 }, { "epoch": 0.53, "grad_norm": 1.9375, "learning_rate": 0.00016676470034777865, "loss": 2.1697, "step": 227170 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.0001667633243240544, "loss": 2.1274, "step": 227175 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016676194827752264, "loss": 2.1706, "step": 227180 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016676057220818376, "loss": 2.0595, "step": 227185 }, { "epoch": 0.53, "grad_norm": 2.109375, "learning_rate": 0.00016675919611603825, "loss": 2.1628, "step": 227190 }, { "epoch": 0.53, "grad_norm": 1.90625, "learning_rate": 0.0001667578200010866, "loss": 2.1899, "step": 227195 }, { "epoch": 0.53, "grad_norm": 2.34375, "learning_rate": 0.00016675644386332926, "loss": 2.074, "step": 227200 }, { "epoch": 0.53, "grad_norm": 2.71875, "learning_rate": 0.00016675506770276672, "loss": 2.1432, "step": 227205 }, { "epoch": 0.53, "grad_norm": 2.25, "learning_rate": 0.00016675369151939942, "loss": 2.1337, "step": 227210 }, { "epoch": 0.53, "grad_norm": 2.03125, "learning_rate": 0.00016675231531322786, "loss": 2.1212, "step": 227215 }, { "epoch": 0.53, "grad_norm": 1.7265625, "learning_rate": 0.00016675093908425249, "loss": 2.2207, "step": 227220 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.00016674956283247382, "loss": 2.1427, "step": 227225 }, { "epoch": 0.53, "grad_norm": 2.390625, "learning_rate": 0.00016674818655789225, "loss": 2.0407, "step": 227230 }, { "epoch": 0.53, "grad_norm": 2.46875, "learning_rate": 0.00016674681026050833, "loss": 2.0663, "step": 227235 }, { "epoch": 0.53, "grad_norm": 2.046875, "learning_rate": 0.00016674543394032244, "loss": 2.1355, "step": 227240 }, { "epoch": 0.53, "grad_norm": 1.84375, "learning_rate": 0.00016674405759733513, "loss": 1.9899, "step": 227245 }, { "epoch": 0.53, "grad_norm": 2.078125, "learning_rate": 0.0001667426812315468, "loss": 2.0177, "step": 227250 }, { "epoch": 0.53, "grad_norm": 2.15625, "learning_rate": 0.00016674130484295801, "loss": 2.0063, "step": 227255 }, { "epoch": 0.53, "grad_norm": 1.828125, "learning_rate": 0.00016673992843156915, "loss": 1.936, "step": 227260 }, { "epoch": 0.53, "grad_norm": 1.9296875, "learning_rate": 0.00016673855199738076, "loss": 2.0558, "step": 227265 }, { "epoch": 0.53, "grad_norm": 1.9765625, "learning_rate": 0.0001667371755403932, "loss": 2.0761, "step": 227270 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016673579906060707, "loss": 2.0173, "step": 227275 }, { "epoch": 0.53, "grad_norm": 1.890625, "learning_rate": 0.00016673442255802279, "loss": 2.1777, "step": 227280 }, { "epoch": 0.53, "grad_norm": 1.7421875, "learning_rate": 0.0001667330460326408, "loss": 2.0502, "step": 227285 }, { "epoch": 0.53, "grad_norm": 2.203125, "learning_rate": 0.00016673166948446156, "loss": 2.2279, "step": 227290 }, { "epoch": 0.53, "grad_norm": 1.984375, "learning_rate": 0.00016673029291348562, "loss": 2.0584, "step": 227295 }, { "epoch": 0.53, "grad_norm": 1.9375, "learning_rate": 0.00016672891631971335, "loss": 2.0024, "step": 227300 }, { "epoch": 0.53, "grad_norm": 1.9609375, "learning_rate": 0.00016672753970314533, "loss": 1.9443, "step": 227305 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 0.00016672616306378192, "loss": 2.1164, "step": 227310 }, { "epoch": 0.53, "grad_norm": 2.0, "learning_rate": 0.00016672478640162366, "loss": 2.1258, "step": 227315 }, { "epoch": 0.53, "grad_norm": 2.140625, "learning_rate": 0.000166723409716671, "loss": 1.9154, "step": 227320 }, { "epoch": 0.53, "grad_norm": 2.171875, "learning_rate": 0.00016672203300892443, "loss": 1.9741, "step": 227325 }, { "epoch": 0.53, "grad_norm": 2.125, "learning_rate": 0.00016672065627838437, "loss": 2.2499, "step": 227330 }, { "epoch": 0.53, "grad_norm": 1.8125, "learning_rate": 0.00016671927952505137, "loss": 2.0222, "step": 227335 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016671790274892583, "loss": 2.0583, "step": 227340 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016671652595000826, "loss": 2.0529, "step": 227345 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001667151491282991, "loss": 1.964, "step": 227350 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016671377228379884, "loss": 2.1319, "step": 227355 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016671239541650793, "loss": 1.9464, "step": 227360 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016671101852642686, "loss": 2.1487, "step": 227365 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016670964161355611, "loss": 2.038, "step": 227370 }, { "epoch": 0.54, "grad_norm": 1.8359375, "learning_rate": 0.00016670826467789614, "loss": 2.0674, "step": 227375 }, { "epoch": 0.54, "grad_norm": 1.8828125, "learning_rate": 0.0001667068877194474, "loss": 2.1769, "step": 227380 }, { "epoch": 0.54, "grad_norm": 1.8984375, "learning_rate": 0.0001667055107382104, "loss": 2.1633, "step": 227385 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016670413373418556, "loss": 2.0524, "step": 227390 }, { "epoch": 0.54, "grad_norm": 1.8984375, "learning_rate": 0.00016670275670737337, "loss": 2.213, "step": 227395 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.00016670137965777438, "loss": 2.0188, "step": 227400 }, { "epoch": 0.54, "grad_norm": 3.234375, "learning_rate": 0.00016670000258538892, "loss": 2.0074, "step": 227405 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016669862549021752, "loss": 2.1091, "step": 227410 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001666972483722607, "loss": 2.1829, "step": 227415 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016669587123151888, "loss": 2.0781, "step": 227420 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.00016669449406799255, "loss": 2.2238, "step": 227425 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016669311688168213, "loss": 2.0045, "step": 227430 }, { "epoch": 0.54, "grad_norm": 2.5625, "learning_rate": 0.0001666917396725882, "loss": 2.1803, "step": 227435 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001666903624407111, "loss": 2.0761, "step": 227440 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.0001666889851860514, "loss": 2.1402, "step": 227445 }, { "epoch": 0.54, "grad_norm": 1.703125, "learning_rate": 0.00016668760790860954, "loss": 2.0707, "step": 227450 }, { "epoch": 0.54, "grad_norm": 1.890625, "learning_rate": 0.00016668623060838594, "loss": 1.9634, "step": 227455 }, { "epoch": 0.54, "grad_norm": 1.8515625, "learning_rate": 0.00016668485328538113, "loss": 2.0619, "step": 227460 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016668347593959556, "loss": 2.0075, "step": 227465 }, { "epoch": 0.54, "grad_norm": 1.9453125, "learning_rate": 0.00016668209857102973, "loss": 2.1519, "step": 227470 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016668072117968408, "loss": 2.0424, "step": 227475 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016667934376555908, "loss": 2.0645, "step": 227480 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.0001666779663286552, "loss": 2.0424, "step": 227485 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.00016667658886897294, "loss": 1.9815, "step": 227490 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016667521138651273, "loss": 2.1869, "step": 227495 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.00016667383388127503, "loss": 2.2342, "step": 227500 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016667245635326042, "loss": 2.0567, "step": 227505 }, { "epoch": 0.54, "grad_norm": 1.703125, "learning_rate": 0.0001666710788024692, "loss": 2.0429, "step": 227510 }, { "epoch": 0.54, "grad_norm": 2.53125, "learning_rate": 0.00016666970122890197, "loss": 2.2557, "step": 227515 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016666832363255917, "loss": 1.9959, "step": 227520 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016666694601344127, "loss": 2.0976, "step": 227525 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.0001666655683715487, "loss": 2.1221, "step": 227530 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016666419070688197, "loss": 2.1762, "step": 227535 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016666281301944155, "loss": 2.1346, "step": 227540 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001666614353092279, "loss": 1.9221, "step": 227545 }, { "epoch": 0.54, "grad_norm": 3.125, "learning_rate": 0.00016666005757624152, "loss": 2.2234, "step": 227550 }, { "epoch": 0.54, "grad_norm": 1.859375, "learning_rate": 0.00016665867982048281, "loss": 2.154, "step": 227555 }, { "epoch": 0.54, "grad_norm": 2.59375, "learning_rate": 0.00016665730204195234, "loss": 2.2444, "step": 227560 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001666559242406505, "loss": 2.0649, "step": 227565 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016665454641657775, "loss": 2.0758, "step": 227570 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016665316856973465, "loss": 2.21, "step": 227575 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016665179070012162, "loss": 2.1177, "step": 227580 }, { "epoch": 0.54, "grad_norm": 1.7734375, "learning_rate": 0.0001666504128077391, "loss": 2.0402, "step": 227585 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001666490348925876, "loss": 2.0689, "step": 227590 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.0001666476569546676, "loss": 2.1132, "step": 227595 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001666462789939795, "loss": 2.1207, "step": 227600 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016664490101052387, "loss": 2.0266, "step": 227605 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016664352300430112, "loss": 2.144, "step": 227610 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016664214497531173, "loss": 2.1462, "step": 227615 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001666407669235562, "loss": 2.0952, "step": 227620 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016663938884903495, "loss": 2.1096, "step": 227625 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016663801075174846, "loss": 2.0823, "step": 227630 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016663663263169725, "loss": 1.9739, "step": 227635 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.00016663525448888176, "loss": 2.0601, "step": 227640 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.00016663387632330243, "loss": 2.0495, "step": 227645 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016663249813495977, "loss": 2.0749, "step": 227650 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016663111992385425, "loss": 2.2324, "step": 227655 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016662974168998632, "loss": 2.0547, "step": 227660 }, { "epoch": 0.54, "grad_norm": 2.53125, "learning_rate": 0.00016662836343335648, "loss": 2.0075, "step": 227665 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016662698515396515, "loss": 2.2086, "step": 227670 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016662560685181286, "loss": 2.2882, "step": 227675 }, { "epoch": 0.54, "grad_norm": 1.9140625, "learning_rate": 0.00016662422852690005, "loss": 1.964, "step": 227680 }, { "epoch": 0.54, "grad_norm": 1.8515625, "learning_rate": 0.00016662285017922718, "loss": 2.0905, "step": 227685 }, { "epoch": 0.54, "grad_norm": 1.921875, "learning_rate": 0.00016662147180879474, "loss": 1.98, "step": 227690 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016662009341560322, "loss": 2.057, "step": 227695 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016661871499965304, "loss": 1.992, "step": 227700 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001666173365609447, "loss": 1.9822, "step": 227705 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001666159580994787, "loss": 2.1015, "step": 227710 }, { "epoch": 0.54, "grad_norm": 1.7734375, "learning_rate": 0.0001666145796152555, "loss": 2.0941, "step": 227715 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016661320110827548, "loss": 1.9997, "step": 227720 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001666118225785392, "loss": 1.9329, "step": 227725 }, { "epoch": 0.54, "grad_norm": 2.515625, "learning_rate": 0.00016661044402604716, "loss": 2.09, "step": 227730 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016660906545079972, "loss": 2.1058, "step": 227735 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016660768685279747, "loss": 2.0707, "step": 227740 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016660630823204083, "loss": 1.9808, "step": 227745 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001666049295885302, "loss": 2.0797, "step": 227750 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016660355092226617, "loss": 2.0289, "step": 227755 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016660217223324914, "loss": 2.1408, "step": 227760 }, { "epoch": 0.54, "grad_norm": 3.09375, "learning_rate": 0.0001666007935214796, "loss": 2.1035, "step": 227765 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016659941478695806, "loss": 2.102, "step": 227770 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.0001665980360296849, "loss": 2.2273, "step": 227775 }, { "epoch": 0.54, "grad_norm": 1.9140625, "learning_rate": 0.00016659665724966068, "loss": 2.1798, "step": 227780 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016659527844688581, "loss": 2.288, "step": 227785 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.0001665938996213608, "loss": 1.9418, "step": 227790 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.0001665925207730861, "loss": 2.0359, "step": 227795 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001665911419020622, "loss": 2.1148, "step": 227800 }, { "epoch": 0.54, "grad_norm": 2.65625, "learning_rate": 0.00016658976300828955, "loss": 2.0962, "step": 227805 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016658838409176863, "loss": 2.0381, "step": 227810 }, { "epoch": 0.54, "grad_norm": 1.6953125, "learning_rate": 0.0001665870051524999, "loss": 2.0692, "step": 227815 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016658562619048384, "loss": 2.0239, "step": 227820 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016658424720572096, "loss": 2.2928, "step": 227825 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016658286819821162, "loss": 2.284, "step": 227830 }, { "epoch": 0.54, "grad_norm": 1.8828125, "learning_rate": 0.00016658148916795642, "loss": 2.0192, "step": 227835 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016658011011495575, "loss": 2.1866, "step": 227840 }, { "epoch": 0.54, "grad_norm": 1.921875, "learning_rate": 0.00016657873103921014, "loss": 2.126, "step": 227845 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016657735194072, "loss": 2.085, "step": 227850 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016657597281948583, "loss": 2.1276, "step": 227855 }, { "epoch": 0.54, "grad_norm": 1.9453125, "learning_rate": 0.0001665745936755081, "loss": 2.1104, "step": 227860 }, { "epoch": 0.54, "grad_norm": 2.828125, "learning_rate": 0.0001665732145087873, "loss": 2.089, "step": 227865 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016657183531932387, "loss": 1.9451, "step": 227870 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.0001665704561071183, "loss": 2.1231, "step": 227875 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016656907687217105, "loss": 2.0101, "step": 227880 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.0001665676976144826, "loss": 1.9738, "step": 227885 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.00016656631833405342, "loss": 2.0802, "step": 227890 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016656493903088396, "loss": 2.0845, "step": 227895 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016656355970497472, "loss": 2.1788, "step": 227900 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016656218035632616, "loss": 1.8256, "step": 227905 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016656080098493873, "loss": 1.9537, "step": 227910 }, { "epoch": 0.54, "grad_norm": 1.8359375, "learning_rate": 0.00016655942159081298, "loss": 1.9783, "step": 227915 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001665580421739493, "loss": 1.9067, "step": 227920 }, { "epoch": 0.54, "grad_norm": 1.78125, "learning_rate": 0.0001665566627343482, "loss": 2.1108, "step": 227925 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016655528327201008, "loss": 1.8994, "step": 227930 }, { "epoch": 0.54, "grad_norm": 1.734375, "learning_rate": 0.0001665539037869355, "loss": 2.1909, "step": 227935 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001665525242791249, "loss": 2.1336, "step": 227940 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016655114474857876, "loss": 2.2849, "step": 227945 }, { "epoch": 0.54, "grad_norm": 1.84375, "learning_rate": 0.00016654976519529755, "loss": 2.0084, "step": 227950 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001665483856192817, "loss": 2.1381, "step": 227955 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016654700602053175, "loss": 2.1662, "step": 227960 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.0001665456263990481, "loss": 2.2582, "step": 227965 }, { "epoch": 0.54, "grad_norm": 1.8828125, "learning_rate": 0.0001665442467548313, "loss": 2.2118, "step": 227970 }, { "epoch": 0.54, "grad_norm": 2.828125, "learning_rate": 0.00016654286708788174, "loss": 1.9994, "step": 227975 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016654148739819996, "loss": 2.2805, "step": 227980 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016654010768578638, "loss": 2.1217, "step": 227985 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.00016653872795064148, "loss": 2.2538, "step": 227990 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016653734819276577, "loss": 2.0518, "step": 227995 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016653596841215967, "loss": 1.9322, "step": 228000 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.0001665345886088237, "loss": 2.1247, "step": 228005 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016653320878275832, "loss": 2.1693, "step": 228010 }, { "epoch": 0.54, "grad_norm": 2.765625, "learning_rate": 0.00016653182893396396, "loss": 2.0958, "step": 228015 }, { "epoch": 0.54, "grad_norm": 1.9453125, "learning_rate": 0.00016653044906244114, "loss": 2.1548, "step": 228020 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016652906916819028, "loss": 2.1922, "step": 228025 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016652768925121193, "loss": 2.2441, "step": 228030 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.0001665263093115065, "loss": 2.2523, "step": 228035 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016652492934907446, "loss": 2.0326, "step": 228040 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.0001665235493639163, "loss": 2.2575, "step": 228045 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016652216935603248, "loss": 1.9908, "step": 228050 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.0001665207893254235, "loss": 2.1259, "step": 228055 }, { "epoch": 0.54, "grad_norm": 3.703125, "learning_rate": 0.00016651940927208983, "loss": 2.1646, "step": 228060 }, { "epoch": 0.54, "grad_norm": 1.890625, "learning_rate": 0.0001665180291960319, "loss": 2.2283, "step": 228065 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001665166490972502, "loss": 2.0997, "step": 228070 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016651526897574521, "loss": 2.0167, "step": 228075 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016651388883151736, "loss": 2.1016, "step": 228080 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016651250866456723, "loss": 2.0014, "step": 228085 }, { "epoch": 0.54, "grad_norm": 2.609375, "learning_rate": 0.00016651112847489518, "loss": 1.9704, "step": 228090 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016650974826250175, "loss": 2.2007, "step": 228095 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016650836802738735, "loss": 2.224, "step": 228100 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001665069877695525, "loss": 1.9737, "step": 228105 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016650560748899767, "loss": 2.2061, "step": 228110 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.0001665042271857233, "loss": 2.1192, "step": 228115 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016650284685972987, "loss": 2.2966, "step": 228120 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016650146651101788, "loss": 2.2105, "step": 228125 }, { "epoch": 0.54, "grad_norm": 1.9921875, "learning_rate": 0.0001665000861395878, "loss": 1.9974, "step": 228130 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.00016649870574544004, "loss": 2.2312, "step": 228135 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.00016649732532857513, "loss": 1.9468, "step": 228140 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016649594488899356, "loss": 2.1854, "step": 228145 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016649456442669575, "loss": 2.2386, "step": 228150 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016649318394168218, "loss": 2.1919, "step": 228155 }, { "epoch": 0.54, "grad_norm": 1.9140625, "learning_rate": 0.00016649180343395335, "loss": 2.0821, "step": 228160 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016649042290350972, "loss": 2.0096, "step": 228165 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.0001664890423503517, "loss": 2.1628, "step": 228170 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001664876617744799, "loss": 1.9984, "step": 228175 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016648628117589464, "loss": 2.0917, "step": 228180 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.0001664849005545965, "loss": 2.1929, "step": 228185 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001664835199105859, "loss": 1.9217, "step": 228190 }, { "epoch": 0.54, "grad_norm": 1.8203125, "learning_rate": 0.00016648213924386332, "loss": 2.034, "step": 228195 }, { "epoch": 0.54, "grad_norm": 1.828125, "learning_rate": 0.00016648075855442925, "loss": 2.1661, "step": 228200 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001664793778422841, "loss": 2.0274, "step": 228205 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016647799710742844, "loss": 2.1258, "step": 228210 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016647661634986268, "loss": 1.8329, "step": 228215 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001664752355695873, "loss": 2.0554, "step": 228220 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016647385476660278, "loss": 2.1641, "step": 228225 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016647247394090958, "loss": 1.7879, "step": 228230 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016647109309250813, "loss": 2.1709, "step": 228235 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016646971222139903, "loss": 2.1197, "step": 228240 }, { "epoch": 0.54, "grad_norm": 1.7890625, "learning_rate": 0.00016646833132758263, "loss": 2.1874, "step": 228245 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016646695041105947, "loss": 2.0846, "step": 228250 }, { "epoch": 0.54, "grad_norm": 1.765625, "learning_rate": 0.00016646556947182996, "loss": 2.2033, "step": 228255 }, { "epoch": 0.54, "grad_norm": 1.8359375, "learning_rate": 0.00016646418850989458, "loss": 1.889, "step": 228260 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.0001664628075252539, "loss": 2.0373, "step": 228265 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016646142651790825, "loss": 2.2566, "step": 228270 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016646004548785822, "loss": 2.2838, "step": 228275 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016645866443510424, "loss": 2.127, "step": 228280 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016645728335964674, "loss": 2.0681, "step": 228285 }, { "epoch": 0.54, "grad_norm": 1.9453125, "learning_rate": 0.00016645590226148623, "loss": 2.1174, "step": 228290 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001664545211406232, "loss": 2.0658, "step": 228295 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016645313999705808, "loss": 2.1833, "step": 228300 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001664517588307914, "loss": 2.1213, "step": 228305 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016645037764182353, "loss": 2.0624, "step": 228310 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016644899643015505, "loss": 2.1605, "step": 228315 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001664476151957864, "loss": 2.0021, "step": 228320 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.000166446233938718, "loss": 2.0339, "step": 228325 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016644485265895038, "loss": 2.1268, "step": 228330 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016644347135648401, "loss": 2.0063, "step": 228335 }, { "epoch": 0.54, "grad_norm": 1.8203125, "learning_rate": 0.00016644209003131933, "loss": 1.9588, "step": 228340 }, { "epoch": 0.54, "grad_norm": 2.796875, "learning_rate": 0.00016644070868345682, "loss": 2.0213, "step": 228345 }, { "epoch": 0.54, "grad_norm": 1.9921875, "learning_rate": 0.00016643932731289697, "loss": 1.9878, "step": 228350 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016643794591964022, "loss": 2.2331, "step": 228355 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.0001664365645036871, "loss": 2.0962, "step": 228360 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.000166435183065038, "loss": 2.2843, "step": 228365 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016643380160369348, "loss": 2.1842, "step": 228370 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016643242011965393, "loss": 2.022, "step": 228375 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001664310386129199, "loss": 1.9995, "step": 228380 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016642965708349179, "loss": 2.1455, "step": 228385 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.0001664282755313701, "loss": 2.2716, "step": 228390 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.00016642689395655533, "loss": 2.0366, "step": 228395 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016642551235904792, "loss": 2.0516, "step": 228400 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016642413073884838, "loss": 2.0226, "step": 228405 }, { "epoch": 0.54, "grad_norm": 2.546875, "learning_rate": 0.0001664227490959571, "loss": 2.2638, "step": 228410 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016642136743037463, "loss": 2.1715, "step": 228415 }, { "epoch": 0.54, "grad_norm": 1.7421875, "learning_rate": 0.0001664199857421014, "loss": 2.0386, "step": 228420 }, { "epoch": 0.54, "grad_norm": 1.921875, "learning_rate": 0.00016641860403113792, "loss": 2.0321, "step": 228425 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016641722229748465, "loss": 2.1283, "step": 228430 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.000166415840541142, "loss": 2.2379, "step": 228435 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016641445876211056, "loss": 2.1502, "step": 228440 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001664130769603907, "loss": 1.9696, "step": 228445 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016641169513598292, "loss": 2.0434, "step": 228450 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016641031328888773, "loss": 1.9799, "step": 228455 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016640893141910555, "loss": 2.0192, "step": 228460 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016640754952663688, "loss": 2.2068, "step": 228465 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016640616761148217, "loss": 2.0303, "step": 228470 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016640478567364194, "loss": 2.1259, "step": 228475 }, { "epoch": 0.54, "grad_norm": 1.9921875, "learning_rate": 0.00016640340371311662, "loss": 2.0382, "step": 228480 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001664020217299067, "loss": 2.0533, "step": 228485 }, { "epoch": 0.54, "grad_norm": 1.859375, "learning_rate": 0.00016640063972401262, "loss": 2.247, "step": 228490 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016639925769543491, "loss": 2.1207, "step": 228495 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016639787564417396, "loss": 2.1029, "step": 228500 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016639649357023034, "loss": 2.0737, "step": 228505 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.00016639511147360447, "loss": 2.1207, "step": 228510 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001663937293542968, "loss": 2.1242, "step": 228515 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016639234721230786, "loss": 2.0541, "step": 228520 }, { "epoch": 0.54, "grad_norm": 1.78125, "learning_rate": 0.00016639096504763806, "loss": 2.1884, "step": 228525 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001663895828602879, "loss": 2.07, "step": 228530 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016638820065025786, "loss": 2.3075, "step": 228535 }, { "epoch": 0.54, "grad_norm": 3.0, "learning_rate": 0.00016638681841754844, "loss": 2.2288, "step": 228540 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016638543616216006, "loss": 2.309, "step": 228545 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001663840538840932, "loss": 2.1331, "step": 228550 }, { "epoch": 0.54, "grad_norm": 1.75, "learning_rate": 0.00016638267158334836, "loss": 2.1137, "step": 228555 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016638128925992597, "loss": 2.0404, "step": 228560 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016637990691382654, "loss": 2.1399, "step": 228565 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016637852454505053, "loss": 2.1085, "step": 228570 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.0001663771421535984, "loss": 2.1108, "step": 228575 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016637575973947065, "loss": 2.1734, "step": 228580 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016637437730266774, "loss": 2.1075, "step": 228585 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016637299484319014, "loss": 2.1133, "step": 228590 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.00016637161236103832, "loss": 2.0075, "step": 228595 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016637022985621273, "loss": 2.207, "step": 228600 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016636884732871388, "loss": 2.1151, "step": 228605 }, { "epoch": 0.54, "grad_norm": 3.0, "learning_rate": 0.00016636746477854223, "loss": 2.0367, "step": 228610 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016636608220569826, "loss": 2.1034, "step": 228615 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016636469961018243, "loss": 2.047, "step": 228620 }, { "epoch": 0.54, "grad_norm": 2.59375, "learning_rate": 0.0001663633169919952, "loss": 2.2037, "step": 228625 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016636193435113708, "loss": 2.2726, "step": 228630 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016636055168760852, "loss": 2.0631, "step": 228635 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016635916900140996, "loss": 2.1586, "step": 228640 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016635778629254193, "loss": 2.2307, "step": 228645 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.0001663564035610049, "loss": 2.2214, "step": 228650 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016635502080679927, "loss": 2.2318, "step": 228655 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016635363802992558, "loss": 2.1233, "step": 228660 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016635225523038428, "loss": 1.9587, "step": 228665 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016635087240817584, "loss": 2.0058, "step": 228670 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016634948956330075, "loss": 1.9764, "step": 228675 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016634810669575946, "loss": 2.09, "step": 228680 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.0001663467238055525, "loss": 2.0888, "step": 228685 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.00016634534089268025, "loss": 2.0556, "step": 228690 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016634395795714322, "loss": 2.1276, "step": 228695 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016634257499894192, "loss": 2.0734, "step": 228700 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016634119201807676, "loss": 2.0925, "step": 228705 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.0001663398090145483, "loss": 2.1025, "step": 228710 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016633842598835693, "loss": 2.0773, "step": 228715 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016633704293950313, "loss": 1.9973, "step": 228720 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.0001663356598679874, "loss": 2.1334, "step": 228725 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016633427677381022, "loss": 2.2406, "step": 228730 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016633289365697204, "loss": 2.118, "step": 228735 }, { "epoch": 0.54, "grad_norm": 1.8046875, "learning_rate": 0.00016633151051747332, "loss": 2.0451, "step": 228740 }, { "epoch": 0.54, "grad_norm": 1.859375, "learning_rate": 0.00016633012735531462, "loss": 2.0376, "step": 228745 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016632874417049629, "loss": 2.1821, "step": 228750 }, { "epoch": 0.54, "grad_norm": 1.9453125, "learning_rate": 0.00016632736096301888, "loss": 2.0872, "step": 228755 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016632597773288282, "loss": 2.0738, "step": 228760 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.0001663245944800886, "loss": 2.2399, "step": 228765 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016632321120463672, "loss": 2.1218, "step": 228770 }, { "epoch": 0.54, "grad_norm": 1.84375, "learning_rate": 0.0001663218279065276, "loss": 2.1609, "step": 228775 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016632044458576178, "loss": 2.0005, "step": 228780 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016631906124233968, "loss": 2.1893, "step": 228785 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016631767787626177, "loss": 2.0053, "step": 228790 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016631629448752856, "loss": 2.0342, "step": 228795 }, { "epoch": 0.54, "grad_norm": 3.03125, "learning_rate": 0.00016631491107614048, "loss": 2.1282, "step": 228800 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016631352764209803, "loss": 2.1159, "step": 228805 }, { "epoch": 0.54, "grad_norm": 2.609375, "learning_rate": 0.00016631214418540169, "loss": 2.2761, "step": 228810 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.00016631076070605189, "loss": 2.0934, "step": 228815 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016630937720404915, "loss": 2.1516, "step": 228820 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016630799367939393, "loss": 2.1696, "step": 228825 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016630661013208666, "loss": 2.1244, "step": 228830 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.0001663052265621279, "loss": 2.1565, "step": 228835 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016630384296951805, "loss": 2.1469, "step": 228840 }, { "epoch": 0.54, "grad_norm": 1.6796875, "learning_rate": 0.0001663024593542576, "loss": 1.9315, "step": 228845 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.000166301075716347, "loss": 2.012, "step": 228850 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016629969205578682, "loss": 2.02, "step": 228855 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016629830837257742, "loss": 2.1341, "step": 228860 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.00016629692466671933, "loss": 2.0585, "step": 228865 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016629554093821298, "loss": 2.1756, "step": 228870 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.0001662941571870589, "loss": 1.9368, "step": 228875 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.0001662927734132575, "loss": 2.0659, "step": 228880 }, { "epoch": 0.54, "grad_norm": 2.625, "learning_rate": 0.00016629138961680933, "loss": 2.0198, "step": 228885 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.0001662900057977148, "loss": 2.0158, "step": 228890 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.0001662886219559744, "loss": 2.3462, "step": 228895 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016628723809158862, "loss": 2.0204, "step": 228900 }, { "epoch": 0.54, "grad_norm": 1.8203125, "learning_rate": 0.00016628585420455787, "loss": 1.9568, "step": 228905 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.0001662844702948827, "loss": 2.0133, "step": 228910 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016628308636256356, "loss": 2.2844, "step": 228915 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016628170240760092, "loss": 2.0885, "step": 228920 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016628031842999525, "loss": 2.1312, "step": 228925 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.000166278934429747, "loss": 2.1316, "step": 228930 }, { "epoch": 0.54, "grad_norm": 1.65625, "learning_rate": 0.0001662775504068567, "loss": 2.2054, "step": 228935 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016627616636132473, "loss": 2.43, "step": 228940 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016627478229315167, "loss": 2.0909, "step": 228945 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016627339820233795, "loss": 2.1736, "step": 228950 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016627201408888397, "loss": 2.0042, "step": 228955 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001662706299527903, "loss": 2.2507, "step": 228960 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001662692457940574, "loss": 2.2052, "step": 228965 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016626786161268574, "loss": 2.1869, "step": 228970 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016626647740867573, "loss": 2.086, "step": 228975 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.0001662650931820279, "loss": 2.1538, "step": 228980 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016626370893274271, "loss": 2.0471, "step": 228985 }, { "epoch": 0.54, "grad_norm": 2.703125, "learning_rate": 0.00016626232466082066, "loss": 2.2609, "step": 228990 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016626094036626218, "loss": 2.0602, "step": 228995 }, { "epoch": 0.54, "grad_norm": 1.765625, "learning_rate": 0.00016625955604906775, "loss": 1.9928, "step": 229000 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016625817170923789, "loss": 1.9682, "step": 229005 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.000166256787346773, "loss": 2.0656, "step": 229010 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016625540296167358, "loss": 1.9512, "step": 229015 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016625401855394016, "loss": 1.9228, "step": 229020 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016625263412357316, "loss": 2.0691, "step": 229025 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016625124967057305, "loss": 2.0181, "step": 229030 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016624986519494028, "loss": 2.1336, "step": 229035 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.0001662484806966754, "loss": 2.2738, "step": 229040 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.0001662470961757788, "loss": 2.1458, "step": 229045 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016624571163225102, "loss": 2.2232, "step": 229050 }, { "epoch": 0.54, "grad_norm": 1.9453125, "learning_rate": 0.0001662443270660925, "loss": 2.0892, "step": 229055 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016624294247730368, "loss": 2.1004, "step": 229060 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.0001662415578658851, "loss": 2.0832, "step": 229065 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.0001662401732318372, "loss": 2.1235, "step": 229070 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016623878857516044, "loss": 2.0747, "step": 229075 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.0001662374038958553, "loss": 2.3826, "step": 229080 }, { "epoch": 0.54, "grad_norm": 1.8984375, "learning_rate": 0.00016623601919392233, "loss": 2.1438, "step": 229085 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016623463446936186, "loss": 2.073, "step": 229090 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.00016623324972217448, "loss": 2.1488, "step": 229095 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.0001662318649523606, "loss": 2.0668, "step": 229100 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016623048015992073, "loss": 2.1234, "step": 229105 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.0001662290953448553, "loss": 2.2155, "step": 229110 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016622771050716484, "loss": 2.0841, "step": 229115 }, { "epoch": 0.54, "grad_norm": 1.890625, "learning_rate": 0.00016622632564684977, "loss": 2.0214, "step": 229120 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.0001662249407639106, "loss": 2.0038, "step": 229125 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016622355585834778, "loss": 2.2442, "step": 229130 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.0001662221709301618, "loss": 2.1914, "step": 229135 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016622078597935313, "loss": 2.1831, "step": 229140 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016621940100592224, "loss": 2.2108, "step": 229145 }, { "epoch": 0.54, "grad_norm": 1.5234375, "learning_rate": 0.0001662180160098696, "loss": 2.1574, "step": 229150 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016621663099119566, "loss": 2.2834, "step": 229155 }, { "epoch": 0.54, "grad_norm": 1.7734375, "learning_rate": 0.00016621524594990096, "loss": 2.0692, "step": 229160 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001662138608859859, "loss": 2.1087, "step": 229165 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.00016621247579945097, "loss": 2.1942, "step": 229170 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016621109069029667, "loss": 1.9995, "step": 229175 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016620970555852348, "loss": 2.2304, "step": 229180 }, { "epoch": 0.54, "grad_norm": 1.7421875, "learning_rate": 0.00016620832040413185, "loss": 2.0937, "step": 229185 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016620693522712224, "loss": 2.1459, "step": 229190 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016620555002749517, "loss": 2.0982, "step": 229195 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016620416480525103, "loss": 2.0578, "step": 229200 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.0001662027795603904, "loss": 2.0333, "step": 229205 }, { "epoch": 0.54, "grad_norm": 1.9921875, "learning_rate": 0.00016620139429291368, "loss": 2.1665, "step": 229210 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016620000900282138, "loss": 2.108, "step": 229215 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016619862369011393, "loss": 2.1176, "step": 229220 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016619723835479183, "loss": 2.0469, "step": 229225 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.00016619585299685555, "loss": 2.1017, "step": 229230 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016619446761630558, "loss": 2.2598, "step": 229235 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016619308221314238, "loss": 2.1513, "step": 229240 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016619169678736643, "loss": 2.1661, "step": 229245 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016619031133897818, "loss": 2.0472, "step": 229250 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016618892586797813, "loss": 2.2956, "step": 229255 }, { "epoch": 0.54, "grad_norm": 1.890625, "learning_rate": 0.0001661875403743667, "loss": 2.0257, "step": 229260 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016618615485814448, "loss": 2.0886, "step": 229265 }, { "epoch": 0.54, "grad_norm": 2.671875, "learning_rate": 0.00016618476931931183, "loss": 2.1481, "step": 229270 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.0001661833837578693, "loss": 2.114, "step": 229275 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016618199817381726, "loss": 2.1768, "step": 229280 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.0001661806125671563, "loss": 1.9972, "step": 229285 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001661792269378868, "loss": 2.1019, "step": 229290 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.0001661778412860093, "loss": 2.1102, "step": 229295 }, { "epoch": 0.54, "grad_norm": 2.625, "learning_rate": 0.00016617645561152426, "loss": 2.377, "step": 229300 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016617506991443211, "loss": 2.1544, "step": 229305 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016617368419473338, "loss": 2.033, "step": 229310 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001661722984524285, "loss": 2.2133, "step": 229315 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.000166170912687518, "loss": 2.1178, "step": 229320 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001661695269000023, "loss": 2.1604, "step": 229325 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016616814108988187, "loss": 2.1545, "step": 229330 }, { "epoch": 0.54, "grad_norm": 2.65625, "learning_rate": 0.00016616675525715722, "loss": 2.2623, "step": 229335 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001661653694018288, "loss": 1.9676, "step": 229340 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.0001661639835238971, "loss": 2.1546, "step": 229345 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016616259762336258, "loss": 2.2564, "step": 229350 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016616121170022572, "loss": 2.2763, "step": 229355 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016615982575448696, "loss": 2.1642, "step": 229360 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016615843978614683, "loss": 2.0927, "step": 229365 }, { "epoch": 0.54, "grad_norm": 1.828125, "learning_rate": 0.00016615705379520579, "loss": 2.0498, "step": 229370 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001661556677816643, "loss": 2.1697, "step": 229375 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001661542817455228, "loss": 2.1024, "step": 229380 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.0001661528956867818, "loss": 2.0309, "step": 229385 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001661515096054418, "loss": 2.2364, "step": 229390 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.00016615012350150324, "loss": 2.0681, "step": 229395 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016614873737496658, "loss": 2.2783, "step": 229400 }, { "epoch": 0.54, "grad_norm": 1.7578125, "learning_rate": 0.00016614735122583233, "loss": 1.9235, "step": 229405 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016614596505410092, "loss": 2.0502, "step": 229410 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016614457885977287, "loss": 2.0633, "step": 229415 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016614319264284864, "loss": 2.1153, "step": 229420 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.0001661418064033287, "loss": 2.0505, "step": 229425 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.0001661404201412135, "loss": 2.1517, "step": 229430 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.0001661390338565035, "loss": 2.2472, "step": 229435 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016613764754919928, "loss": 2.0214, "step": 229440 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.0001661362612193012, "loss": 2.054, "step": 229445 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.00016613487486680977, "loss": 1.9278, "step": 229450 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016613348849172548, "loss": 2.0713, "step": 229455 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016613210209404878, "loss": 2.1613, "step": 229460 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016613071567378012, "loss": 2.0405, "step": 229465 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016612932923092008, "loss": 2.1934, "step": 229470 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016612794276546903, "loss": 2.0422, "step": 229475 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016612655627742747, "loss": 1.9081, "step": 229480 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001661251697667959, "loss": 2.2105, "step": 229485 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016612378323357474, "loss": 2.0251, "step": 229490 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016612239667776452, "loss": 2.2126, "step": 229495 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016612101009936568, "loss": 2.0819, "step": 229500 }, { "epoch": 0.54, "grad_norm": 1.921875, "learning_rate": 0.0001661196234983787, "loss": 2.0941, "step": 229505 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016611823687480407, "loss": 2.1217, "step": 229510 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016611685022864227, "loss": 2.1744, "step": 229515 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.00016611546355989372, "loss": 2.1207, "step": 229520 }, { "epoch": 0.54, "grad_norm": 2.8125, "learning_rate": 0.00016611407686855894, "loss": 2.1432, "step": 229525 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016611269015463838, "loss": 2.1947, "step": 229530 }, { "epoch": 0.54, "grad_norm": 1.8046875, "learning_rate": 0.00016611130341813253, "loss": 1.9591, "step": 229535 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.0001661099166590419, "loss": 1.9048, "step": 229540 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016610852987736688, "loss": 2.0873, "step": 229545 }, { "epoch": 0.54, "grad_norm": 1.921875, "learning_rate": 0.000166107143073108, "loss": 2.1391, "step": 229550 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001661057562462657, "loss": 2.0755, "step": 229555 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.00016610436939684052, "loss": 2.0058, "step": 229560 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016610298252483285, "loss": 2.1128, "step": 229565 }, { "epoch": 0.54, "grad_norm": 2.734375, "learning_rate": 0.00016610159563024322, "loss": 2.01, "step": 229570 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016610020871307208, "loss": 2.0347, "step": 229575 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.0001660988217733199, "loss": 1.9985, "step": 229580 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.0001660974348109872, "loss": 1.913, "step": 229585 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016609604782607436, "loss": 2.1345, "step": 229590 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016609466081858197, "loss": 2.0307, "step": 229595 }, { "epoch": 0.54, "grad_norm": 1.7421875, "learning_rate": 0.00016609327378851041, "loss": 1.9968, "step": 229600 }, { "epoch": 0.54, "grad_norm": 2.5625, "learning_rate": 0.00016609188673586021, "loss": 2.1021, "step": 229605 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001660904996606318, "loss": 2.1712, "step": 229610 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016608911256282567, "loss": 1.8236, "step": 229615 }, { "epoch": 0.54, "grad_norm": 1.7578125, "learning_rate": 0.00016608772544244232, "loss": 2.1729, "step": 229620 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016608633829948223, "loss": 2.1796, "step": 229625 }, { "epoch": 0.54, "grad_norm": 1.7734375, "learning_rate": 0.0001660849511339458, "loss": 2.0584, "step": 229630 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016608356394583358, "loss": 2.2177, "step": 229635 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.000166082176735146, "loss": 2.0576, "step": 229640 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016608078950188358, "loss": 2.1569, "step": 229645 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.0001660794022460467, "loss": 2.2879, "step": 229650 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016607801496763594, "loss": 2.2147, "step": 229655 }, { "epoch": 0.54, "grad_norm": 1.828125, "learning_rate": 0.00016607662766665175, "loss": 2.0574, "step": 229660 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016607524034309456, "loss": 1.9301, "step": 229665 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001660738529969649, "loss": 2.1244, "step": 229670 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.0001660724656282632, "loss": 2.1414, "step": 229675 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.00016607107823698991, "loss": 2.0004, "step": 229680 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016606969082314557, "loss": 2.3029, "step": 229685 }, { "epoch": 0.54, "grad_norm": 1.9921875, "learning_rate": 0.00016606830338673062, "loss": 2.0659, "step": 229690 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016606691592774557, "loss": 2.0594, "step": 229695 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016606552844619082, "loss": 2.051, "step": 229700 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001660641409420669, "loss": 2.0306, "step": 229705 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.0001660627534153743, "loss": 2.2099, "step": 229710 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016606136586611343, "loss": 1.9669, "step": 229715 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001660599782942848, "loss": 1.897, "step": 229720 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016605859069988892, "loss": 2.2686, "step": 229725 }, { "epoch": 0.54, "grad_norm": 1.890625, "learning_rate": 0.00016605720308292618, "loss": 2.0601, "step": 229730 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.00016605581544339713, "loss": 2.1381, "step": 229735 }, { "epoch": 0.54, "grad_norm": 2.546875, "learning_rate": 0.0001660544277813022, "loss": 2.1013, "step": 229740 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001660530400966419, "loss": 2.1395, "step": 229745 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.00016605165238941667, "loss": 2.0051, "step": 229750 }, { "epoch": 0.54, "grad_norm": 1.7578125, "learning_rate": 0.000166050264659627, "loss": 1.8915, "step": 229755 }, { "epoch": 0.54, "grad_norm": 2.890625, "learning_rate": 0.00016604887690727335, "loss": 2.0036, "step": 229760 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001660474891323562, "loss": 2.0932, "step": 229765 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016604610133487605, "loss": 2.0614, "step": 229770 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016604471351483338, "loss": 2.0763, "step": 229775 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001660433256722286, "loss": 2.0302, "step": 229780 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.0001660419378070622, "loss": 2.0566, "step": 229785 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016604054991933474, "loss": 2.133, "step": 229790 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.0001660391620090466, "loss": 1.8798, "step": 229795 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016603777407619828, "loss": 2.249, "step": 229800 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016603638612079026, "loss": 2.0539, "step": 229805 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016603499814282302, "loss": 2.0199, "step": 229810 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.000166033610142297, "loss": 2.1305, "step": 229815 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.0001660322221192127, "loss": 2.2188, "step": 229820 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016603083407357066, "loss": 2.0451, "step": 229825 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016602944600537124, "loss": 2.055, "step": 229830 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016602805791461497, "loss": 2.1545, "step": 229835 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001660266698013023, "loss": 2.2067, "step": 229840 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016602528166543374, "loss": 2.2159, "step": 229845 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016602389350700976, "loss": 2.066, "step": 229850 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.00016602250532603082, "loss": 2.2165, "step": 229855 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016602111712249738, "loss": 2.0252, "step": 229860 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016601972889640992, "loss": 2.1399, "step": 229865 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016601834064776893, "loss": 2.2492, "step": 229870 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016601695237657488, "loss": 2.0073, "step": 229875 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016601556408282826, "loss": 1.9373, "step": 229880 }, { "epoch": 0.54, "grad_norm": 2.671875, "learning_rate": 0.00016601417576652948, "loss": 2.0205, "step": 229885 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016601278742767913, "loss": 2.0456, "step": 229890 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016601139906627753, "loss": 2.0304, "step": 229895 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016601001068232532, "loss": 2.0577, "step": 229900 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016600862227582284, "loss": 1.8778, "step": 229905 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016600723384677064, "loss": 2.2288, "step": 229910 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016600584539516917, "loss": 2.1279, "step": 229915 }, { "epoch": 0.54, "grad_norm": 4.15625, "learning_rate": 0.0001660044569210189, "loss": 2.0154, "step": 229920 }, { "epoch": 0.54, "grad_norm": 3.09375, "learning_rate": 0.00016600306842432032, "loss": 2.1789, "step": 229925 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016600167990507388, "loss": 2.2788, "step": 229930 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016600029136328007, "loss": 1.9928, "step": 229935 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016599890279893938, "loss": 2.3711, "step": 229940 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016599751421205228, "loss": 2.1047, "step": 229945 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.0001659961256026192, "loss": 2.114, "step": 229950 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016599473697064067, "loss": 2.1598, "step": 229955 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.0001659933483161171, "loss": 2.1801, "step": 229960 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016599195963904904, "loss": 2.2613, "step": 229965 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.00016599057093943692, "loss": 2.0658, "step": 229970 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.0001659891822172812, "loss": 1.9758, "step": 229975 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016598779347258243, "loss": 1.9962, "step": 229980 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.000165986404705341, "loss": 1.9437, "step": 229985 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016598501591555745, "loss": 2.3095, "step": 229990 }, { "epoch": 0.54, "grad_norm": 1.7109375, "learning_rate": 0.0001659836271032322, "loss": 1.9837, "step": 229995 }, { "epoch": 0.54, "grad_norm": 1.921875, "learning_rate": 0.00016598223826836574, "loss": 2.2264, "step": 230000 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016598084941095854, "loss": 2.1608, "step": 230005 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.0001659794605310111, "loss": 2.1123, "step": 230010 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.0001659780716285239, "loss": 2.3223, "step": 230015 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016597668270349738, "loss": 2.2672, "step": 230020 }, { "epoch": 0.54, "grad_norm": 1.7421875, "learning_rate": 0.00016597529375593202, "loss": 2.0875, "step": 230025 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.0001659739047858283, "loss": 2.0925, "step": 230030 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016597251579318672, "loss": 2.1179, "step": 230035 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001659711267780077, "loss": 2.0268, "step": 230040 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016596973774029173, "loss": 1.9052, "step": 230045 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016596834868003936, "loss": 2.2169, "step": 230050 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016596695959725097, "loss": 2.2387, "step": 230055 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001659655704919271, "loss": 2.0676, "step": 230060 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016596418136406819, "loss": 2.1814, "step": 230065 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016596279221367467, "loss": 2.1939, "step": 230070 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016596140304074713, "loss": 2.0707, "step": 230075 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001659600138452859, "loss": 2.0096, "step": 230080 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016595862462729158, "loss": 2.0423, "step": 230085 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016595723538676463, "loss": 2.1695, "step": 230090 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016595584612370546, "loss": 2.1451, "step": 230095 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016595445683811457, "loss": 1.9112, "step": 230100 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016595306752999244, "loss": 2.0639, "step": 230105 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016595167819933955, "loss": 2.2092, "step": 230110 }, { "epoch": 0.54, "grad_norm": 1.8828125, "learning_rate": 0.00016595028884615638, "loss": 2.1001, "step": 230115 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016594889947044338, "loss": 2.2222, "step": 230120 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016594751007220103, "loss": 2.1081, "step": 230125 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.00016594612065142986, "loss": 2.0366, "step": 230130 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016594473120813027, "loss": 2.1872, "step": 230135 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016594334174230277, "loss": 2.0259, "step": 230140 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016594195225394783, "loss": 1.852, "step": 230145 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.0001659405627430659, "loss": 2.0953, "step": 230150 }, { "epoch": 0.54, "grad_norm": 1.8828125, "learning_rate": 0.0001659391732096575, "loss": 2.0893, "step": 230155 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016593778365372308, "loss": 2.2511, "step": 230160 }, { "epoch": 0.54, "grad_norm": 2.640625, "learning_rate": 0.0001659363940752631, "loss": 2.0769, "step": 230165 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016593500447427807, "loss": 2.2704, "step": 230170 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.00016593361485076847, "loss": 2.2805, "step": 230175 }, { "epoch": 0.54, "grad_norm": 1.5703125, "learning_rate": 0.0001659322252047347, "loss": 2.1656, "step": 230180 }, { "epoch": 0.54, "grad_norm": 1.7734375, "learning_rate": 0.00016593083553617733, "loss": 2.1656, "step": 230185 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016592944584509675, "loss": 2.044, "step": 230190 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016592805613149352, "loss": 2.1355, "step": 230195 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016592666639536806, "loss": 2.0305, "step": 230200 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016592527663672084, "loss": 2.0905, "step": 230205 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016592388685555232, "loss": 2.2179, "step": 230210 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016592249705186304, "loss": 2.1932, "step": 230215 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016592110722565346, "loss": 1.9358, "step": 230220 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016591971737692403, "loss": 1.8901, "step": 230225 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.0001659183275056752, "loss": 2.0705, "step": 230230 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001659169376119075, "loss": 2.0239, "step": 230235 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016591554769562135, "loss": 2.2552, "step": 230240 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016591415775681724, "loss": 2.1492, "step": 230245 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016591276779549569, "loss": 2.1452, "step": 230250 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016591137781165715, "loss": 2.0282, "step": 230255 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001659099878053021, "loss": 2.0929, "step": 230260 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.000165908597776431, "loss": 1.9355, "step": 230265 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016590720772504427, "loss": 2.1482, "step": 230270 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016590581765114245, "loss": 2.0788, "step": 230275 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016590442755472606, "loss": 2.2292, "step": 230280 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001659030374357955, "loss": 2.1249, "step": 230285 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016590164729435127, "loss": 2.0617, "step": 230290 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016590025713039382, "loss": 2.2094, "step": 230295 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.0001658988669439237, "loss": 2.0756, "step": 230300 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016589747673494127, "loss": 1.9482, "step": 230305 }, { "epoch": 0.54, "grad_norm": 3.640625, "learning_rate": 0.0001658960865034471, "loss": 1.9923, "step": 230310 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016589469624944163, "loss": 2.1314, "step": 230315 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016589330597292535, "loss": 2.0328, "step": 230320 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016589191567389867, "loss": 2.0447, "step": 230325 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.00016589052535236217, "loss": 2.051, "step": 230330 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016588913500831628, "loss": 2.2616, "step": 230335 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001658877446417614, "loss": 2.1574, "step": 230340 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016588635425269813, "loss": 2.0481, "step": 230345 }, { "epoch": 0.54, "grad_norm": 1.734375, "learning_rate": 0.00016588496384112685, "loss": 2.2079, "step": 230350 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016588357340704808, "loss": 2.0992, "step": 230355 }, { "epoch": 0.54, "grad_norm": 1.8203125, "learning_rate": 0.0001658821829504623, "loss": 2.0341, "step": 230360 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016588079247136997, "loss": 1.9772, "step": 230365 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016587940196977158, "loss": 2.1177, "step": 230370 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016587801144566755, "loss": 2.1356, "step": 230375 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016587662089905845, "loss": 2.224, "step": 230380 }, { "epoch": 0.54, "grad_norm": 1.8984375, "learning_rate": 0.00016587523032994466, "loss": 2.1265, "step": 230385 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.0001658738397383267, "loss": 2.0495, "step": 230390 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016587244912420504, "loss": 2.2481, "step": 230395 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016587105848758017, "loss": 1.9715, "step": 230400 }, { "epoch": 0.54, "grad_norm": 2.84375, "learning_rate": 0.00016586966782845255, "loss": 2.1342, "step": 230405 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.00016586827714682267, "loss": 2.026, "step": 230410 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.00016586688644269098, "loss": 2.0808, "step": 230415 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016586549571605795, "loss": 2.058, "step": 230420 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.0001658641049669241, "loss": 2.1068, "step": 230425 }, { "epoch": 0.54, "grad_norm": 2.515625, "learning_rate": 0.00016586271419528984, "loss": 2.2217, "step": 230430 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.0001658613234011557, "loss": 1.9623, "step": 230435 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.00016585993258452216, "loss": 2.1107, "step": 230440 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001658585417453897, "loss": 2.0561, "step": 230445 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016585715088375872, "loss": 2.2171, "step": 230450 }, { "epoch": 0.54, "grad_norm": 2.609375, "learning_rate": 0.00016585575999962973, "loss": 2.0931, "step": 230455 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.00016585436909300323, "loss": 2.0571, "step": 230460 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016585297816387967, "loss": 2.0363, "step": 230465 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016585158721225957, "loss": 2.1662, "step": 230470 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016585019623814335, "loss": 2.0479, "step": 230475 }, { "epoch": 0.54, "grad_norm": 1.765625, "learning_rate": 0.00016584880524153154, "loss": 2.0536, "step": 230480 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016584741422242454, "loss": 2.0283, "step": 230485 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016584602318082289, "loss": 2.2546, "step": 230490 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.00016584463211672708, "loss": 1.7948, "step": 230495 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016584324103013752, "loss": 2.0559, "step": 230500 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.0001658418499210547, "loss": 2.033, "step": 230505 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016584045878947912, "loss": 2.1807, "step": 230510 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016583906763541125, "loss": 2.0889, "step": 230515 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016583767645885153, "loss": 2.1184, "step": 230520 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.0001658362852598005, "loss": 2.1264, "step": 230525 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.0001658348940382586, "loss": 2.1837, "step": 230530 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016583350279422628, "loss": 2.2068, "step": 230535 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.00016583211152770407, "loss": 2.1397, "step": 230540 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016583072023869242, "loss": 2.0977, "step": 230545 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016582932892719179, "loss": 2.0801, "step": 230550 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.00016582793759320269, "loss": 2.2342, "step": 230555 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016582654623672552, "loss": 2.3001, "step": 230560 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016582515485776082, "loss": 2.0512, "step": 230565 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016582376345630908, "loss": 2.068, "step": 230570 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016582237203237074, "loss": 2.0333, "step": 230575 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.0001658209805859463, "loss": 1.9796, "step": 230580 }, { "epoch": 0.54, "grad_norm": 1.8203125, "learning_rate": 0.0001658195891170362, "loss": 2.1391, "step": 230585 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016581819762564095, "loss": 2.0415, "step": 230590 }, { "epoch": 0.54, "grad_norm": 1.84375, "learning_rate": 0.00016581680611176097, "loss": 2.1863, "step": 230595 }, { "epoch": 0.54, "grad_norm": 1.9375, "learning_rate": 0.0001658154145753968, "loss": 2.0945, "step": 230600 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016581402301654892, "loss": 2.0808, "step": 230605 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016581263143521774, "loss": 2.0685, "step": 230610 }, { "epoch": 0.54, "grad_norm": 1.828125, "learning_rate": 0.0001658112398314038, "loss": 2.2292, "step": 230615 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.0001658098482051075, "loss": 2.0437, "step": 230620 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016580845655632943, "loss": 2.2257, "step": 230625 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016580706488506994, "loss": 2.0289, "step": 230630 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016580567319132955, "loss": 2.0648, "step": 230635 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.0001658042814751088, "loss": 2.1781, "step": 230640 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016580288973640808, "loss": 2.103, "step": 230645 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016580149797522793, "loss": 2.1077, "step": 230650 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001658001061915688, "loss": 2.0936, "step": 230655 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016579871438543111, "loss": 2.1761, "step": 230660 }, { "epoch": 0.54, "grad_norm": 1.7890625, "learning_rate": 0.0001657973225568154, "loss": 2.3544, "step": 230665 }, { "epoch": 0.54, "grad_norm": 1.8828125, "learning_rate": 0.00016579593070572215, "loss": 1.9453, "step": 230670 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016579453883215184, "loss": 2.1549, "step": 230675 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.00016579314693610487, "loss": 2.1118, "step": 230680 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001657917550175818, "loss": 2.0874, "step": 230685 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.0001657903630765831, "loss": 2.1437, "step": 230690 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.0001657889711131092, "loss": 2.045, "step": 230695 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016578757912716054, "loss": 2.2446, "step": 230700 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.00016578618711873767, "loss": 2.1385, "step": 230705 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.0001657847950878411, "loss": 2.1467, "step": 230710 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.0001657834030344712, "loss": 1.8784, "step": 230715 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016578201095862851, "loss": 2.2402, "step": 230720 }, { "epoch": 0.54, "grad_norm": 1.828125, "learning_rate": 0.00016578061886031347, "loss": 2.0561, "step": 230725 }, { "epoch": 0.54, "grad_norm": 2.453125, "learning_rate": 0.0001657792267395266, "loss": 2.0811, "step": 230730 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016577783459626838, "loss": 2.142, "step": 230735 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016577644243053925, "loss": 2.2327, "step": 230740 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016577505024233967, "loss": 2.2351, "step": 230745 }, { "epoch": 0.54, "grad_norm": 2.65625, "learning_rate": 0.00016577365803167014, "loss": 2.0976, "step": 230750 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016577226579853115, "loss": 2.2778, "step": 230755 }, { "epoch": 0.54, "grad_norm": 2.53125, "learning_rate": 0.00016577087354292314, "loss": 1.9572, "step": 230760 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016576948126484662, "loss": 2.2512, "step": 230765 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016576808896430207, "loss": 2.2079, "step": 230770 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016576669664128995, "loss": 2.0384, "step": 230775 }, { "epoch": 0.54, "grad_norm": 2.671875, "learning_rate": 0.00016576530429581067, "loss": 1.9425, "step": 230780 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016576391192786482, "loss": 2.1375, "step": 230785 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016576251953745284, "loss": 1.9237, "step": 230790 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016576112712457514, "loss": 2.1332, "step": 230795 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001657597346892323, "loss": 2.1126, "step": 230800 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.0001657583422314247, "loss": 2.1069, "step": 230805 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001657569497511529, "loss": 2.0326, "step": 230810 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001657555572484173, "loss": 2.1458, "step": 230815 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.0001657541647232184, "loss": 2.1446, "step": 230820 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016575277217555673, "loss": 2.0306, "step": 230825 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016575137960543268, "loss": 2.1383, "step": 230830 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016574998701284678, "loss": 2.0099, "step": 230835 }, { "epoch": 0.54, "grad_norm": 1.9140625, "learning_rate": 0.0001657485943977995, "loss": 2.1802, "step": 230840 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001657472017602913, "loss": 2.2346, "step": 230845 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016574580910032268, "loss": 2.1417, "step": 230850 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016574441641789407, "loss": 2.1827, "step": 230855 }, { "epoch": 0.54, "grad_norm": 1.75, "learning_rate": 0.000165743023713006, "loss": 2.1257, "step": 230860 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001657416309856589, "loss": 2.0827, "step": 230865 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016574023823585327, "loss": 2.2552, "step": 230870 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016573884546358956, "loss": 2.1205, "step": 230875 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016573745266886832, "loss": 2.1254, "step": 230880 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.00016573605985168994, "loss": 2.0176, "step": 230885 }, { "epoch": 0.54, "grad_norm": 2.890625, "learning_rate": 0.00016573466701205495, "loss": 2.1461, "step": 230890 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.0001657332741499638, "loss": 2.1344, "step": 230895 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016573188126541697, "loss": 2.1812, "step": 230900 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.00016573048835841493, "loss": 2.0479, "step": 230905 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016572909542895815, "loss": 1.9725, "step": 230910 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016572770247704713, "loss": 2.0626, "step": 230915 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.00016572630950268237, "loss": 2.2152, "step": 230920 }, { "epoch": 0.54, "grad_norm": 2.390625, "learning_rate": 0.00016572491650586427, "loss": 2.1217, "step": 230925 }, { "epoch": 0.54, "grad_norm": 2.59375, "learning_rate": 0.00016572352348659335, "loss": 2.1176, "step": 230930 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.0001657221304448701, "loss": 2.1997, "step": 230935 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016572073738069495, "loss": 2.0223, "step": 230940 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016571934429406842, "loss": 2.2474, "step": 230945 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016571795118499097, "loss": 2.2025, "step": 230950 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.0001657165580534631, "loss": 2.1187, "step": 230955 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016571516489948524, "loss": 2.2168, "step": 230960 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016571377172305788, "loss": 2.0361, "step": 230965 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016571237852418152, "loss": 2.0586, "step": 230970 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016571098530285662, "loss": 2.1501, "step": 230975 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016570959205908364, "loss": 2.1014, "step": 230980 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001657081987928631, "loss": 1.9841, "step": 230985 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.00016570680550419538, "loss": 1.8608, "step": 230990 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016570541219308109, "loss": 2.0511, "step": 230995 }, { "epoch": 0.54, "grad_norm": 1.6953125, "learning_rate": 0.00016570401885952066, "loss": 1.9398, "step": 231000 }, { "epoch": 0.54, "grad_norm": 1.703125, "learning_rate": 0.00016570262550351448, "loss": 2.0843, "step": 231005 }, { "epoch": 0.54, "grad_norm": 1.84375, "learning_rate": 0.0001657012321250631, "loss": 2.2819, "step": 231010 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016569983872416702, "loss": 2.0784, "step": 231015 }, { "epoch": 0.54, "grad_norm": 1.9609375, "learning_rate": 0.00016569844530082666, "loss": 2.1486, "step": 231020 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016569705185504253, "loss": 2.0991, "step": 231025 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016569565838681508, "loss": 1.9866, "step": 231030 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016569426489614484, "loss": 2.0092, "step": 231035 }, { "epoch": 0.54, "grad_norm": 2.484375, "learning_rate": 0.00016569287138303222, "loss": 1.9818, "step": 231040 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016569147784747772, "loss": 1.9559, "step": 231045 }, { "epoch": 0.54, "grad_norm": 1.859375, "learning_rate": 0.00016569008428948183, "loss": 2.0861, "step": 231050 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.000165688690709045, "loss": 2.0312, "step": 231055 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016568729710616774, "loss": 2.0323, "step": 231060 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016568590348085051, "loss": 2.1555, "step": 231065 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016568450983309377, "loss": 1.9592, "step": 231070 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016568311616289803, "loss": 2.0643, "step": 231075 }, { "epoch": 0.54, "grad_norm": 2.640625, "learning_rate": 0.00016568172247026372, "loss": 1.9047, "step": 231080 }, { "epoch": 0.54, "grad_norm": 1.796875, "learning_rate": 0.00016568032875519134, "loss": 2.0786, "step": 231085 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001656789350176814, "loss": 2.0274, "step": 231090 }, { "epoch": 0.54, "grad_norm": 1.96875, "learning_rate": 0.00016567754125773433, "loss": 2.0239, "step": 231095 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.00016567614747535063, "loss": 2.0214, "step": 231100 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016567475367053076, "loss": 2.1831, "step": 231105 }, { "epoch": 0.54, "grad_norm": 1.7109375, "learning_rate": 0.00016567335984327518, "loss": 2.1461, "step": 231110 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.0001656719659935844, "loss": 2.0025, "step": 231115 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.0001656705721214589, "loss": 2.0171, "step": 231120 }, { "epoch": 0.54, "grad_norm": 2.140625, "learning_rate": 0.00016566917822689913, "loss": 2.0439, "step": 231125 }, { "epoch": 0.54, "grad_norm": 2.765625, "learning_rate": 0.0001656677843099056, "loss": 1.9454, "step": 231130 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.00016566639037047875, "loss": 2.1231, "step": 231135 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016566499640861907, "loss": 2.0438, "step": 231140 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016566360242432703, "loss": 2.0703, "step": 231145 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016566220841760312, "loss": 2.0836, "step": 231150 }, { "epoch": 0.54, "grad_norm": 2.03125, "learning_rate": 0.0001656608143884478, "loss": 2.1607, "step": 231155 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016565942033686156, "loss": 2.0987, "step": 231160 }, { "epoch": 0.54, "grad_norm": 1.8984375, "learning_rate": 0.00016565802626284488, "loss": 2.091, "step": 231165 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.0001656566321663982, "loss": 2.0031, "step": 231170 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016565523804752205, "loss": 2.0973, "step": 231175 }, { "epoch": 0.54, "grad_norm": 1.859375, "learning_rate": 0.0001656538439062169, "loss": 2.1926, "step": 231180 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.00016565244974248316, "loss": 2.1798, "step": 231185 }, { "epoch": 0.54, "grad_norm": 1.875, "learning_rate": 0.00016565105555632137, "loss": 2.2001, "step": 231190 }, { "epoch": 0.54, "grad_norm": 1.9296875, "learning_rate": 0.00016564966134773198, "loss": 2.1532, "step": 231195 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016564826711671548, "loss": 2.0333, "step": 231200 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.00016564687286327234, "loss": 2.0459, "step": 231205 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.00016564547858740304, "loss": 1.7872, "step": 231210 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016564408428910808, "loss": 2.1454, "step": 231215 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016564268996838787, "loss": 1.9639, "step": 231220 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016564129562524296, "loss": 1.9454, "step": 231225 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.0001656399012596738, "loss": 2.0726, "step": 231230 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016563850687168082, "loss": 2.1394, "step": 231235 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016563711246126453, "loss": 2.0982, "step": 231240 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016563571802842545, "loss": 2.0501, "step": 231245 }, { "epoch": 0.54, "grad_norm": 1.8671875, "learning_rate": 0.000165634323573164, "loss": 2.1405, "step": 231250 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.0001656329290954807, "loss": 2.1968, "step": 231255 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.00016563153459537598, "loss": 2.1593, "step": 231260 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016563014007285032, "loss": 1.9989, "step": 231265 }, { "epoch": 0.54, "grad_norm": 2.015625, "learning_rate": 0.00016562874552790422, "loss": 2.2339, "step": 231270 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016562735096053817, "loss": 2.0511, "step": 231275 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001656259563707526, "loss": 2.152, "step": 231280 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.00016562456175854803, "loss": 2.0326, "step": 231285 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.0001656231671239249, "loss": 2.1139, "step": 231290 }, { "epoch": 0.54, "grad_norm": 2.53125, "learning_rate": 0.00016562177246688377, "loss": 2.0318, "step": 231295 }, { "epoch": 0.54, "grad_norm": 2.25, "learning_rate": 0.00016562037778742502, "loss": 2.0711, "step": 231300 }, { "epoch": 0.54, "grad_norm": 1.8046875, "learning_rate": 0.0001656189830855491, "loss": 2.2211, "step": 231305 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.00016561758836125662, "loss": 2.0179, "step": 231310 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016561619361454795, "loss": 2.0247, "step": 231315 }, { "epoch": 0.54, "grad_norm": 2.125, "learning_rate": 0.0001656147988454236, "loss": 2.0623, "step": 231320 }, { "epoch": 0.54, "grad_norm": 2.640625, "learning_rate": 0.00016561340405388405, "loss": 2.1639, "step": 231325 }, { "epoch": 0.54, "grad_norm": 1.8125, "learning_rate": 0.00016561200923992977, "loss": 1.7734, "step": 231330 }, { "epoch": 0.54, "grad_norm": 2.765625, "learning_rate": 0.00016561061440356124, "loss": 2.2224, "step": 231335 }, { "epoch": 0.54, "grad_norm": 2.640625, "learning_rate": 0.00016560921954477892, "loss": 1.9217, "step": 231340 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001656078246635833, "loss": 2.2967, "step": 231345 }, { "epoch": 0.54, "grad_norm": 1.984375, "learning_rate": 0.00016560642975997487, "loss": 2.1397, "step": 231350 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016560503483395412, "loss": 2.1807, "step": 231355 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016560363988552145, "loss": 2.2132, "step": 231360 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.00016560224491467742, "loss": 1.9456, "step": 231365 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.00016560084992142245, "loss": 2.2423, "step": 231370 }, { "epoch": 0.54, "grad_norm": 1.9765625, "learning_rate": 0.00016559945490575705, "loss": 1.9918, "step": 231375 }, { "epoch": 0.54, "grad_norm": 2.0, "learning_rate": 0.0001655980598676817, "loss": 2.0556, "step": 231380 }, { "epoch": 0.54, "grad_norm": 2.0625, "learning_rate": 0.00016559666480719685, "loss": 2.1585, "step": 231385 }, { "epoch": 0.54, "grad_norm": 2.234375, "learning_rate": 0.000165595269724303, "loss": 2.1861, "step": 231390 }, { "epoch": 0.54, "grad_norm": 2.1875, "learning_rate": 0.0001655938746190006, "loss": 2.1242, "step": 231395 }, { "epoch": 0.54, "grad_norm": 2.421875, "learning_rate": 0.00016559247949129015, "loss": 2.2612, "step": 231400 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.00016559108434117213, "loss": 2.0656, "step": 231405 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016558968916864699, "loss": 2.2023, "step": 231410 }, { "epoch": 0.54, "grad_norm": 1.8984375, "learning_rate": 0.00016558829397371523, "loss": 2.1264, "step": 231415 }, { "epoch": 0.54, "grad_norm": 2.578125, "learning_rate": 0.0001655868987563773, "loss": 2.0369, "step": 231420 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016558550351663374, "loss": 2.0756, "step": 231425 }, { "epoch": 0.54, "grad_norm": 2.375, "learning_rate": 0.00016558410825448494, "loss": 2.067, "step": 231430 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016558271296993144, "loss": 1.7041, "step": 231435 }, { "epoch": 0.54, "grad_norm": 2.171875, "learning_rate": 0.00016558131766297368, "loss": 2.1518, "step": 231440 }, { "epoch": 0.54, "grad_norm": 2.4375, "learning_rate": 0.00016557992233361216, "loss": 2.0177, "step": 231445 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016557852698184737, "loss": 1.8963, "step": 231450 }, { "epoch": 0.54, "grad_norm": 1.890625, "learning_rate": 0.00016557713160767973, "loss": 2.2765, "step": 231455 }, { "epoch": 0.54, "grad_norm": 2.078125, "learning_rate": 0.0001655757362111098, "loss": 2.1112, "step": 231460 }, { "epoch": 0.54, "grad_norm": 2.71875, "learning_rate": 0.00016557434079213796, "loss": 2.1047, "step": 231465 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.00016557294535076477, "loss": 1.9901, "step": 231470 }, { "epoch": 0.54, "grad_norm": 2.109375, "learning_rate": 0.00016557154988699067, "loss": 2.07, "step": 231475 }, { "epoch": 0.54, "grad_norm": 2.3125, "learning_rate": 0.0001655701544008161, "loss": 2.1513, "step": 231480 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.0001655687588922416, "loss": 1.925, "step": 231485 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.00016556736336126762, "loss": 2.0251, "step": 231490 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.00016556596780789468, "loss": 1.9056, "step": 231495 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016556457223212317, "loss": 2.124, "step": 231500 }, { "epoch": 0.54, "grad_norm": 2.28125, "learning_rate": 0.0001655631766339536, "loss": 2.2156, "step": 231505 }, { "epoch": 0.54, "grad_norm": 2.046875, "learning_rate": 0.00016556178101338653, "loss": 1.8693, "step": 231510 }, { "epoch": 0.54, "grad_norm": 2.203125, "learning_rate": 0.00016556038537042232, "loss": 2.2195, "step": 231515 }, { "epoch": 0.54, "grad_norm": 1.90625, "learning_rate": 0.00016555898970506148, "loss": 2.1219, "step": 231520 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016555759401730452, "loss": 1.9436, "step": 231525 }, { "epoch": 0.54, "grad_norm": 2.359375, "learning_rate": 0.0001655561983071519, "loss": 2.0657, "step": 231530 }, { "epoch": 0.54, "grad_norm": 2.328125, "learning_rate": 0.0001655548025746041, "loss": 1.9584, "step": 231535 }, { "epoch": 0.54, "grad_norm": 2.46875, "learning_rate": 0.00016555340681966157, "loss": 2.1742, "step": 231540 }, { "epoch": 0.54, "grad_norm": 2.265625, "learning_rate": 0.0001655520110423248, "loss": 2.1539, "step": 231545 }, { "epoch": 0.54, "grad_norm": 2.21875, "learning_rate": 0.0001655506152425943, "loss": 2.0053, "step": 231550 }, { "epoch": 0.54, "grad_norm": 2.296875, "learning_rate": 0.0001655492194204705, "loss": 2.0094, "step": 231555 }, { "epoch": 0.54, "grad_norm": 2.15625, "learning_rate": 0.0001655478235759539, "loss": 2.0493, "step": 231560 }, { "epoch": 0.54, "grad_norm": 2.5, "learning_rate": 0.000165546427709045, "loss": 2.0742, "step": 231565 }, { "epoch": 0.54, "grad_norm": 2.09375, "learning_rate": 0.00016554503181974423, "loss": 2.2378, "step": 231570 }, { "epoch": 0.54, "grad_norm": 1.953125, "learning_rate": 0.00016554363590805208, "loss": 2.0973, "step": 231575 }, { "epoch": 0.54, "grad_norm": 2.34375, "learning_rate": 0.00016554223997396905, "loss": 1.9912, "step": 231580 }, { "epoch": 0.54, "grad_norm": 2.40625, "learning_rate": 0.00016554084401749561, "loss": 2.1372, "step": 231585 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001655394480386322, "loss": 2.0426, "step": 231590 }, { "epoch": 0.55, "grad_norm": 2.578125, "learning_rate": 0.00016553805203737936, "loss": 2.0472, "step": 231595 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001655366560137375, "loss": 2.2184, "step": 231600 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016553525996770719, "loss": 2.1399, "step": 231605 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016553386389928876, "loss": 2.0991, "step": 231610 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016553246780848284, "loss": 2.0929, "step": 231615 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.0001655310716952898, "loss": 2.076, "step": 231620 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.0001655296755597102, "loss": 2.0791, "step": 231625 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016552827940174443, "loss": 2.2058, "step": 231630 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016552688322139304, "loss": 2.1219, "step": 231635 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.00016552548701865647, "loss": 2.1986, "step": 231640 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001655240907935352, "loss": 2.1006, "step": 231645 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016552269454602974, "loss": 2.1103, "step": 231650 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016552129827614048, "loss": 2.0396, "step": 231655 }, { "epoch": 0.55, "grad_norm": 1.8046875, "learning_rate": 0.00016551990198386803, "loss": 2.0743, "step": 231660 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016551850566921274, "loss": 2.0783, "step": 231665 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.00016551710933217516, "loss": 2.0427, "step": 231670 }, { "epoch": 0.55, "grad_norm": 1.9296875, "learning_rate": 0.00016551571297275573, "loss": 2.2275, "step": 231675 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016551431659095496, "loss": 2.06, "step": 231680 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016551292018677333, "loss": 2.202, "step": 231685 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016551152376021125, "loss": 1.9766, "step": 231690 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016551012731126928, "loss": 2.0314, "step": 231695 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.00016550873083994783, "loss": 2.1667, "step": 231700 }, { "epoch": 0.55, "grad_norm": 2.765625, "learning_rate": 0.00016550733434624748, "loss": 2.1149, "step": 231705 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016550593783016857, "loss": 2.0668, "step": 231710 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016550454129171167, "loss": 1.9384, "step": 231715 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016550314473087722, "loss": 2.0404, "step": 231720 }, { "epoch": 0.55, "grad_norm": 2.65625, "learning_rate": 0.00016550174814766573, "loss": 1.983, "step": 231725 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001655003515420776, "loss": 2.1151, "step": 231730 }, { "epoch": 0.55, "grad_norm": 2.578125, "learning_rate": 0.00016549895491411342, "loss": 2.2115, "step": 231735 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016549755826377358, "loss": 2.2424, "step": 231740 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001654961615910586, "loss": 2.1554, "step": 231745 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016549476489596896, "loss": 1.9161, "step": 231750 }, { "epoch": 0.55, "grad_norm": 2.609375, "learning_rate": 0.00016549336817850508, "loss": 2.2215, "step": 231755 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.0001654919714386675, "loss": 2.0766, "step": 231760 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.0001654905746764567, "loss": 1.9679, "step": 231765 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016548917789187308, "loss": 2.1633, "step": 231770 }, { "epoch": 0.55, "grad_norm": 1.8515625, "learning_rate": 0.00016548778108491717, "loss": 2.2363, "step": 231775 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016548638425558949, "loss": 2.0852, "step": 231780 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016548498740389045, "loss": 2.1458, "step": 231785 }, { "epoch": 0.55, "grad_norm": 1.9921875, "learning_rate": 0.00016548359052982056, "loss": 2.0355, "step": 231790 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016548219363338027, "loss": 2.0516, "step": 231795 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.0001654807967145701, "loss": 2.1348, "step": 231800 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016547939977339048, "loss": 2.0502, "step": 231805 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016547800280984195, "loss": 2.2258, "step": 231810 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.0001654766058239249, "loss": 2.0153, "step": 231815 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016547520881563988, "loss": 2.2169, "step": 231820 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.00016547381178498731, "loss": 2.136, "step": 231825 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016547241473196774, "loss": 1.9558, "step": 231830 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016547101765658156, "loss": 2.0232, "step": 231835 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001654696205588293, "loss": 2.1319, "step": 231840 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016546822343871146, "loss": 2.0436, "step": 231845 }, { "epoch": 0.55, "grad_norm": 1.8671875, "learning_rate": 0.00016546682629622848, "loss": 2.1472, "step": 231850 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016546542913138082, "loss": 2.1092, "step": 231855 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016546403194416903, "loss": 2.001, "step": 231860 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001654626347345935, "loss": 2.1604, "step": 231865 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.00016546123750265477, "loss": 1.9533, "step": 231870 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016545984024835325, "loss": 2.0826, "step": 231875 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016545844297168952, "loss": 2.1829, "step": 231880 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016545704567266399, "loss": 1.9949, "step": 231885 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001654556483512771, "loss": 2.0716, "step": 231890 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.0001654542510075294, "loss": 2.1396, "step": 231895 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016545285364142134, "loss": 1.869, "step": 231900 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016545145625295339, "loss": 2.2132, "step": 231905 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001654500588421261, "loss": 2.1023, "step": 231910 }, { "epoch": 0.55, "grad_norm": 4.46875, "learning_rate": 0.0001654486614089398, "loss": 2.2409, "step": 231915 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.0001654472639533951, "loss": 2.2479, "step": 231920 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.0001654458664754924, "loss": 2.2633, "step": 231925 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016544446897523218, "loss": 1.9934, "step": 231930 }, { "epoch": 0.55, "grad_norm": 1.78125, "learning_rate": 0.000165443071452615, "loss": 2.1328, "step": 231935 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016544167390764126, "loss": 2.1446, "step": 231940 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016544027634031143, "loss": 2.2955, "step": 231945 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016543887875062601, "loss": 2.0341, "step": 231950 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016543748113858554, "loss": 1.985, "step": 231955 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016543608350419039, "loss": 2.2793, "step": 231960 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.0001654346858474411, "loss": 2.0976, "step": 231965 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016543328816833816, "loss": 2.2435, "step": 231970 }, { "epoch": 0.55, "grad_norm": 2.59375, "learning_rate": 0.000165431890466882, "loss": 2.0337, "step": 231975 }, { "epoch": 0.55, "grad_norm": 2.609375, "learning_rate": 0.00016543049274307312, "loss": 2.1871, "step": 231980 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016542909499691198, "loss": 2.2252, "step": 231985 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001654276972283991, "loss": 2.2484, "step": 231990 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016542629943753495, "loss": 2.048, "step": 231995 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016542490162431997, "loss": 2.1458, "step": 232000 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016542350378875463, "loss": 1.9235, "step": 232005 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016542210593083946, "loss": 2.0683, "step": 232010 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016542070805057493, "loss": 1.9768, "step": 232015 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.0001654193101479615, "loss": 2.2824, "step": 232020 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016541791222299962, "loss": 2.0524, "step": 232025 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 0.00016541651427568979, "loss": 2.1142, "step": 232030 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.0001654151163060325, "loss": 2.1193, "step": 232035 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.0001654137183140282, "loss": 2.0449, "step": 232040 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016541232029967741, "loss": 1.9248, "step": 232045 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.0001654109222629806, "loss": 2.0319, "step": 232050 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.0001654095242039382, "loss": 2.3285, "step": 232055 }, { "epoch": 0.55, "grad_norm": 1.90625, "learning_rate": 0.00016540812612255077, "loss": 2.0882, "step": 232060 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.0001654067280188187, "loss": 2.0596, "step": 232065 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001654053298927425, "loss": 2.232, "step": 232070 }, { "epoch": 0.55, "grad_norm": 1.703125, "learning_rate": 0.00016540393174432265, "loss": 1.9571, "step": 232075 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.00016540253357355967, "loss": 2.0577, "step": 232080 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016540113538045397, "loss": 2.0401, "step": 232085 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016539973716500606, "loss": 2.0786, "step": 232090 }, { "epoch": 0.55, "grad_norm": 1.90625, "learning_rate": 0.0001653983389272164, "loss": 2.0491, "step": 232095 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016539694066708548, "loss": 2.1468, "step": 232100 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016539554238461382, "loss": 2.0132, "step": 232105 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016539414407980182, "loss": 2.232, "step": 232110 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016539274575265, "loss": 2.0263, "step": 232115 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016539134740315884, "loss": 2.0882, "step": 232120 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.0001653899490313288, "loss": 2.239, "step": 232125 }, { "epoch": 0.55, "grad_norm": 1.78125, "learning_rate": 0.00016538855063716036, "loss": 1.9238, "step": 232130 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016538715222065404, "loss": 2.1495, "step": 232135 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016538575378181026, "loss": 2.1886, "step": 232140 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016538435532062953, "loss": 2.0751, "step": 232145 }, { "epoch": 0.55, "grad_norm": 3.21875, "learning_rate": 0.0001653829568371123, "loss": 2.0239, "step": 232150 }, { "epoch": 0.55, "grad_norm": 1.953125, "learning_rate": 0.00016538155833125908, "loss": 2.2341, "step": 232155 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016538015980307035, "loss": 2.0801, "step": 232160 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016537876125254656, "loss": 2.0953, "step": 232165 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016537736267968817, "loss": 2.1348, "step": 232170 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016537596408449572, "loss": 1.9867, "step": 232175 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016537456546696967, "loss": 2.1703, "step": 232180 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016537316682711044, "loss": 2.198, "step": 232185 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016537176816491854, "loss": 2.1201, "step": 232190 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016537036948039452, "loss": 1.9501, "step": 232195 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016536897077353875, "loss": 2.1865, "step": 232200 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016536757204435175, "loss": 2.0922, "step": 232205 }, { "epoch": 0.55, "grad_norm": 1.859375, "learning_rate": 0.00016536617329283404, "loss": 2.0269, "step": 232210 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016536477451898604, "loss": 2.1552, "step": 232215 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016536337572280825, "loss": 2.1698, "step": 232220 }, { "epoch": 0.55, "grad_norm": 1.8125, "learning_rate": 0.00016536197690430112, "loss": 2.0373, "step": 232225 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016536057806346518, "loss": 1.9526, "step": 232230 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016535917920030085, "loss": 2.233, "step": 232235 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.0001653577803148087, "loss": 2.0929, "step": 232240 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.00016535638140698913, "loss": 2.0084, "step": 232245 }, { "epoch": 0.55, "grad_norm": 1.84375, "learning_rate": 0.0001653549824768426, "loss": 2.1359, "step": 232250 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016535358352436964, "loss": 2.0151, "step": 232255 }, { "epoch": 0.55, "grad_norm": 1.8515625, "learning_rate": 0.0001653521845495707, "loss": 2.061, "step": 232260 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.0001653507855524463, "loss": 2.0571, "step": 232265 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016534938653299685, "loss": 2.1856, "step": 232270 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.0001653479874912229, "loss": 1.8412, "step": 232275 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016534658842712484, "loss": 2.2326, "step": 232280 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016534518934070326, "loss": 2.1477, "step": 232285 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016534379023195852, "loss": 2.1571, "step": 232290 }, { "epoch": 0.55, "grad_norm": 1.7109375, "learning_rate": 0.0001653423911008912, "loss": 2.187, "step": 232295 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016534099194750172, "loss": 2.3593, "step": 232300 }, { "epoch": 0.55, "grad_norm": 2.796875, "learning_rate": 0.00016533959277179058, "loss": 2.2122, "step": 232305 }, { "epoch": 0.55, "grad_norm": 1.78125, "learning_rate": 0.00016533819357375824, "loss": 2.1948, "step": 232310 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001653367943534052, "loss": 2.0074, "step": 232315 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.0001653353951107319, "loss": 2.0132, "step": 232320 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016533399584573886, "loss": 2.0223, "step": 232325 }, { "epoch": 0.55, "grad_norm": 1.671875, "learning_rate": 0.00016533259655842654, "loss": 1.9376, "step": 232330 }, { "epoch": 0.55, "grad_norm": 1.90625, "learning_rate": 0.00016533119724879543, "loss": 2.0486, "step": 232335 }, { "epoch": 0.55, "grad_norm": 1.9921875, "learning_rate": 0.000165329797916846, "loss": 2.1291, "step": 232340 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.0001653283985625787, "loss": 2.0671, "step": 232345 }, { "epoch": 0.55, "grad_norm": 1.9609375, "learning_rate": 0.00016532699918599407, "loss": 2.1151, "step": 232350 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016532559978709252, "loss": 2.2415, "step": 232355 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016532420036587459, "loss": 2.0811, "step": 232360 }, { "epoch": 0.55, "grad_norm": 1.953125, "learning_rate": 0.00016532280092234068, "loss": 2.0335, "step": 232365 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016532140145649138, "loss": 1.8834, "step": 232370 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016532000196832705, "loss": 2.2635, "step": 232375 }, { "epoch": 0.55, "grad_norm": 1.6328125, "learning_rate": 0.00016531860245784828, "loss": 1.9189, "step": 232380 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016531720292505545, "loss": 2.1357, "step": 232385 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016531580336994907, "loss": 2.2638, "step": 232390 }, { "epoch": 0.55, "grad_norm": 3.203125, "learning_rate": 0.00016531440379252964, "loss": 2.0607, "step": 232395 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016531300419279766, "loss": 2.1292, "step": 232400 }, { "epoch": 0.55, "grad_norm": 1.6640625, "learning_rate": 0.00016531160457075353, "loss": 2.0327, "step": 232405 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.0001653102049263978, "loss": 2.0262, "step": 232410 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016530880525973088, "loss": 2.0712, "step": 232415 }, { "epoch": 0.55, "grad_norm": 2.671875, "learning_rate": 0.0001653074055707533, "loss": 2.0773, "step": 232420 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016530600585946555, "loss": 2.1794, "step": 232425 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.0001653046061258681, "loss": 2.2909, "step": 232430 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016530320636996138, "loss": 2.0971, "step": 232435 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001653018065917459, "loss": 2.0102, "step": 232440 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001653004067912221, "loss": 2.2825, "step": 232445 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016529900696839056, "loss": 1.9564, "step": 232450 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.0001652976071232517, "loss": 2.1608, "step": 232455 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016529620725580594, "loss": 1.9923, "step": 232460 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016529480736605386, "loss": 2.1307, "step": 232465 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016529340745399584, "loss": 2.1731, "step": 232470 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016529200751963242, "loss": 2.0467, "step": 232475 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016529060756296412, "loss": 2.2221, "step": 232480 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.0001652892075839913, "loss": 2.1802, "step": 232485 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016528780758271453, "loss": 2.1723, "step": 232490 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016528640755913426, "loss": 2.039, "step": 232495 }, { "epoch": 0.55, "grad_norm": 1.7890625, "learning_rate": 0.00016528500751325097, "loss": 2.239, "step": 232500 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.00016528360744506515, "loss": 2.1492, "step": 232505 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016528220735457727, "loss": 2.033, "step": 232510 }, { "epoch": 0.55, "grad_norm": 1.8515625, "learning_rate": 0.00016528080724178778, "loss": 1.9669, "step": 232515 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016527940710669715, "loss": 2.0751, "step": 232520 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016527800694930594, "loss": 2.2327, "step": 232525 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.0001652766067696146, "loss": 2.0472, "step": 232530 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016527520656762354, "loss": 2.0317, "step": 232535 }, { "epoch": 0.55, "grad_norm": 2.59375, "learning_rate": 0.0001652738063433333, "loss": 2.0916, "step": 232540 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016527240609674434, "loss": 2.157, "step": 232545 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016527100582785711, "loss": 2.1879, "step": 232550 }, { "epoch": 0.55, "grad_norm": 1.8671875, "learning_rate": 0.0001652696055366722, "loss": 2.2779, "step": 232555 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016526820522318997, "loss": 2.0138, "step": 232560 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.0001652668048874109, "loss": 2.0207, "step": 232565 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016526540452933556, "loss": 2.0764, "step": 232570 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016526400414896436, "loss": 2.0453, "step": 232575 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016526260374629778, "loss": 2.0638, "step": 232580 }, { "epoch": 0.55, "grad_norm": 1.75, "learning_rate": 0.00016526120332133632, "loss": 2.0392, "step": 232585 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016525980287408046, "loss": 2.1259, "step": 232590 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016525840240453065, "loss": 2.1302, "step": 232595 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016525700191268738, "loss": 2.0239, "step": 232600 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016525560139855113, "loss": 1.9505, "step": 232605 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.0001652542008621224, "loss": 2.1406, "step": 232610 }, { "epoch": 0.55, "grad_norm": 2.5625, "learning_rate": 0.00016525280030340166, "loss": 2.0767, "step": 232615 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016525139972238938, "loss": 2.0487, "step": 232620 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016524999911908604, "loss": 2.0604, "step": 232625 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.00016524859849349207, "loss": 2.047, "step": 232630 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016524719784560805, "loss": 1.9716, "step": 232635 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016524579717543437, "loss": 2.055, "step": 232640 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016524439648297155, "loss": 2.2031, "step": 232645 }, { "epoch": 0.55, "grad_norm": 1.78125, "learning_rate": 0.0001652429957682201, "loss": 2.1028, "step": 232650 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016524159503118041, "loss": 2.2209, "step": 232655 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.000165240194271853, "loss": 2.08, "step": 232660 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016523879349023842, "loss": 2.0371, "step": 232665 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016523739268633705, "loss": 2.1089, "step": 232670 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.0001652359918601494, "loss": 2.109, "step": 232675 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016523459101167593, "loss": 2.0806, "step": 232680 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016523319014091715, "loss": 2.0011, "step": 232685 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016523178924787358, "loss": 1.9548, "step": 232690 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001652303883325456, "loss": 2.1016, "step": 232695 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.00016522898739493376, "loss": 2.0826, "step": 232700 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016522758643503846, "loss": 2.146, "step": 232705 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016522618545286027, "loss": 2.0576, "step": 232710 }, { "epoch": 0.55, "grad_norm": 2.6875, "learning_rate": 0.00016522478444839966, "loss": 2.0112, "step": 232715 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016522338342165707, "loss": 2.2248, "step": 232720 }, { "epoch": 0.55, "grad_norm": 1.7421875, "learning_rate": 0.00016522198237263298, "loss": 2.1405, "step": 232725 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016522058130132787, "loss": 1.993, "step": 232730 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016521918020774223, "loss": 2.1973, "step": 232735 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016521777909187652, "loss": 2.0923, "step": 232740 }, { "epoch": 0.55, "grad_norm": 1.765625, "learning_rate": 0.00016521637795373126, "loss": 2.1143, "step": 232745 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.0001652149767933069, "loss": 2.0514, "step": 232750 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.0001652135756106039, "loss": 2.1225, "step": 232755 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.0001652121744056228, "loss": 1.9602, "step": 232760 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.000165210773178364, "loss": 2.262, "step": 232765 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016520937192882805, "loss": 2.0431, "step": 232770 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016520797065701536, "loss": 2.0259, "step": 232775 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016520656936292648, "loss": 2.1066, "step": 232780 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016520516804656177, "loss": 1.9432, "step": 232785 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001652037667079219, "loss": 1.9836, "step": 232790 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016520236534700719, "loss": 1.9907, "step": 232795 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016520096396381817, "loss": 2.0784, "step": 232800 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016519956255835533, "loss": 2.0577, "step": 232805 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016519816113061913, "loss": 2.1327, "step": 232810 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016519675968061005, "loss": 2.3037, "step": 232815 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.0001651953582083286, "loss": 2.1993, "step": 232820 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 0.0001651939567137752, "loss": 2.0868, "step": 232825 }, { "epoch": 0.55, "grad_norm": 1.734375, "learning_rate": 0.00016519255519695038, "loss": 2.1073, "step": 232830 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.0001651911536578546, "loss": 2.1266, "step": 232835 }, { "epoch": 0.55, "grad_norm": 1.9609375, "learning_rate": 0.00016518975209648833, "loss": 2.1755, "step": 232840 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016518835051285206, "loss": 2.1221, "step": 232845 }, { "epoch": 0.55, "grad_norm": 1.7578125, "learning_rate": 0.0001651869489069463, "loss": 2.2773, "step": 232850 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.00016518554727877144, "loss": 2.1878, "step": 232855 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016518414562832808, "loss": 2.111, "step": 232860 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016518274395561656, "loss": 2.0702, "step": 232865 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016518134226063748, "loss": 1.8593, "step": 232870 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016517994054339127, "loss": 2.0467, "step": 232875 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.0001651785388038784, "loss": 1.9863, "step": 232880 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016517713704209937, "loss": 2.1592, "step": 232885 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016517573525805463, "loss": 1.897, "step": 232890 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016517433345174468, "loss": 2.0617, "step": 232895 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.00016517293162317, "loss": 2.0007, "step": 232900 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016517152977233107, "loss": 2.202, "step": 232905 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016517012789922836, "loss": 2.1832, "step": 232910 }, { "epoch": 0.55, "grad_norm": 1.7421875, "learning_rate": 0.00016516872600386235, "loss": 1.9518, "step": 232915 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016516732408623352, "loss": 2.2086, "step": 232920 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016516592214634235, "loss": 2.019, "step": 232925 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016516452018418932, "loss": 2.1563, "step": 232930 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.0001651631181997749, "loss": 1.9789, "step": 232935 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001651617161930996, "loss": 2.0028, "step": 232940 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016516031416416387, "loss": 2.0151, "step": 232945 }, { "epoch": 0.55, "grad_norm": 1.8671875, "learning_rate": 0.00016515891211296818, "loss": 2.024, "step": 232950 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016515751003951304, "loss": 2.0662, "step": 232955 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016515610794379888, "loss": 1.9431, "step": 232960 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016515470582582626, "loss": 2.1703, "step": 232965 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016515330368559557, "loss": 2.0313, "step": 232970 }, { "epoch": 0.55, "grad_norm": 1.6640625, "learning_rate": 0.00016515190152310735, "loss": 2.11, "step": 232975 }, { "epoch": 0.55, "grad_norm": 1.65625, "learning_rate": 0.00016515049933836203, "loss": 1.9752, "step": 232980 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016514909713136018, "loss": 2.1361, "step": 232985 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016514769490210218, "loss": 1.8895, "step": 232990 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016514629265058854, "loss": 1.8554, "step": 232995 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016514489037681976, "loss": 2.1442, "step": 233000 }, { "epoch": 0.55, "grad_norm": 2.671875, "learning_rate": 0.00016514348808079627, "loss": 2.0958, "step": 233005 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016514208576251863, "loss": 2.1756, "step": 233010 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.00016514068342198725, "loss": 2.0949, "step": 233015 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016513928105920265, "loss": 2.0382, "step": 233020 }, { "epoch": 0.55, "grad_norm": 1.8671875, "learning_rate": 0.00016513787867416526, "loss": 2.1112, "step": 233025 }, { "epoch": 0.55, "grad_norm": 2.5625, "learning_rate": 0.00016513647626687557, "loss": 2.1345, "step": 233030 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016513507383733413, "loss": 2.174, "step": 233035 }, { "epoch": 0.55, "grad_norm": 2.671875, "learning_rate": 0.00016513367138554135, "loss": 2.041, "step": 233040 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016513226891149774, "loss": 2.1603, "step": 233045 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016513086641520376, "loss": 2.237, "step": 233050 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016512946389665985, "loss": 2.0248, "step": 233055 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016512806135586655, "loss": 2.1823, "step": 233060 }, { "epoch": 0.55, "grad_norm": 2.890625, "learning_rate": 0.00016512665879282436, "loss": 2.196, "step": 233065 }, { "epoch": 0.55, "grad_norm": 2.890625, "learning_rate": 0.00016512525620753372, "loss": 2.0137, "step": 233070 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.0001651238535999951, "loss": 2.1008, "step": 233075 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.00016512245097020895, "loss": 1.9883, "step": 233080 }, { "epoch": 0.55, "grad_norm": 1.6875, "learning_rate": 0.00016512104831817586, "loss": 1.8592, "step": 233085 }, { "epoch": 0.55, "grad_norm": 1.75, "learning_rate": 0.0001651196456438962, "loss": 1.9847, "step": 233090 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.0001651182429473705, "loss": 2.4037, "step": 233095 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016511684022859922, "loss": 2.0581, "step": 233100 }, { "epoch": 0.55, "grad_norm": 2.625, "learning_rate": 0.00016511543748758283, "loss": 1.9971, "step": 233105 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016511403472432187, "loss": 2.0328, "step": 233110 }, { "epoch": 0.55, "grad_norm": 2.671875, "learning_rate": 0.00016511263193881673, "loss": 2.0173, "step": 233115 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016511122913106795, "loss": 2.1629, "step": 233120 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.000165109826301076, "loss": 2.28, "step": 233125 }, { "epoch": 0.55, "grad_norm": 1.9296875, "learning_rate": 0.00016510842344884133, "loss": 2.0908, "step": 233130 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.00016510702057436445, "loss": 2.0508, "step": 233135 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.00016510561767764585, "loss": 2.1005, "step": 233140 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016510421475868597, "loss": 2.0586, "step": 233145 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016510281181748534, "loss": 2.1613, "step": 233150 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016510140885404436, "loss": 2.1285, "step": 233155 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016510000586836358, "loss": 1.907, "step": 233160 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016509860286044348, "loss": 2.1145, "step": 233165 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016509719983028452, "loss": 2.0377, "step": 233170 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016509579677788714, "loss": 2.1653, "step": 233175 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016509439370325187, "loss": 2.1218, "step": 233180 }, { "epoch": 0.55, "grad_norm": 2.5, "learning_rate": 0.00016509299060637916, "loss": 2.1073, "step": 233185 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016509158748726952, "loss": 2.0932, "step": 233190 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016509018434592343, "loss": 2.171, "step": 233195 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.0001650887811823413, "loss": 2.1132, "step": 233200 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016508737799652372, "loss": 2.209, "step": 233205 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001650859747884711, "loss": 2.1452, "step": 233210 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001650845715581839, "loss": 2.1188, "step": 233215 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016508316830566265, "loss": 2.0617, "step": 233220 }, { "epoch": 0.55, "grad_norm": 2.578125, "learning_rate": 0.00016508176503090782, "loss": 2.129, "step": 233225 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.00016508036173391988, "loss": 2.0359, "step": 233230 }, { "epoch": 0.55, "grad_norm": 2.6875, "learning_rate": 0.00016507895841469928, "loss": 2.2095, "step": 233235 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016507755507324653, "loss": 2.2402, "step": 233240 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016507615170956213, "loss": 2.2108, "step": 233245 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.0001650747483236465, "loss": 2.1602, "step": 233250 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001650733449155002, "loss": 1.9558, "step": 233255 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016507194148512367, "loss": 2.03, "step": 233260 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016507053803251735, "loss": 2.1465, "step": 233265 }, { "epoch": 0.55, "grad_norm": 1.6640625, "learning_rate": 0.00016506913455768176, "loss": 2.1071, "step": 233270 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016506773106061737, "loss": 2.0231, "step": 233275 }, { "epoch": 0.55, "grad_norm": 2.984375, "learning_rate": 0.00016506632754132466, "loss": 2.0077, "step": 233280 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016506492399980416, "loss": 2.1344, "step": 233285 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016506352043605627, "loss": 2.1512, "step": 233290 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016506211685008148, "loss": 2.2609, "step": 233295 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001650607132418803, "loss": 2.013, "step": 233300 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.0001650593096114532, "loss": 2.0685, "step": 233305 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016505790595880068, "loss": 2.1622, "step": 233310 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001650565022839232, "loss": 2.0418, "step": 233315 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016505509858682124, "loss": 2.0989, "step": 233320 }, { "epoch": 0.55, "grad_norm": 2.6875, "learning_rate": 0.00016505369486749524, "loss": 2.114, "step": 233325 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.00016505229112594573, "loss": 1.9204, "step": 233330 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016505088736217322, "loss": 2.281, "step": 233335 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.0001650494835761781, "loss": 1.9901, "step": 233340 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016504807976796092, "loss": 2.0953, "step": 233345 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016504667593752212, "loss": 2.241, "step": 233350 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.0001650452720848622, "loss": 2.1136, "step": 233355 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016504386820998166, "loss": 1.8998, "step": 233360 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.00016504246431288096, "loss": 2.4034, "step": 233365 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016504106039356056, "loss": 2.1821, "step": 233370 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.0001650396564520209, "loss": 2.2526, "step": 233375 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016503825248826258, "loss": 2.0471, "step": 233380 }, { "epoch": 0.55, "grad_norm": 1.65625, "learning_rate": 0.000165036848502286, "loss": 2.1426, "step": 233385 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016503544449409163, "loss": 2.2541, "step": 233390 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016503404046368, "loss": 2.1365, "step": 233395 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016503263641105159, "loss": 2.1806, "step": 233400 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016503123233620678, "loss": 1.9799, "step": 233405 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016502982823914618, "loss": 2.0885, "step": 233410 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016502842411987017, "loss": 2.1115, "step": 233415 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.0001650270199783793, "loss": 2.1237, "step": 233420 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 0.000165025615814674, "loss": 2.1533, "step": 233425 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.00016502421162875477, "loss": 2.0937, "step": 233430 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.0001650228074206221, "loss": 2.0888, "step": 233435 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016502140319027644, "loss": 2.1814, "step": 233440 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016501999893771832, "loss": 2.1741, "step": 233445 }, { "epoch": 0.55, "grad_norm": 2.5, "learning_rate": 0.00016501859466294818, "loss": 2.1742, "step": 233450 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.0001650171903659665, "loss": 1.8904, "step": 233455 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.00016501578604677378, "loss": 1.9874, "step": 233460 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016501438170537047, "loss": 2.1102, "step": 233465 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016501297734175708, "loss": 2.2044, "step": 233470 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.0001650115729559341, "loss": 1.9763, "step": 233475 }, { "epoch": 0.55, "grad_norm": 2.921875, "learning_rate": 0.00016501016854790197, "loss": 2.1018, "step": 233480 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.00016500876411766114, "loss": 2.0304, "step": 233485 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016500735966521222, "loss": 2.1682, "step": 233490 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016500595519055558, "loss": 2.0684, "step": 233495 }, { "epoch": 0.55, "grad_norm": 1.6640625, "learning_rate": 0.0001650045506936917, "loss": 2.0575, "step": 233500 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.0001650031461746211, "loss": 2.0366, "step": 233505 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016500174163334424, "loss": 1.8696, "step": 233510 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016500033706986163, "loss": 2.2472, "step": 233515 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.0001649989324841737, "loss": 2.2168, "step": 233520 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016499752787628097, "loss": 1.992, "step": 233525 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001649961232461839, "loss": 2.1523, "step": 233530 }, { "epoch": 0.55, "grad_norm": 1.796875, "learning_rate": 0.00016499471859388298, "loss": 2.1304, "step": 233535 }, { "epoch": 0.55, "grad_norm": 1.828125, "learning_rate": 0.00016499331391937866, "loss": 2.1488, "step": 233540 }, { "epoch": 0.55, "grad_norm": 2.78125, "learning_rate": 0.00016499190922267148, "loss": 2.1881, "step": 233545 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016499050450376188, "loss": 2.0661, "step": 233550 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016498909976265033, "loss": 2.1829, "step": 233555 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.00016498769499933733, "loss": 1.9386, "step": 233560 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.00016498629021382337, "loss": 2.0296, "step": 233565 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.0001649848854061089, "loss": 2.1206, "step": 233570 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016498348057619443, "loss": 2.0688, "step": 233575 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.0001649820757240804, "loss": 1.8445, "step": 233580 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016498067084976732, "loss": 2.2454, "step": 233585 }, { "epoch": 0.55, "grad_norm": 2.671875, "learning_rate": 0.00016497926595325567, "loss": 2.0528, "step": 233590 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.0001649778610345459, "loss": 2.1995, "step": 233595 }, { "epoch": 0.55, "grad_norm": 1.9296875, "learning_rate": 0.00016497645609363855, "loss": 1.8481, "step": 233600 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016497505113053406, "loss": 2.0463, "step": 233605 }, { "epoch": 0.55, "grad_norm": 1.8359375, "learning_rate": 0.00016497364614523292, "loss": 2.0035, "step": 233610 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016497224113773559, "loss": 2.1527, "step": 233615 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016497083610804257, "loss": 2.0843, "step": 233620 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.0001649694310561543, "loss": 1.9801, "step": 233625 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016496802598207135, "loss": 2.3273, "step": 233630 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016496662088579412, "loss": 2.1256, "step": 233635 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.0001649652157673231, "loss": 2.0212, "step": 233640 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001649638106266588, "loss": 2.1791, "step": 233645 }, { "epoch": 0.55, "grad_norm": 2.59375, "learning_rate": 0.00016496240546380166, "loss": 2.1996, "step": 233650 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016496100027875224, "loss": 2.1564, "step": 233655 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001649595950715109, "loss": 2.0632, "step": 233660 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016495818984207822, "loss": 2.1771, "step": 233665 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.00016495678459045463, "loss": 2.1149, "step": 233670 }, { "epoch": 0.55, "grad_norm": 1.6640625, "learning_rate": 0.00016495537931664062, "loss": 2.0362, "step": 233675 }, { "epoch": 0.55, "grad_norm": 1.7421875, "learning_rate": 0.00016495397402063668, "loss": 2.0523, "step": 233680 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016495256870244332, "loss": 2.0504, "step": 233685 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016495116336206093, "loss": 2.0266, "step": 233690 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001649497579994901, "loss": 2.0957, "step": 233695 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016494835261473122, "loss": 1.9599, "step": 233700 }, { "epoch": 0.55, "grad_norm": 1.703125, "learning_rate": 0.0001649469472077848, "loss": 2.236, "step": 233705 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016494554177865132, "loss": 2.1724, "step": 233710 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016494413632733127, "loss": 2.0724, "step": 233715 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016494273085382514, "loss": 2.1014, "step": 233720 }, { "epoch": 0.55, "grad_norm": 2.5, "learning_rate": 0.00016494132535813338, "loss": 1.8836, "step": 233725 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001649399198402565, "loss": 1.9889, "step": 233730 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.00016493851430019496, "loss": 2.2088, "step": 233735 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016493710873794926, "loss": 2.1077, "step": 233740 }, { "epoch": 0.55, "grad_norm": 1.8125, "learning_rate": 0.00016493570315351984, "loss": 2.0231, "step": 233745 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016493429754690721, "loss": 2.1419, "step": 233750 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016493289191811186, "loss": 2.0265, "step": 233755 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016493148626713425, "loss": 2.1689, "step": 233760 }, { "epoch": 0.55, "grad_norm": 1.9609375, "learning_rate": 0.0001649300805939749, "loss": 2.0388, "step": 233765 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001649286748986342, "loss": 1.96, "step": 233770 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016492726918111272, "loss": 2.208, "step": 233775 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.0001649258634414109, "loss": 1.9881, "step": 233780 }, { "epoch": 0.55, "grad_norm": 2.640625, "learning_rate": 0.0001649244576795292, "loss": 1.883, "step": 233785 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016492305189546818, "loss": 2.1963, "step": 233790 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001649216460892282, "loss": 2.0688, "step": 233795 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016492024026080988, "loss": 2.1087, "step": 233800 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.0001649188344102136, "loss": 1.9916, "step": 233805 }, { "epoch": 0.55, "grad_norm": 1.90625, "learning_rate": 0.00016491742853743987, "loss": 2.1374, "step": 233810 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.00016491602264248915, "loss": 2.1051, "step": 233815 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016491461672536197, "loss": 2.0564, "step": 233820 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016491321078605876, "loss": 2.3015, "step": 233825 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016491180482458003, "loss": 2.1141, "step": 233830 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016491039884092626, "loss": 1.9501, "step": 233835 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001649089928350979, "loss": 2.1356, "step": 233840 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016490758680709545, "loss": 1.9673, "step": 233845 }, { "epoch": 0.55, "grad_norm": 1.671875, "learning_rate": 0.0001649061807569194, "loss": 1.9838, "step": 233850 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016490477468457021, "loss": 2.2289, "step": 233855 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001649033685900484, "loss": 2.0173, "step": 233860 }, { "epoch": 0.55, "grad_norm": 1.953125, "learning_rate": 0.00016490196247335442, "loss": 2.0326, "step": 233865 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.0001649005563344887, "loss": 2.1424, "step": 233870 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.0001648991501734518, "loss": 2.133, "step": 233875 }, { "epoch": 0.55, "grad_norm": 1.71875, "learning_rate": 0.0001648977439902442, "loss": 2.0656, "step": 233880 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016489633778486634, "loss": 2.082, "step": 233885 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001648949315573187, "loss": 1.9641, "step": 233890 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.0001648935253076018, "loss": 2.0453, "step": 233895 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016489211903571608, "loss": 2.1168, "step": 233900 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016489071274166202, "loss": 2.0667, "step": 233905 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016488930642544013, "loss": 2.1095, "step": 233910 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 0.0001648879000870509, "loss": 2.1114, "step": 233915 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.00016488649372649477, "loss": 2.1265, "step": 233920 }, { "epoch": 0.55, "grad_norm": 4.6875, "learning_rate": 0.00016488508734377222, "loss": 2.2611, "step": 233925 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016488368093888376, "loss": 2.0471, "step": 233930 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016488227451182984, "loss": 1.9309, "step": 233935 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016488086806261098, "loss": 1.9514, "step": 233940 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016487946159122763, "loss": 2.0431, "step": 233945 }, { "epoch": 0.55, "grad_norm": 1.9921875, "learning_rate": 0.00016487805509768027, "loss": 2.1396, "step": 233950 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001648766485819694, "loss": 2.0278, "step": 233955 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001648752420440955, "loss": 2.0829, "step": 233960 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016487383548405903, "loss": 2.1368, "step": 233965 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016487242890186047, "loss": 2.1113, "step": 233970 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016487102229750035, "loss": 2.2007, "step": 233975 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.0001648696156709791, "loss": 2.1226, "step": 233980 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001648682090222972, "loss": 2.2397, "step": 233985 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016486680235145516, "loss": 2.0074, "step": 233990 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016486539565845342, "loss": 2.1443, "step": 233995 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001648639889432925, "loss": 1.9022, "step": 234000 }, { "epoch": 0.55, "grad_norm": 2.59375, "learning_rate": 0.00016486258220597283, "loss": 2.1789, "step": 234005 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016486117544649496, "loss": 2.2281, "step": 234010 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016485976866485933, "loss": 2.1792, "step": 234015 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016485836186106643, "loss": 2.2278, "step": 234020 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001648569550351167, "loss": 2.103, "step": 234025 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016485554818701072, "loss": 2.1959, "step": 234030 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.0001648541413167489, "loss": 2.0364, "step": 234035 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016485273442433168, "loss": 2.205, "step": 234040 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.0001648513275097596, "loss": 2.1106, "step": 234045 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016484992057303317, "loss": 2.0132, "step": 234050 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001648485136141528, "loss": 1.9529, "step": 234055 }, { "epoch": 0.55, "grad_norm": 1.8359375, "learning_rate": 0.00016484710663311903, "loss": 2.1581, "step": 234060 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001648456996299323, "loss": 2.1363, "step": 234065 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001648442926045931, "loss": 2.0863, "step": 234070 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001648428855571019, "loss": 2.0861, "step": 234075 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.0001648414784874592, "loss": 2.0427, "step": 234080 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016484007139566548, "loss": 2.0954, "step": 234085 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016483866428172123, "loss": 2.2506, "step": 234090 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016483725714562688, "loss": 2.1842, "step": 234095 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016483584998738297, "loss": 2.1774, "step": 234100 }, { "epoch": 0.55, "grad_norm": 1.8828125, "learning_rate": 0.00016483444280698996, "loss": 2.1022, "step": 234105 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016483303560444834, "loss": 1.9205, "step": 234110 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016483162837975857, "loss": 2.2253, "step": 234115 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016483022113292112, "loss": 2.0548, "step": 234120 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.0001648288138639365, "loss": 2.1004, "step": 234125 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016482740657280517, "loss": 2.1876, "step": 234130 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.00016482599925952767, "loss": 2.2499, "step": 234135 }, { "epoch": 0.55, "grad_norm": 1.734375, "learning_rate": 0.0001648245919241044, "loss": 1.9432, "step": 234140 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016482318456653586, "loss": 2.1414, "step": 234145 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016482177718682257, "loss": 1.9788, "step": 234150 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016482036978496498, "loss": 2.0217, "step": 234155 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016481896236096355, "loss": 2.173, "step": 234160 }, { "epoch": 0.55, "grad_norm": 1.9609375, "learning_rate": 0.00016481755491481884, "loss": 2.0516, "step": 234165 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016481614744653125, "loss": 2.1219, "step": 234170 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.00016481473995610127, "loss": 1.9318, "step": 234175 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001648133324435294, "loss": 1.9758, "step": 234180 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016481192490881615, "loss": 2.1473, "step": 234185 }, { "epoch": 0.55, "grad_norm": 2.6875, "learning_rate": 0.00016481051735196193, "loss": 2.0009, "step": 234190 }, { "epoch": 0.55, "grad_norm": 2.578125, "learning_rate": 0.0001648091097729673, "loss": 2.1123, "step": 234195 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.0001648077021718327, "loss": 2.2495, "step": 234200 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001648062945485586, "loss": 1.953, "step": 234205 }, { "epoch": 0.55, "grad_norm": 1.9296875, "learning_rate": 0.00016480488690314552, "loss": 1.9266, "step": 234210 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016480347923559388, "loss": 2.053, "step": 234215 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.0001648020715459042, "loss": 2.0727, "step": 234220 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016480066383407698, "loss": 2.2944, "step": 234225 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016479925610011264, "loss": 1.9688, "step": 234230 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016479784834401174, "loss": 2.0289, "step": 234235 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.0001647964405657747, "loss": 2.0692, "step": 234240 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016479503276540201, "loss": 2.4282, "step": 234245 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001647936249428942, "loss": 1.9792, "step": 234250 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016479221709825168, "loss": 2.1483, "step": 234255 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016479080923147496, "loss": 2.1967, "step": 234260 }, { "epoch": 0.55, "grad_norm": 1.8671875, "learning_rate": 0.00016478940134256453, "loss": 2.2012, "step": 234265 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001647879934315209, "loss": 2.1816, "step": 234270 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016478658549834446, "loss": 2.202, "step": 234275 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.0001647851775430358, "loss": 2.0543, "step": 234280 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001647837695655953, "loss": 2.2018, "step": 234285 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016478236156602352, "loss": 2.1855, "step": 234290 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.0001647809535443209, "loss": 2.0628, "step": 234295 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016477954550048795, "loss": 2.103, "step": 234300 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001647781374345251, "loss": 2.3183, "step": 234305 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016477672934643288, "loss": 2.0242, "step": 234310 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016477532123621176, "loss": 2.1962, "step": 234315 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016477391310386225, "loss": 2.1699, "step": 234320 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016477250494938473, "loss": 2.1048, "step": 234325 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016477109677277978, "loss": 2.1186, "step": 234330 }, { "epoch": 0.55, "grad_norm": 1.9921875, "learning_rate": 0.00016476968857404784, "loss": 2.1534, "step": 234335 }, { "epoch": 0.55, "grad_norm": 3.046875, "learning_rate": 0.0001647682803531894, "loss": 2.0685, "step": 234340 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016476687211020496, "loss": 1.9332, "step": 234345 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016476546384509497, "loss": 2.137, "step": 234350 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016476405555785993, "loss": 2.1296, "step": 234355 }, { "epoch": 0.55, "grad_norm": 1.8203125, "learning_rate": 0.0001647626472485003, "loss": 2.0169, "step": 234360 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016476123891701655, "loss": 2.156, "step": 234365 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016475983056340923, "loss": 2.0376, "step": 234370 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016475842218767873, "loss": 2.2379, "step": 234375 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016475701378982564, "loss": 1.9314, "step": 234380 }, { "epoch": 0.55, "grad_norm": 1.8046875, "learning_rate": 0.00016475560536985035, "loss": 2.0573, "step": 234385 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016475419692775338, "loss": 2.2047, "step": 234390 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016475278846353518, "loss": 2.0021, "step": 234395 }, { "epoch": 0.55, "grad_norm": 1.625, "learning_rate": 0.00016475137997719626, "loss": 1.973, "step": 234400 }, { "epoch": 0.55, "grad_norm": 1.921875, "learning_rate": 0.00016474997146873708, "loss": 2.0131, "step": 234405 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016474856293815818, "loss": 2.1007, "step": 234410 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016474715438545998, "loss": 2.1248, "step": 234415 }, { "epoch": 0.55, "grad_norm": 1.828125, "learning_rate": 0.00016474574581064295, "loss": 1.9907, "step": 234420 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.00016474433721370762, "loss": 2.216, "step": 234425 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016474292859465446, "loss": 1.9077, "step": 234430 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016474151995348396, "loss": 2.0811, "step": 234435 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016474011129019657, "loss": 2.1329, "step": 234440 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016473870260479273, "loss": 1.9828, "step": 234445 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.000164737293897273, "loss": 2.1455, "step": 234450 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016473588516763784, "loss": 2.0135, "step": 234455 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.00016473447641588776, "loss": 2.2734, "step": 234460 }, { "epoch": 0.55, "grad_norm": 2.65625, "learning_rate": 0.0001647330676420232, "loss": 2.1474, "step": 234465 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016473165884604463, "loss": 2.0642, "step": 234470 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016473025002795253, "loss": 2.2637, "step": 234475 }, { "epoch": 0.55, "grad_norm": 1.7734375, "learning_rate": 0.00016472884118774744, "loss": 2.1919, "step": 234480 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.0001647274323254298, "loss": 2.1927, "step": 234485 }, { "epoch": 0.55, "grad_norm": 2.515625, "learning_rate": 0.0001647260234410001, "loss": 2.172, "step": 234490 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.0001647246145344588, "loss": 2.1494, "step": 234495 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.0001647232056058064, "loss": 1.9919, "step": 234500 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.0001647217966550434, "loss": 2.1271, "step": 234505 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016472038768217023, "loss": 2.131, "step": 234510 }, { "epoch": 0.55, "grad_norm": 1.828125, "learning_rate": 0.0001647189786871874, "loss": 1.9593, "step": 234515 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016471756967009543, "loss": 1.9279, "step": 234520 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.00016471616063089473, "loss": 2.1443, "step": 234525 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.0001647147515695858, "loss": 2.0708, "step": 234530 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016471334248616918, "loss": 2.2026, "step": 234535 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001647119333806453, "loss": 2.1625, "step": 234540 }, { "epoch": 0.55, "grad_norm": 2.625, "learning_rate": 0.00016471052425301466, "loss": 1.9578, "step": 234545 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.0001647091151032777, "loss": 2.1918, "step": 234550 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016470770593143494, "loss": 2.1607, "step": 234555 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016470629673748688, "loss": 2.1954, "step": 234560 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016470488752143395, "loss": 2.1338, "step": 234565 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016470347828327666, "loss": 2.1181, "step": 234570 }, { "epoch": 0.55, "grad_norm": 1.9140625, "learning_rate": 0.00016470206902301552, "loss": 2.2989, "step": 234575 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.0001647006597406509, "loss": 2.2361, "step": 234580 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016469925043618343, "loss": 2.1353, "step": 234585 }, { "epoch": 0.55, "grad_norm": 1.7734375, "learning_rate": 0.00016469784110961352, "loss": 2.0241, "step": 234590 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.00016469643176094163, "loss": 2.0579, "step": 234595 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.0001646950223901683, "loss": 2.1769, "step": 234600 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.00016469361299729395, "loss": 2.1385, "step": 234605 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016469220358231908, "loss": 2.1783, "step": 234610 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001646907941452442, "loss": 2.1324, "step": 234615 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016468938468606976, "loss": 2.257, "step": 234620 }, { "epoch": 0.55, "grad_norm": 1.7421875, "learning_rate": 0.00016468797520479623, "loss": 1.9573, "step": 234625 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016468656570142417, "loss": 2.1908, "step": 234630 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016468515617595398, "loss": 2.2722, "step": 234635 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.00016468374662838615, "loss": 2.0145, "step": 234640 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.0001646823370587212, "loss": 2.0866, "step": 234645 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016468092746695957, "loss": 1.9876, "step": 234650 }, { "epoch": 0.55, "grad_norm": 1.984375, "learning_rate": 0.00016467951785310178, "loss": 1.9964, "step": 234655 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.0001646781082171483, "loss": 1.9901, "step": 234660 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016467669855909958, "loss": 2.0842, "step": 234665 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016467528887895613, "loss": 2.2358, "step": 234670 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016467387917671843, "loss": 2.0546, "step": 234675 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 0.000164672469452387, "loss": 2.0997, "step": 234680 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016467105970596223, "loss": 2.0768, "step": 234685 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001646696499374447, "loss": 2.0133, "step": 234690 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016466824014683482, "loss": 2.0554, "step": 234695 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016466683033413307, "loss": 2.0176, "step": 234700 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016466542049934, "loss": 2.1334, "step": 234705 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.00016466401064245604, "loss": 2.0273, "step": 234710 }, { "epoch": 0.55, "grad_norm": 2.375, "learning_rate": 0.00016466260076348165, "loss": 2.2297, "step": 234715 }, { "epoch": 0.55, "grad_norm": 3.359375, "learning_rate": 0.00016466119086241737, "loss": 2.0155, "step": 234720 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016465978093926367, "loss": 2.0541, "step": 234725 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.000164658370994021, "loss": 2.1749, "step": 234730 }, { "epoch": 0.55, "grad_norm": 1.9296875, "learning_rate": 0.00016465696102668987, "loss": 2.0645, "step": 234735 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016465555103727073, "loss": 2.0793, "step": 234740 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.0001646541410257641, "loss": 2.2086, "step": 234745 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001646527309921704, "loss": 2.1345, "step": 234750 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001646513209364902, "loss": 1.9394, "step": 234755 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016464991085872392, "loss": 2.2335, "step": 234760 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016464850075887207, "loss": 2.0767, "step": 234765 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016464709063693508, "loss": 2.2057, "step": 234770 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016464568049291348, "loss": 2.2131, "step": 234775 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.0001646442703268078, "loss": 2.1718, "step": 234780 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016464286013861843, "loss": 2.0591, "step": 234785 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016464144992834587, "loss": 2.282, "step": 234790 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016464003969599063, "loss": 2.0387, "step": 234795 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016463862944155318, "loss": 2.0059, "step": 234800 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.000164637219165034, "loss": 2.1157, "step": 234805 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016463580886643357, "loss": 2.1803, "step": 234810 }, { "epoch": 0.55, "grad_norm": 2.671875, "learning_rate": 0.0001646343985457524, "loss": 1.9675, "step": 234815 }, { "epoch": 0.55, "grad_norm": 1.796875, "learning_rate": 0.00016463298820299093, "loss": 2.1267, "step": 234820 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016463157783814966, "loss": 2.2049, "step": 234825 }, { "epoch": 0.55, "grad_norm": 1.8828125, "learning_rate": 0.00016463016745122904, "loss": 2.1585, "step": 234830 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016462875704222966, "loss": 2.22, "step": 234835 }, { "epoch": 0.55, "grad_norm": 1.5703125, "learning_rate": 0.00016462734661115187, "loss": 2.0757, "step": 234840 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.0001646259361579962, "loss": 2.1368, "step": 234845 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016462452568276315, "loss": 2.1878, "step": 234850 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.0001646231151854532, "loss": 2.138, "step": 234855 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001646217046660668, "loss": 2.1836, "step": 234860 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016462029412460448, "loss": 2.1311, "step": 234865 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.0001646188835610667, "loss": 2.1061, "step": 234870 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.0001646174729754539, "loss": 2.0149, "step": 234875 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.0001646160623677666, "loss": 2.1259, "step": 234880 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016461465173800533, "loss": 2.124, "step": 234885 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016461324108617048, "loss": 2.0821, "step": 234890 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001646118304122626, "loss": 2.1419, "step": 234895 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016461041971628214, "loss": 2.196, "step": 234900 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016460900899822957, "loss": 2.0488, "step": 234905 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.0001646075982581054, "loss": 2.1104, "step": 234910 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016460618749591013, "loss": 2.1559, "step": 234915 }, { "epoch": 0.55, "grad_norm": 2.53125, "learning_rate": 0.0001646047767116442, "loss": 1.872, "step": 234920 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016460336590530807, "loss": 2.0531, "step": 234925 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016460195507690228, "loss": 2.1812, "step": 234930 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016460054422642731, "loss": 2.0548, "step": 234935 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016459913335388363, "loss": 2.1702, "step": 234940 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016459772245927168, "loss": 2.1701, "step": 234945 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.000164596311542592, "loss": 2.0262, "step": 234950 }, { "epoch": 0.55, "grad_norm": 2.75, "learning_rate": 0.000164594900603845, "loss": 2.1125, "step": 234955 }, { "epoch": 0.55, "grad_norm": 1.953125, "learning_rate": 0.0001645934896430313, "loss": 2.1882, "step": 234960 }, { "epoch": 0.55, "grad_norm": 2.3125, "learning_rate": 0.00016459207866015123, "loss": 1.9909, "step": 234965 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.00016459066765520537, "loss": 2.2299, "step": 234970 }, { "epoch": 0.55, "grad_norm": 2.84375, "learning_rate": 0.00016458925662819412, "loss": 2.0568, "step": 234975 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.00016458784557911807, "loss": 1.9455, "step": 234980 }, { "epoch": 0.55, "grad_norm": 3.40625, "learning_rate": 0.00016458643450797757, "loss": 2.1633, "step": 234985 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.0001645850234147732, "loss": 2.1515, "step": 234990 }, { "epoch": 0.55, "grad_norm": 2.71875, "learning_rate": 0.00016458361229950544, "loss": 2.1133, "step": 234995 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.0001645822011621747, "loss": 2.1792, "step": 235000 }, { "epoch": 0.55, "grad_norm": 2.5, "learning_rate": 0.00016458079000278156, "loss": 2.0106, "step": 235005 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016457937882132642, "loss": 2.0777, "step": 235010 }, { "epoch": 0.55, "grad_norm": 1.9921875, "learning_rate": 0.0001645779676178098, "loss": 2.2669, "step": 235015 }, { "epoch": 0.55, "grad_norm": 1.90625, "learning_rate": 0.00016457655639223216, "loss": 2.0997, "step": 235020 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016457514514459405, "loss": 2.0542, "step": 235025 }, { "epoch": 0.55, "grad_norm": 1.859375, "learning_rate": 0.00016457373387489586, "loss": 2.0984, "step": 235030 }, { "epoch": 0.55, "grad_norm": 2.59375, "learning_rate": 0.0001645723225831381, "loss": 2.07, "step": 235035 }, { "epoch": 0.55, "grad_norm": 1.90625, "learning_rate": 0.00016457091126932126, "loss": 2.1218, "step": 235040 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016456949993344587, "loss": 2.0241, "step": 235045 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016456808857551232, "loss": 2.0647, "step": 235050 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.00016456667719552117, "loss": 1.941, "step": 235055 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016456526579347288, "loss": 1.9583, "step": 235060 }, { "epoch": 0.55, "grad_norm": 2.484375, "learning_rate": 0.0001645638543693679, "loss": 2.3221, "step": 235065 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016456244292320673, "loss": 2.1297, "step": 235070 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016456103145498987, "loss": 2.2583, "step": 235075 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.0001645596199647178, "loss": 2.015, "step": 235080 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.000164558208452391, "loss": 2.1515, "step": 235085 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001645567969180099, "loss": 1.9993, "step": 235090 }, { "epoch": 0.55, "grad_norm": 1.7890625, "learning_rate": 0.00016455538536157507, "loss": 2.1613, "step": 235095 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016455397378308694, "loss": 2.1013, "step": 235100 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.000164552562182546, "loss": 2.3083, "step": 235105 }, { "epoch": 0.55, "grad_norm": 1.71875, "learning_rate": 0.00016455115055995275, "loss": 1.9571, "step": 235110 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016454973891530763, "loss": 2.0825, "step": 235115 }, { "epoch": 0.55, "grad_norm": 1.8671875, "learning_rate": 0.00016454832724861117, "loss": 2.0529, "step": 235120 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.0001645469155598638, "loss": 2.0925, "step": 235125 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016454550384906606, "loss": 2.1387, "step": 235130 }, { "epoch": 0.55, "grad_norm": 3.40625, "learning_rate": 0.00016454409211621838, "loss": 2.0417, "step": 235135 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.0001645426803613213, "loss": 2.1477, "step": 235140 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016454126858437524, "loss": 2.019, "step": 235145 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016453985678538074, "loss": 2.2878, "step": 235150 }, { "epoch": 0.55, "grad_norm": 2.296875, "learning_rate": 0.00016453844496433826, "loss": 2.0763, "step": 235155 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016453703312124823, "loss": 1.9506, "step": 235160 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016453562125611125, "loss": 2.061, "step": 235165 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016453420936892764, "loss": 1.8463, "step": 235170 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016453279745969805, "loss": 2.1002, "step": 235175 }, { "epoch": 0.55, "grad_norm": 1.8984375, "learning_rate": 0.00016453138552842284, "loss": 2.1383, "step": 235180 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016452997357510254, "loss": 2.1641, "step": 235185 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016452856159973766, "loss": 2.107, "step": 235190 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016452714960232866, "loss": 2.1008, "step": 235195 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016452573758287596, "loss": 1.9425, "step": 235200 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016452432554138016, "loss": 1.9738, "step": 235205 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.00016452291347784166, "loss": 2.0493, "step": 235210 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016452150139226098, "loss": 2.1177, "step": 235215 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016452008928463852, "loss": 2.1038, "step": 235220 }, { "epoch": 0.55, "grad_norm": 2.421875, "learning_rate": 0.0001645186771549749, "loss": 2.307, "step": 235225 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016451726500327048, "loss": 2.3316, "step": 235230 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.0001645158528295258, "loss": 2.1601, "step": 235235 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016451444063374137, "loss": 1.9189, "step": 235240 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016451302841591762, "loss": 2.1478, "step": 235245 }, { "epoch": 0.55, "grad_norm": 2.90625, "learning_rate": 0.00016451161617605503, "loss": 2.0206, "step": 235250 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.0001645102039141541, "loss": 2.201, "step": 235255 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016450879163021532, "loss": 2.1432, "step": 235260 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001645073793242392, "loss": 2.0089, "step": 235265 }, { "epoch": 0.55, "grad_norm": 1.9609375, "learning_rate": 0.00016450596699622616, "loss": 2.0359, "step": 235270 }, { "epoch": 0.55, "grad_norm": 2.359375, "learning_rate": 0.00016450455464617672, "loss": 2.0843, "step": 235275 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.00016450314227409133, "loss": 1.9677, "step": 235280 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001645017298799705, "loss": 2.2053, "step": 235285 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016450031746381474, "loss": 2.093, "step": 235290 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.0001644989050256245, "loss": 2.1818, "step": 235295 }, { "epoch": 0.55, "grad_norm": 2.828125, "learning_rate": 0.00016449749256540025, "loss": 2.0822, "step": 235300 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.0001644960800831425, "loss": 1.9017, "step": 235305 }, { "epoch": 0.55, "grad_norm": 2.65625, "learning_rate": 0.0001644946675788517, "loss": 2.156, "step": 235310 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016449325505252834, "loss": 2.1037, "step": 235315 }, { "epoch": 0.55, "grad_norm": 2.34375, "learning_rate": 0.00016449184250417296, "loss": 2.1176, "step": 235320 }, { "epoch": 0.55, "grad_norm": 1.875, "learning_rate": 0.000164490429933786, "loss": 2.0273, "step": 235325 }, { "epoch": 0.55, "grad_norm": 2.546875, "learning_rate": 0.00016448901734136787, "loss": 2.0831, "step": 235330 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016448760472691917, "loss": 2.1124, "step": 235335 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016448619209044033, "loss": 2.129, "step": 235340 }, { "epoch": 0.55, "grad_norm": 1.921875, "learning_rate": 0.00016448477943193184, "loss": 2.005, "step": 235345 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016448336675139416, "loss": 2.1641, "step": 235350 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 0.00016448195404882783, "loss": 2.0128, "step": 235355 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016448054132423327, "loss": 2.0822, "step": 235360 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.000164479128577611, "loss": 1.9507, "step": 235365 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.00016447771580896148, "loss": 1.9741, "step": 235370 }, { "epoch": 0.55, "grad_norm": 2.59375, "learning_rate": 0.0001644763030182852, "loss": 2.1036, "step": 235375 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016447489020558265, "loss": 2.1186, "step": 235380 }, { "epoch": 0.55, "grad_norm": 3.421875, "learning_rate": 0.00016447347737085427, "loss": 2.2285, "step": 235385 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016447206451410064, "loss": 2.1046, "step": 235390 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016447065163532217, "loss": 2.1418, "step": 235395 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016446923873451934, "loss": 2.0698, "step": 235400 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001644678258116927, "loss": 2.0936, "step": 235405 }, { "epoch": 0.55, "grad_norm": 1.765625, "learning_rate": 0.0001644664128668426, "loss": 2.0313, "step": 235410 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.00016446499989996964, "loss": 2.2028, "step": 235415 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016446358691107427, "loss": 2.0517, "step": 235420 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016446217390015698, "loss": 2.1539, "step": 235425 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.00016446076086721822, "loss": 2.1374, "step": 235430 }, { "epoch": 0.55, "grad_norm": 1.9453125, "learning_rate": 0.0001644593478122585, "loss": 2.1012, "step": 235435 }, { "epoch": 0.55, "grad_norm": 2.046875, "learning_rate": 0.00016445793473527832, "loss": 2.0511, "step": 235440 }, { "epoch": 0.55, "grad_norm": 1.75, "learning_rate": 0.00016445652163627812, "loss": 2.1175, "step": 235445 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.0001644551085152584, "loss": 1.9809, "step": 235450 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016445369537221966, "loss": 2.085, "step": 235455 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001644522822071624, "loss": 2.0481, "step": 235460 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.000164450869020087, "loss": 2.1123, "step": 235465 }, { "epoch": 0.55, "grad_norm": 1.8125, "learning_rate": 0.00016444945581099403, "loss": 2.0918, "step": 235470 }, { "epoch": 0.55, "grad_norm": 3.3125, "learning_rate": 0.000164448042579884, "loss": 2.1283, "step": 235475 }, { "epoch": 0.55, "grad_norm": 1.8359375, "learning_rate": 0.00016444662932675732, "loss": 2.1268, "step": 235480 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016444521605161452, "loss": 2.0662, "step": 235485 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016444380275445605, "loss": 2.0917, "step": 235490 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.0001644423894352824, "loss": 2.101, "step": 235495 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016444097609409405, "loss": 2.1484, "step": 235500 }, { "epoch": 0.55, "grad_norm": 2.265625, "learning_rate": 0.00016443956273089154, "loss": 2.0278, "step": 235505 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.0001644381493456753, "loss": 1.9831, "step": 235510 }, { "epoch": 0.55, "grad_norm": 1.9375, "learning_rate": 0.0001644367359384458, "loss": 2.042, "step": 235515 }, { "epoch": 0.55, "grad_norm": 2.5625, "learning_rate": 0.00016443532250920353, "loss": 1.9036, "step": 235520 }, { "epoch": 0.55, "grad_norm": 2.328125, "learning_rate": 0.00016443390905794902, "loss": 1.8906, "step": 235525 }, { "epoch": 0.55, "grad_norm": 1.65625, "learning_rate": 0.00016443249558468267, "loss": 2.1706, "step": 235530 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016443108208940506, "loss": 2.1526, "step": 235535 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.0001644296685721166, "loss": 2.1004, "step": 235540 }, { "epoch": 0.55, "grad_norm": 1.84375, "learning_rate": 0.00016442825503281777, "loss": 2.0461, "step": 235545 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016442684147150913, "loss": 2.0562, "step": 235550 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001644254278881911, "loss": 2.1805, "step": 235555 }, { "epoch": 0.55, "grad_norm": 2.453125, "learning_rate": 0.00016442401428286415, "loss": 2.0259, "step": 235560 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016442260065552882, "loss": 2.1092, "step": 235565 }, { "epoch": 0.55, "grad_norm": 2.625, "learning_rate": 0.00016442118700618554, "loss": 2.1262, "step": 235570 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016441977333483482, "loss": 2.0731, "step": 235575 }, { "epoch": 0.55, "grad_norm": 2.078125, "learning_rate": 0.00016441835964147713, "loss": 1.848, "step": 235580 }, { "epoch": 0.55, "grad_norm": 2.6875, "learning_rate": 0.00016441694592611298, "loss": 2.0529, "step": 235585 }, { "epoch": 0.55, "grad_norm": 1.7421875, "learning_rate": 0.00016441553218874284, "loss": 1.9884, "step": 235590 }, { "epoch": 0.55, "grad_norm": 2.84375, "learning_rate": 0.00016441411842936716, "loss": 1.9963, "step": 235595 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.00016441270464798646, "loss": 2.1713, "step": 235600 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.00016441129084460122, "loss": 2.1415, "step": 235605 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016440987701921188, "loss": 1.957, "step": 235610 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016440846317181896, "loss": 2.1154, "step": 235615 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.000164407049302423, "loss": 2.1054, "step": 235620 }, { "epoch": 0.55, "grad_norm": 2.65625, "learning_rate": 0.00016440563541102437, "loss": 2.0844, "step": 235625 }, { "epoch": 0.55, "grad_norm": 2.28125, "learning_rate": 0.00016440422149762362, "loss": 2.0928, "step": 235630 }, { "epoch": 0.55, "grad_norm": 1.953125, "learning_rate": 0.0001644028075622212, "loss": 2.2372, "step": 235635 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.00016440139360481766, "loss": 2.3017, "step": 235640 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016439997962541342, "loss": 2.2462, "step": 235645 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016439856562400898, "loss": 2.0462, "step": 235650 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016439715160060478, "loss": 2.0272, "step": 235655 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.0001643957375552014, "loss": 2.2825, "step": 235660 }, { "epoch": 0.55, "grad_norm": 1.9609375, "learning_rate": 0.00016439432348779924, "loss": 2.0079, "step": 235665 }, { "epoch": 0.55, "grad_norm": 2.109375, "learning_rate": 0.0001643929093983988, "loss": 2.0544, "step": 235670 }, { "epoch": 0.55, "grad_norm": 2.828125, "learning_rate": 0.00016439149528700063, "loss": 2.2545, "step": 235675 }, { "epoch": 0.55, "grad_norm": 1.96875, "learning_rate": 0.0001643900811536051, "loss": 2.0489, "step": 235680 }, { "epoch": 0.55, "grad_norm": 2.171875, "learning_rate": 0.00016438866699821278, "loss": 2.2081, "step": 235685 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.0001643872528208241, "loss": 2.1695, "step": 235690 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.0001643858386214396, "loss": 2.0594, "step": 235695 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.0001643844244000597, "loss": 2.0175, "step": 235700 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016438301015668492, "loss": 2.3268, "step": 235705 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016438159589131574, "loss": 2.017, "step": 235710 }, { "epoch": 0.55, "grad_norm": 1.765625, "learning_rate": 0.0001643801816039526, "loss": 2.0397, "step": 235715 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.0001643787672945961, "loss": 2.1308, "step": 235720 }, { "epoch": 0.55, "grad_norm": 2.0625, "learning_rate": 0.00016437735296324658, "loss": 2.1264, "step": 235725 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016437593860990464, "loss": 2.1446, "step": 235730 }, { "epoch": 0.55, "grad_norm": 2.125, "learning_rate": 0.00016437452423457067, "loss": 2.2048, "step": 235735 }, { "epoch": 0.55, "grad_norm": 2.015625, "learning_rate": 0.00016437310983724522, "loss": 2.0671, "step": 235740 }, { "epoch": 0.55, "grad_norm": 1.8203125, "learning_rate": 0.00016437169541792874, "loss": 2.156, "step": 235745 }, { "epoch": 0.55, "grad_norm": 2.390625, "learning_rate": 0.00016437028097662172, "loss": 1.9422, "step": 235750 }, { "epoch": 0.55, "grad_norm": 2.1875, "learning_rate": 0.00016436886651332463, "loss": 2.1054, "step": 235755 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016436745202803797, "loss": 2.0844, "step": 235760 }, { "epoch": 0.55, "grad_norm": 2.234375, "learning_rate": 0.00016436603752076226, "loss": 2.0947, "step": 235765 }, { "epoch": 0.55, "grad_norm": 2.46875, "learning_rate": 0.0001643646229914979, "loss": 2.1185, "step": 235770 }, { "epoch": 0.55, "grad_norm": 2.0, "learning_rate": 0.00016436320844024548, "loss": 2.0169, "step": 235775 }, { "epoch": 0.55, "grad_norm": 2.4375, "learning_rate": 0.00016436179386700536, "loss": 1.9408, "step": 235780 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.0001643603792717781, "loss": 1.8744, "step": 235785 }, { "epoch": 0.55, "grad_norm": 2.40625, "learning_rate": 0.00016435896465456416, "loss": 2.0907, "step": 235790 }, { "epoch": 0.55, "grad_norm": 2.21875, "learning_rate": 0.00016435755001536402, "loss": 2.0927, "step": 235795 }, { "epoch": 0.55, "grad_norm": 2.09375, "learning_rate": 0.00016435613535417822, "loss": 1.9512, "step": 235800 }, { "epoch": 0.55, "grad_norm": 2.15625, "learning_rate": 0.00016435472067100716, "loss": 2.0404, "step": 235805 }, { "epoch": 0.55, "grad_norm": 2.25, "learning_rate": 0.00016435330596585137, "loss": 2.0755, "step": 235810 }, { "epoch": 0.55, "grad_norm": 2.203125, "learning_rate": 0.0001643518912387113, "loss": 2.1292, "step": 235815 }, { "epoch": 0.55, "grad_norm": 2.140625, "learning_rate": 0.0001643504764895875, "loss": 2.214, "step": 235820 }, { "epoch": 0.55, "grad_norm": 1.796875, "learning_rate": 0.0001643490617184804, "loss": 2.1534, "step": 235825 }, { "epoch": 0.55, "grad_norm": 1.9765625, "learning_rate": 0.0001643476469253905, "loss": 1.9856, "step": 235830 }, { "epoch": 0.55, "grad_norm": 2.03125, "learning_rate": 0.00016434623211031824, "loss": 2.0545, "step": 235835 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016434481727326414, "loss": 2.0054, "step": 235840 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016434340241422868, "loss": 2.136, "step": 235845 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016434198753321238, "loss": 2.2279, "step": 235850 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.00016434057263021572, "loss": 2.0424, "step": 235855 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.0001643391577052391, "loss": 2.0737, "step": 235860 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016433774275828307, "loss": 2.0225, "step": 235865 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.0001643363277893481, "loss": 2.2369, "step": 235870 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016433491279843466, "loss": 1.9267, "step": 235875 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.00016433349778554327, "loss": 2.1226, "step": 235880 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001643320827506744, "loss": 2.0757, "step": 235885 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001643306676938285, "loss": 2.0502, "step": 235890 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016432925261500607, "loss": 2.0446, "step": 235895 }, { "epoch": 0.56, "grad_norm": 1.8671875, "learning_rate": 0.00016432783751420762, "loss": 2.1477, "step": 235900 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.0001643264223914336, "loss": 2.1569, "step": 235905 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001643250072466845, "loss": 1.9987, "step": 235910 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016432359207996087, "loss": 2.066, "step": 235915 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016432217689126306, "loss": 2.1044, "step": 235920 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016432076168059167, "loss": 2.1306, "step": 235925 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016431934644794714, "loss": 2.1365, "step": 235930 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016431793119332994, "loss": 2.102, "step": 235935 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016431651591674058, "loss": 2.0966, "step": 235940 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.0001643151006181795, "loss": 2.1245, "step": 235945 }, { "epoch": 0.56, "grad_norm": 1.7109375, "learning_rate": 0.00016431368529764727, "loss": 1.9661, "step": 235950 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016431226995514427, "loss": 2.2344, "step": 235955 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016431085459067105, "loss": 2.0533, "step": 235960 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.0001643094392042281, "loss": 2.0856, "step": 235965 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.00016430802379581585, "loss": 2.1337, "step": 235970 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016430660836543482, "loss": 2.2114, "step": 235975 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016430519291308547, "loss": 2.0203, "step": 235980 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.0001643037774387683, "loss": 2.1763, "step": 235985 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016430236194248384, "loss": 2.0854, "step": 235990 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016430094642423248, "loss": 2.0257, "step": 235995 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016429953088401475, "loss": 2.0294, "step": 236000 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.00016429811532183116, "loss": 2.0038, "step": 236005 }, { "epoch": 0.56, "grad_norm": 2.71875, "learning_rate": 0.00016429669973768216, "loss": 2.1309, "step": 236010 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016429528413156825, "loss": 2.1608, "step": 236015 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.0001642938685034899, "loss": 2.0041, "step": 236020 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016429245285344758, "loss": 2.191, "step": 236025 }, { "epoch": 0.56, "grad_norm": 1.953125, "learning_rate": 0.00016429103718144182, "loss": 1.9689, "step": 236030 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016428962148747306, "loss": 1.9574, "step": 236035 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016428820577154177, "loss": 1.9476, "step": 236040 }, { "epoch": 0.56, "grad_norm": 1.8671875, "learning_rate": 0.0001642867900336485, "loss": 1.9372, "step": 236045 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.00016428537427379372, "loss": 2.0324, "step": 236050 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016428395849197783, "loss": 2.1761, "step": 236055 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001642825426882014, "loss": 1.9597, "step": 236060 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016428112686246488, "loss": 2.1232, "step": 236065 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016427971101476877, "loss": 2.1255, "step": 236070 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016427829514511358, "loss": 2.1454, "step": 236075 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016427687925349972, "loss": 1.9564, "step": 236080 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016427546333992773, "loss": 2.1395, "step": 236085 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016427404740439804, "loss": 2.1355, "step": 236090 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016427263144691117, "loss": 1.9909, "step": 236095 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016427121546746767, "loss": 1.9783, "step": 236100 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016426979946606787, "loss": 2.206, "step": 236105 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016426838344271242, "loss": 1.864, "step": 236110 }, { "epoch": 0.56, "grad_norm": 1.7421875, "learning_rate": 0.00016426696739740168, "loss": 2.0172, "step": 236115 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.0001642655513301362, "loss": 2.0217, "step": 236120 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016426413524091643, "loss": 2.2331, "step": 236125 }, { "epoch": 0.56, "grad_norm": 1.828125, "learning_rate": 0.00016426271912974288, "loss": 1.9618, "step": 236130 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016426130299661596, "loss": 2.0777, "step": 236135 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016425988684153628, "loss": 2.1418, "step": 236140 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.00016425847066450425, "loss": 2.1508, "step": 236145 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016425705446552032, "loss": 2.1599, "step": 236150 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016425563824458506, "loss": 2.1584, "step": 236155 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001642542220016989, "loss": 2.4395, "step": 236160 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016425280573686233, "loss": 2.169, "step": 236165 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.0001642513894500758, "loss": 2.0983, "step": 236170 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016424997314133988, "loss": 1.8838, "step": 236175 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.000164248556810655, "loss": 1.9038, "step": 236180 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.0001642471404580216, "loss": 2.1168, "step": 236185 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016424572408344024, "loss": 2.1408, "step": 236190 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001642443076869114, "loss": 2.1103, "step": 236195 }, { "epoch": 0.56, "grad_norm": 1.4921875, "learning_rate": 0.0001642428912684355, "loss": 2.0649, "step": 236200 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.0001642414748280131, "loss": 1.9478, "step": 236205 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016424005836564463, "loss": 2.0669, "step": 236210 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016423864188133057, "loss": 1.9787, "step": 236215 }, { "epoch": 0.56, "grad_norm": 1.6484375, "learning_rate": 0.00016423722537507146, "loss": 2.0498, "step": 236220 }, { "epoch": 0.56, "grad_norm": 1.859375, "learning_rate": 0.0001642358088468677, "loss": 2.2474, "step": 236225 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016423439229671988, "loss": 2.3647, "step": 236230 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016423297572462842, "loss": 2.1109, "step": 236235 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016423155913059377, "loss": 2.0545, "step": 236240 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016423014251461647, "loss": 2.1622, "step": 236245 }, { "epoch": 0.56, "grad_norm": 1.9296875, "learning_rate": 0.000164228725876697, "loss": 2.1172, "step": 236250 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.00016422730921683584, "loss": 2.0913, "step": 236255 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016422589253503348, "loss": 2.0228, "step": 236260 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016422447583129034, "loss": 2.0353, "step": 236265 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016422305910560698, "loss": 2.0059, "step": 236270 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016422164235798387, "loss": 2.0913, "step": 236275 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016422022558842145, "loss": 1.9484, "step": 236280 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016421880879692027, "loss": 2.1114, "step": 236285 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016421739198348075, "loss": 2.1865, "step": 236290 }, { "epoch": 0.56, "grad_norm": 2.65625, "learning_rate": 0.00016421597514810343, "loss": 2.1389, "step": 236295 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016421455829078875, "loss": 2.0691, "step": 236300 }, { "epoch": 0.56, "grad_norm": 2.640625, "learning_rate": 0.00016421314141153723, "loss": 2.2729, "step": 236305 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016421172451034933, "loss": 2.0366, "step": 236310 }, { "epoch": 0.56, "grad_norm": 1.828125, "learning_rate": 0.00016421030758722552, "loss": 2.0985, "step": 236315 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016420889064216632, "loss": 2.0442, "step": 236320 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016420747367517223, "loss": 1.9534, "step": 236325 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016420605668624364, "loss": 1.8465, "step": 236330 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.00016420463967538115, "loss": 2.0331, "step": 236335 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.0001642032226425852, "loss": 1.8148, "step": 236340 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016420180558785624, "loss": 2.0074, "step": 236345 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001642003885111947, "loss": 2.0157, "step": 236350 }, { "epoch": 0.56, "grad_norm": 1.859375, "learning_rate": 0.00016419897141260127, "loss": 2.0882, "step": 236355 }, { "epoch": 0.56, "grad_norm": 1.609375, "learning_rate": 0.00016419755429207625, "loss": 2.0628, "step": 236360 }, { "epoch": 0.56, "grad_norm": 1.78125, "learning_rate": 0.0001641961371496202, "loss": 2.1415, "step": 236365 }, { "epoch": 0.56, "grad_norm": 1.9296875, "learning_rate": 0.00016419471998523356, "loss": 2.0216, "step": 236370 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.00016419330279891686, "loss": 1.9983, "step": 236375 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016419188559067054, "loss": 2.0643, "step": 236380 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.00016419046836049512, "loss": 2.2954, "step": 236385 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.0001641890511083911, "loss": 2.1008, "step": 236390 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.00016418763383435888, "loss": 2.0702, "step": 236395 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016418621653839903, "loss": 1.9124, "step": 236400 }, { "epoch": 0.56, "grad_norm": 1.78125, "learning_rate": 0.000164184799220512, "loss": 1.9727, "step": 236405 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016418338188069827, "loss": 2.1771, "step": 236410 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016418196451895836, "loss": 2.1083, "step": 236415 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.0001641805471352927, "loss": 2.0802, "step": 236420 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016417912972970184, "loss": 2.1867, "step": 236425 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.0001641777123021862, "loss": 2.2223, "step": 236430 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016417629485274629, "loss": 2.0767, "step": 236435 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016417487738138258, "loss": 2.1428, "step": 236440 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016417345988809556, "loss": 2.1934, "step": 236445 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016417204237288577, "loss": 2.0076, "step": 236450 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.0001641706248357536, "loss": 2.1026, "step": 236455 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001641692072766996, "loss": 2.078, "step": 236460 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016416778969572423, "loss": 2.1749, "step": 236465 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016416637209282803, "loss": 2.1476, "step": 236470 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016416495446801136, "loss": 1.9448, "step": 236475 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001641635368212748, "loss": 2.2771, "step": 236480 }, { "epoch": 0.56, "grad_norm": 1.8046875, "learning_rate": 0.00016416211915261882, "loss": 2.0394, "step": 236485 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.0001641607014620439, "loss": 2.072, "step": 236490 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.0001641592837495505, "loss": 2.0363, "step": 236495 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.00016415786601513914, "loss": 2.1494, "step": 236500 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016415644825881032, "loss": 2.13, "step": 236505 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016415503048056442, "loss": 1.9922, "step": 236510 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016415361268040207, "loss": 2.0497, "step": 236515 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016415219485832366, "loss": 2.2476, "step": 236520 }, { "epoch": 0.56, "grad_norm": 2.453125, "learning_rate": 0.00016415077701432965, "loss": 2.0742, "step": 236525 }, { "epoch": 0.56, "grad_norm": 1.8671875, "learning_rate": 0.00016414935914842063, "loss": 1.9839, "step": 236530 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.000164147941260597, "loss": 2.125, "step": 236535 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016414652335085928, "loss": 1.8685, "step": 236540 }, { "epoch": 0.56, "grad_norm": 2.671875, "learning_rate": 0.00016414510541920793, "loss": 1.8866, "step": 236545 }, { "epoch": 0.56, "grad_norm": 1.8515625, "learning_rate": 0.00016414368746564347, "loss": 2.0044, "step": 236550 }, { "epoch": 0.56, "grad_norm": 1.796875, "learning_rate": 0.00016414226949016636, "loss": 2.0519, "step": 236555 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.0001641408514927771, "loss": 2.2354, "step": 236560 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001641394334734761, "loss": 2.0883, "step": 236565 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016413801543226398, "loss": 2.245, "step": 236570 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001641365973691411, "loss": 2.0522, "step": 236575 }, { "epoch": 0.56, "grad_norm": 1.9140625, "learning_rate": 0.00016413517928410803, "loss": 1.9713, "step": 236580 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016413376117716523, "loss": 2.0621, "step": 236585 }, { "epoch": 0.56, "grad_norm": 2.671875, "learning_rate": 0.00016413234304831308, "loss": 2.0477, "step": 236590 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016413092489755225, "loss": 2.1019, "step": 236595 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001641295067248831, "loss": 2.0686, "step": 236600 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016412808853030617, "loss": 2.0989, "step": 236605 }, { "epoch": 0.56, "grad_norm": 1.875, "learning_rate": 0.00016412667031382193, "loss": 2.1239, "step": 236610 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.0001641252520754308, "loss": 2.1964, "step": 236615 }, { "epoch": 0.56, "grad_norm": 3.96875, "learning_rate": 0.00016412383381513335, "loss": 2.089, "step": 236620 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016412241553293003, "loss": 2.0827, "step": 236625 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016412099722882138, "loss": 2.1512, "step": 236630 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016411957890280778, "loss": 2.2248, "step": 236635 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.0001641181605548898, "loss": 2.0504, "step": 236640 }, { "epoch": 0.56, "grad_norm": 1.9140625, "learning_rate": 0.00016411674218506786, "loss": 2.1466, "step": 236645 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001641153237933425, "loss": 1.8449, "step": 236650 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016411390537971417, "loss": 2.0876, "step": 236655 }, { "epoch": 0.56, "grad_norm": 2.703125, "learning_rate": 0.00016411248694418338, "loss": 2.1328, "step": 236660 }, { "epoch": 0.56, "grad_norm": 2.9375, "learning_rate": 0.00016411106848675063, "loss": 2.037, "step": 236665 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001641096500074163, "loss": 2.0823, "step": 236670 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.000164108231506181, "loss": 2.1204, "step": 236675 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.0001641068129830452, "loss": 2.1314, "step": 236680 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001641053944380093, "loss": 2.1475, "step": 236685 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.00016410397587107383, "loss": 2.1685, "step": 236690 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.0001641025572822393, "loss": 2.189, "step": 236695 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016410113867150616, "loss": 2.1452, "step": 236700 }, { "epoch": 0.56, "grad_norm": 2.953125, "learning_rate": 0.00016409972003887492, "loss": 2.2411, "step": 236705 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016409830138434606, "loss": 2.0914, "step": 236710 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016409688270792002, "loss": 1.9498, "step": 236715 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016409546400959738, "loss": 2.1478, "step": 236720 }, { "epoch": 0.56, "grad_norm": 2.625, "learning_rate": 0.00016409404528937852, "loss": 2.2497, "step": 236725 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016409262654726398, "loss": 2.0989, "step": 236730 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016409120778325425, "loss": 2.1558, "step": 236735 }, { "epoch": 0.56, "grad_norm": 1.875, "learning_rate": 0.0001640897889973498, "loss": 2.1159, "step": 236740 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.0001640883701895511, "loss": 2.0932, "step": 236745 }, { "epoch": 0.56, "grad_norm": 3.03125, "learning_rate": 0.00016408695135985868, "loss": 2.1853, "step": 236750 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016408553250827298, "loss": 2.0237, "step": 236755 }, { "epoch": 0.56, "grad_norm": 3.125, "learning_rate": 0.0001640841136347945, "loss": 2.1243, "step": 236760 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001640826947394237, "loss": 2.1331, "step": 236765 }, { "epoch": 0.56, "grad_norm": 1.7890625, "learning_rate": 0.00016408127582216112, "loss": 2.1211, "step": 236770 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016407985688300723, "loss": 2.2354, "step": 236775 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.00016407843792196246, "loss": 1.9324, "step": 236780 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016407701893902737, "loss": 2.1907, "step": 236785 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016407559993420235, "loss": 2.1799, "step": 236790 }, { "epoch": 0.56, "grad_norm": 1.5703125, "learning_rate": 0.000164074180907488, "loss": 2.0473, "step": 236795 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016407276185888473, "loss": 2.173, "step": 236800 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.000164071342788393, "loss": 2.0864, "step": 236805 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.0001640699236960134, "loss": 2.2741, "step": 236810 }, { "epoch": 0.56, "grad_norm": 2.53125, "learning_rate": 0.00016406850458174633, "loss": 1.8943, "step": 236815 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016406708544559228, "loss": 2.128, "step": 236820 }, { "epoch": 0.56, "grad_norm": 1.9140625, "learning_rate": 0.0001640656662875518, "loss": 1.9898, "step": 236825 }, { "epoch": 0.56, "grad_norm": 1.875, "learning_rate": 0.00016406424710762528, "loss": 2.1129, "step": 236830 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016406282790581327, "loss": 2.2576, "step": 236835 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001640614086821162, "loss": 1.893, "step": 236840 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.00016405998943653464, "loss": 2.2225, "step": 236845 }, { "epoch": 0.56, "grad_norm": 2.5, "learning_rate": 0.00016405857016906902, "loss": 2.0907, "step": 236850 }, { "epoch": 0.56, "grad_norm": 2.625, "learning_rate": 0.00016405715087971983, "loss": 2.2779, "step": 236855 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001640557315684875, "loss": 2.1128, "step": 236860 }, { "epoch": 0.56, "grad_norm": 2.546875, "learning_rate": 0.00016405431223537263, "loss": 2.0809, "step": 236865 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016405289288037565, "loss": 2.3163, "step": 236870 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016405147350349698, "loss": 2.0996, "step": 236875 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016405005410473724, "loss": 1.9537, "step": 236880 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.00016404863468409678, "loss": 1.9204, "step": 236885 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016404721524157617, "loss": 2.0424, "step": 236890 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.0001640457957771759, "loss": 1.9137, "step": 236895 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016404437629089638, "loss": 2.1189, "step": 236900 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016404295678273815, "loss": 1.9212, "step": 236905 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.0001640415372527017, "loss": 2.0178, "step": 236910 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016404011770078747, "loss": 1.9685, "step": 236915 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.000164038698126996, "loss": 2.1201, "step": 236920 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016403727853132774, "loss": 2.0337, "step": 236925 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016403585891378318, "loss": 1.973, "step": 236930 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.0001640344392743628, "loss": 1.9857, "step": 236935 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.0001640330196130671, "loss": 2.1035, "step": 236940 }, { "epoch": 0.56, "grad_norm": 1.8671875, "learning_rate": 0.0001640315999298966, "loss": 2.1408, "step": 236945 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001640301802248517, "loss": 2.1289, "step": 236950 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016402876049793295, "loss": 2.1766, "step": 236955 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016402734074914078, "loss": 2.0699, "step": 236960 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016402592097847576, "loss": 2.0787, "step": 236965 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.0001640245011859383, "loss": 2.1146, "step": 236970 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.00016402308137152888, "loss": 2.2816, "step": 236975 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016402166153524808, "loss": 2.0271, "step": 236980 }, { "epoch": 0.56, "grad_norm": 1.9296875, "learning_rate": 0.00016402024167709627, "loss": 2.1751, "step": 236985 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.000164018821797074, "loss": 2.0793, "step": 236990 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001640174018951817, "loss": 2.0822, "step": 236995 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016401598197141994, "loss": 2.1021, "step": 237000 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016401456202578917, "loss": 2.3105, "step": 237005 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.00016401314205828984, "loss": 2.1879, "step": 237010 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.0001640117220689225, "loss": 1.9693, "step": 237015 }, { "epoch": 0.56, "grad_norm": 1.9140625, "learning_rate": 0.0001640103020576875, "loss": 2.0179, "step": 237020 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001640088820245855, "loss": 2.1548, "step": 237025 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001640074619696169, "loss": 1.9547, "step": 237030 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.00016400604189278215, "loss": 2.0953, "step": 237035 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.0001640046217940818, "loss": 2.0644, "step": 237040 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016400320167351631, "loss": 2.2322, "step": 237045 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016400178153108618, "loss": 2.1325, "step": 237050 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.00016400036136679185, "loss": 2.172, "step": 237055 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.00016399894118063386, "loss": 2.1476, "step": 237060 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.00016399752097261266, "loss": 1.9811, "step": 237065 }, { "epoch": 0.56, "grad_norm": 3.546875, "learning_rate": 0.00016399610074272874, "loss": 2.0617, "step": 237070 }, { "epoch": 0.56, "grad_norm": 2.46875, "learning_rate": 0.00016399468049098262, "loss": 2.099, "step": 237075 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016399326021737472, "loss": 1.9553, "step": 237080 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.0001639918399219056, "loss": 1.9608, "step": 237085 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016399041960457567, "loss": 2.1665, "step": 237090 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.00016398899926538547, "loss": 2.1356, "step": 237095 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016398757890433545, "loss": 2.21, "step": 237100 }, { "epoch": 0.56, "grad_norm": 1.953125, "learning_rate": 0.00016398615852142613, "loss": 2.0518, "step": 237105 }, { "epoch": 0.56, "grad_norm": 2.46875, "learning_rate": 0.00016398473811665798, "loss": 2.2251, "step": 237110 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016398331769003149, "loss": 2.2654, "step": 237115 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016398189724154715, "loss": 2.0406, "step": 237120 }, { "epoch": 0.56, "grad_norm": 2.484375, "learning_rate": 0.00016398047677120537, "loss": 2.0543, "step": 237125 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016397905627900673, "loss": 2.0972, "step": 237130 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016397763576495172, "loss": 2.1031, "step": 237135 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.00016397621522904076, "loss": 1.9062, "step": 237140 }, { "epoch": 0.56, "grad_norm": 1.765625, "learning_rate": 0.00016397479467127437, "loss": 1.9339, "step": 237145 }, { "epoch": 0.56, "grad_norm": 1.859375, "learning_rate": 0.000163973374091653, "loss": 2.1806, "step": 237150 }, { "epoch": 0.56, "grad_norm": 1.796875, "learning_rate": 0.0001639719534901772, "loss": 2.1202, "step": 237155 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.0001639705328668474, "loss": 2.2527, "step": 237160 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016396911222166415, "loss": 2.0669, "step": 237165 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016396769155462785, "loss": 2.0766, "step": 237170 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016396627086573903, "loss": 2.2111, "step": 237175 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016396485015499816, "loss": 2.1999, "step": 237180 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016396342942240577, "loss": 2.1167, "step": 237185 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.0001639620086679623, "loss": 2.0536, "step": 237190 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016396058789166826, "loss": 1.9028, "step": 237195 }, { "epoch": 0.56, "grad_norm": 1.7421875, "learning_rate": 0.0001639591670935241, "loss": 2.1541, "step": 237200 }, { "epoch": 0.56, "grad_norm": 2.84375, "learning_rate": 0.0001639577462735303, "loss": 2.0945, "step": 237205 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016395632543168742, "loss": 2.1714, "step": 237210 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001639549045679959, "loss": 2.021, "step": 237215 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016395348368245622, "loss": 2.3085, "step": 237220 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016395206277506887, "loss": 2.0996, "step": 237225 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016395064184583432, "loss": 1.9663, "step": 237230 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016394922089475308, "loss": 2.0892, "step": 237235 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016394779992182564, "loss": 2.0043, "step": 237240 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016394637892705246, "loss": 1.9404, "step": 237245 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.00016394495791043404, "loss": 2.1917, "step": 237250 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016394353687197087, "loss": 2.1435, "step": 237255 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.0001639421158116634, "loss": 2.2607, "step": 237260 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016394069472951218, "loss": 2.1128, "step": 237265 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.00016393927362551763, "loss": 2.0495, "step": 237270 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001639378524996803, "loss": 2.1459, "step": 237275 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016393643135200058, "loss": 2.0434, "step": 237280 }, { "epoch": 0.56, "grad_norm": 1.8125, "learning_rate": 0.00016393501018247905, "loss": 2.093, "step": 237285 }, { "epoch": 0.56, "grad_norm": 1.9296875, "learning_rate": 0.00016393358899111614, "loss": 2.1494, "step": 237290 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016393216777791237, "loss": 2.1119, "step": 237295 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016393074654286825, "loss": 2.2171, "step": 237300 }, { "epoch": 0.56, "grad_norm": 1.7734375, "learning_rate": 0.00016392932528598418, "loss": 2.2289, "step": 237305 }, { "epoch": 0.56, "grad_norm": 1.671875, "learning_rate": 0.00016392790400726072, "loss": 2.2364, "step": 237310 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016392648270669832, "loss": 1.8852, "step": 237315 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016392506138429748, "loss": 2.2004, "step": 237320 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016392364004005867, "loss": 1.9832, "step": 237325 }, { "epoch": 0.56, "grad_norm": 1.8046875, "learning_rate": 0.0001639222186739824, "loss": 2.0343, "step": 237330 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001639207972860691, "loss": 2.0524, "step": 237335 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016391937587631933, "loss": 2.1002, "step": 237340 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016391795444473356, "loss": 2.1603, "step": 237345 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016391653299131223, "loss": 2.0393, "step": 237350 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016391511151605585, "loss": 2.0653, "step": 237355 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016391369001896494, "loss": 2.1788, "step": 237360 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.0001639122685000399, "loss": 2.1863, "step": 237365 }, { "epoch": 0.56, "grad_norm": 2.796875, "learning_rate": 0.0001639108469592813, "loss": 2.021, "step": 237370 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001639094253966896, "loss": 2.1294, "step": 237375 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016390800381226527, "loss": 1.8821, "step": 237380 }, { "epoch": 0.56, "grad_norm": 1.859375, "learning_rate": 0.00016390658220600884, "loss": 2.1881, "step": 237385 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.00016390516057792072, "loss": 2.0315, "step": 237390 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016390373892800146, "loss": 2.2772, "step": 237395 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016390231725625152, "loss": 2.2707, "step": 237400 }, { "epoch": 0.56, "grad_norm": 1.796875, "learning_rate": 0.00016390089556267138, "loss": 2.1211, "step": 237405 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016389947384726154, "loss": 1.9369, "step": 237410 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016389805211002248, "loss": 2.109, "step": 237415 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.0001638966303509547, "loss": 2.1268, "step": 237420 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.00016389520857005866, "loss": 1.9727, "step": 237425 }, { "epoch": 0.56, "grad_norm": 1.8125, "learning_rate": 0.00016389378676733487, "loss": 2.2, "step": 237430 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.0001638923649427838, "loss": 2.2315, "step": 237435 }, { "epoch": 0.56, "grad_norm": 1.8046875, "learning_rate": 0.0001638909430964059, "loss": 2.0162, "step": 237440 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016388952122820176, "loss": 2.1868, "step": 237445 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.00016388809933817178, "loss": 2.0798, "step": 237450 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016388667742631645, "loss": 1.8249, "step": 237455 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016388525549263628, "loss": 2.1156, "step": 237460 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016388383353713174, "loss": 2.2162, "step": 237465 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016388241155980334, "loss": 2.1751, "step": 237470 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016388098956065153, "loss": 2.2631, "step": 237475 }, { "epoch": 0.56, "grad_norm": 2.46875, "learning_rate": 0.00016387956753967684, "loss": 1.9981, "step": 237480 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001638781454968797, "loss": 2.1622, "step": 237485 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016387672343226066, "loss": 2.1624, "step": 237490 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016387530134582016, "loss": 1.9918, "step": 237495 }, { "epoch": 0.56, "grad_norm": 1.8515625, "learning_rate": 0.0001638738792375587, "loss": 1.8492, "step": 237500 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016387245710747673, "loss": 2.1108, "step": 237505 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016387103495557482, "loss": 2.1573, "step": 237510 }, { "epoch": 0.56, "grad_norm": 1.734375, "learning_rate": 0.0001638696127818534, "loss": 2.1954, "step": 237515 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016386819058631294, "loss": 2.0844, "step": 237520 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016386676836895397, "loss": 2.0919, "step": 237525 }, { "epoch": 0.56, "grad_norm": 1.7578125, "learning_rate": 0.0001638653461297769, "loss": 2.1161, "step": 237530 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016386392386878233, "loss": 2.0566, "step": 237535 }, { "epoch": 0.56, "grad_norm": 1.953125, "learning_rate": 0.00016386250158597067, "loss": 1.8916, "step": 237540 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001638610792813424, "loss": 1.9306, "step": 237545 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016385965695489807, "loss": 2.2412, "step": 237550 }, { "epoch": 0.56, "grad_norm": 2.703125, "learning_rate": 0.00016385823460663806, "loss": 1.9636, "step": 237555 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016385681223656296, "loss": 2.1039, "step": 237560 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016385538984467322, "loss": 2.2294, "step": 237565 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016385396743096934, "loss": 2.0897, "step": 237570 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016385254499545174, "loss": 2.1124, "step": 237575 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016385112253812097, "loss": 2.1214, "step": 237580 }, { "epoch": 0.56, "grad_norm": 2.671875, "learning_rate": 0.00016384970005897749, "loss": 1.9401, "step": 237585 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016384827755802178, "loss": 2.1185, "step": 237590 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001638468550352544, "loss": 2.2767, "step": 237595 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016384543249067573, "loss": 2.1875, "step": 237600 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.0001638440099242863, "loss": 2.0676, "step": 237605 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016384258733608662, "loss": 2.0355, "step": 237610 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016384116472607714, "loss": 1.9912, "step": 237615 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.00016383974209425837, "loss": 2.1206, "step": 237620 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.00016383831944063077, "loss": 2.0197, "step": 237625 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.00016383689676519487, "loss": 1.9341, "step": 237630 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001638354740679511, "loss": 2.1138, "step": 237635 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016383405134889998, "loss": 2.1398, "step": 237640 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016383262860804202, "loss": 2.1885, "step": 237645 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016383120584537766, "loss": 1.9628, "step": 237650 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016382978306090737, "loss": 1.9635, "step": 237655 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001638283602546317, "loss": 2.0729, "step": 237660 }, { "epoch": 0.56, "grad_norm": 1.8828125, "learning_rate": 0.0001638269374265511, "loss": 2.2237, "step": 237665 }, { "epoch": 0.56, "grad_norm": 2.453125, "learning_rate": 0.00016382551457666607, "loss": 2.0079, "step": 237670 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016382409170497705, "loss": 2.0208, "step": 237675 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.0001638226688114846, "loss": 2.061, "step": 237680 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016382124589618916, "loss": 2.0284, "step": 237685 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016381982295909123, "loss": 2.2767, "step": 237690 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016381840000019126, "loss": 2.1374, "step": 237695 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.0001638169770194898, "loss": 2.1089, "step": 237700 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.0001638155540169873, "loss": 2.1721, "step": 237705 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016381413099268426, "loss": 2.1642, "step": 237710 }, { "epoch": 0.56, "grad_norm": 1.9296875, "learning_rate": 0.00016381270794658113, "loss": 1.8799, "step": 237715 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.0001638112848786784, "loss": 2.0987, "step": 237720 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.00016380986178897664, "loss": 2.0767, "step": 237725 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016380843867747625, "loss": 2.2675, "step": 237730 }, { "epoch": 0.56, "grad_norm": 2.46875, "learning_rate": 0.00016380701554417773, "loss": 2.1231, "step": 237735 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016380559238908155, "loss": 2.0115, "step": 237740 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016380416921218827, "loss": 2.2072, "step": 237745 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.00016380274601349832, "loss": 2.0216, "step": 237750 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016380132279301217, "loss": 2.121, "step": 237755 }, { "epoch": 0.56, "grad_norm": 1.8671875, "learning_rate": 0.00016379989955073033, "loss": 2.1664, "step": 237760 }, { "epoch": 0.56, "grad_norm": 1.7578125, "learning_rate": 0.0001637984762866533, "loss": 2.1848, "step": 237765 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016379705300078156, "loss": 1.9854, "step": 237770 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.0001637956296931156, "loss": 2.0641, "step": 237775 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.0001637942063636559, "loss": 2.0315, "step": 237780 }, { "epoch": 0.56, "grad_norm": 2.484375, "learning_rate": 0.00016379278301240292, "loss": 2.073, "step": 237785 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016379135963935716, "loss": 1.9338, "step": 237790 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016378993624451913, "loss": 1.88, "step": 237795 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016378851282788928, "loss": 1.9963, "step": 237800 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.00016378708938946814, "loss": 1.9173, "step": 237805 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016378566592925616, "loss": 2.2182, "step": 237810 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016378424244725389, "loss": 2.1218, "step": 237815 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.0001637828189434617, "loss": 2.0945, "step": 237820 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016378139541788018, "loss": 2.1395, "step": 237825 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016377997187050976, "loss": 2.21, "step": 237830 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016377854830135094, "loss": 2.1046, "step": 237835 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016377712471040424, "loss": 2.0241, "step": 237840 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.0001637757010976701, "loss": 2.0729, "step": 237845 }, { "epoch": 0.56, "grad_norm": 2.546875, "learning_rate": 0.000163774277463149, "loss": 2.3081, "step": 237850 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001637728538068415, "loss": 2.215, "step": 237855 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.000163771430128748, "loss": 2.0241, "step": 237860 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.000163770006428869, "loss": 2.0266, "step": 237865 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.0001637685827072051, "loss": 2.0535, "step": 237870 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.0001637671589637566, "loss": 2.0685, "step": 237875 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016376573519852412, "loss": 2.1847, "step": 237880 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016376431141150812, "loss": 2.1222, "step": 237885 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016376288760270905, "loss": 2.0768, "step": 237890 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016376146377212745, "loss": 2.0672, "step": 237895 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016376003991976377, "loss": 2.0435, "step": 237900 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016375861604561848, "loss": 2.0533, "step": 237905 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001637571921496921, "loss": 2.2757, "step": 237910 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001637557682319851, "loss": 1.9576, "step": 237915 }, { "epoch": 0.56, "grad_norm": 2.640625, "learning_rate": 0.00016375434429249797, "loss": 2.0024, "step": 237920 }, { "epoch": 0.56, "grad_norm": 2.5625, "learning_rate": 0.00016375292033123126, "loss": 2.24, "step": 237925 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016375149634818533, "loss": 1.9039, "step": 237930 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016375007234336076, "loss": 2.2137, "step": 237935 }, { "epoch": 0.56, "grad_norm": 1.7421875, "learning_rate": 0.000163748648316758, "loss": 2.0245, "step": 237940 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016374722426837755, "loss": 1.9817, "step": 237945 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001637458001982199, "loss": 2.0872, "step": 237950 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016374437610628553, "loss": 1.8798, "step": 237955 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.0001637429519925749, "loss": 2.0662, "step": 237960 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016374152785708854, "loss": 2.1307, "step": 237965 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001637401036998269, "loss": 1.838, "step": 237970 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.00016373867952079052, "loss": 2.2036, "step": 237975 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016373725531997982, "loss": 2.2268, "step": 237980 }, { "epoch": 0.56, "grad_norm": 1.7421875, "learning_rate": 0.0001637358310973953, "loss": 1.8581, "step": 237985 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016373440685303755, "loss": 2.1113, "step": 237990 }, { "epoch": 0.56, "grad_norm": 1.796875, "learning_rate": 0.00016373298258690687, "loss": 2.0987, "step": 237995 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016373155829900392, "loss": 2.188, "step": 238000 }, { "epoch": 0.56, "grad_norm": 2.640625, "learning_rate": 0.00016373013398932908, "loss": 2.2521, "step": 238005 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016372870965788286, "loss": 2.0784, "step": 238010 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016372728530466576, "loss": 1.9715, "step": 238015 }, { "epoch": 0.56, "grad_norm": 2.8125, "learning_rate": 0.0001637258609296783, "loss": 2.1878, "step": 238020 }, { "epoch": 0.56, "grad_norm": 1.7890625, "learning_rate": 0.00016372443653292092, "loss": 2.1172, "step": 238025 }, { "epoch": 0.56, "grad_norm": 1.796875, "learning_rate": 0.0001637230121143941, "loss": 1.9717, "step": 238030 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016372158767409832, "loss": 2.121, "step": 238035 }, { "epoch": 0.56, "grad_norm": 4.875, "learning_rate": 0.00016372016321203412, "loss": 1.9882, "step": 238040 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016371873872820194, "loss": 2.0635, "step": 238045 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.0001637173142226023, "loss": 2.2071, "step": 238050 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016371588969523568, "loss": 1.9663, "step": 238055 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.00016371446514610254, "loss": 1.9703, "step": 238060 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001637130405752034, "loss": 2.2993, "step": 238065 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001637116159825387, "loss": 2.0683, "step": 238070 }, { "epoch": 0.56, "grad_norm": 1.703125, "learning_rate": 0.00016371019136810896, "loss": 1.9569, "step": 238075 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016370876673191468, "loss": 1.9813, "step": 238080 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016370734207395632, "loss": 2.1379, "step": 238085 }, { "epoch": 0.56, "grad_norm": 2.8125, "learning_rate": 0.00016370591739423438, "loss": 2.0088, "step": 238090 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016370449269274934, "loss": 2.1458, "step": 238095 }, { "epoch": 0.56, "grad_norm": 2.515625, "learning_rate": 0.0001637030679695017, "loss": 2.2256, "step": 238100 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016370164322449194, "loss": 1.9775, "step": 238105 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016370021845772052, "loss": 1.9988, "step": 238110 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016369879366918798, "loss": 2.0121, "step": 238115 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016369736885889474, "loss": 1.9983, "step": 238120 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016369594402684134, "loss": 2.0842, "step": 238125 }, { "epoch": 0.56, "grad_norm": 1.7265625, "learning_rate": 0.00016369451917302828, "loss": 2.133, "step": 238130 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016369309429745599, "loss": 2.0255, "step": 238135 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016369166940012497, "loss": 2.076, "step": 238140 }, { "epoch": 0.56, "grad_norm": 1.7578125, "learning_rate": 0.00016369024448103573, "loss": 1.9766, "step": 238145 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016368881954018875, "loss": 2.1351, "step": 238150 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016368739457758453, "loss": 2.0694, "step": 238155 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016368596959322353, "loss": 2.0706, "step": 238160 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016368454458710625, "loss": 2.3435, "step": 238165 }, { "epoch": 0.56, "grad_norm": 2.578125, "learning_rate": 0.00016368311955923317, "loss": 2.0786, "step": 238170 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016368169450960477, "loss": 1.7615, "step": 238175 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016368026943822158, "loss": 2.3003, "step": 238180 }, { "epoch": 0.56, "grad_norm": 2.5, "learning_rate": 0.00016367884434508403, "loss": 2.0912, "step": 238185 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016367741923019265, "loss": 2.0723, "step": 238190 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001636759940935479, "loss": 2.2271, "step": 238195 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016367456893515026, "loss": 2.0273, "step": 238200 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016367314375500026, "loss": 2.0898, "step": 238205 }, { "epoch": 0.56, "grad_norm": 2.5625, "learning_rate": 0.00016367171855309834, "loss": 2.125, "step": 238210 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.000163670293329445, "loss": 1.9758, "step": 238215 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016366886808404077, "loss": 2.0345, "step": 238220 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016366744281688604, "loss": 2.1804, "step": 238225 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016366601752798137, "loss": 2.1439, "step": 238230 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016366459221732726, "loss": 2.0655, "step": 238235 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016366316688492418, "loss": 2.2466, "step": 238240 }, { "epoch": 0.56, "grad_norm": 2.546875, "learning_rate": 0.00016366174153077258, "loss": 1.9916, "step": 238245 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.000163660316154873, "loss": 2.0265, "step": 238250 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.00016365889075722588, "loss": 2.0654, "step": 238255 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016365746533783174, "loss": 2.0231, "step": 238260 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016365603989669105, "loss": 1.9046, "step": 238265 }, { "epoch": 0.56, "grad_norm": 1.953125, "learning_rate": 0.0001636546144338043, "loss": 2.2835, "step": 238270 }, { "epoch": 0.56, "grad_norm": 2.703125, "learning_rate": 0.000163653188949172, "loss": 2.1424, "step": 238275 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.0001636517634427946, "loss": 2.185, "step": 238280 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016365033791467258, "loss": 2.0452, "step": 238285 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.0001636489123648065, "loss": 2.2859, "step": 238290 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016364748679319674, "loss": 2.0399, "step": 238295 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016364606119984392, "loss": 2.2518, "step": 238300 }, { "epoch": 0.56, "grad_norm": 1.8046875, "learning_rate": 0.00016364463558474839, "loss": 2.258, "step": 238305 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.0001636432099479107, "loss": 2.0028, "step": 238310 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016364178428933136, "loss": 2.1904, "step": 238315 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001636403586090108, "loss": 1.9416, "step": 238320 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016363893290694957, "loss": 2.0533, "step": 238325 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016363750718314813, "loss": 2.1244, "step": 238330 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016363608143760695, "loss": 2.28, "step": 238335 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016363465567032653, "loss": 2.0627, "step": 238340 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.00016363322988130735, "loss": 2.0756, "step": 238345 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.0001636318040705499, "loss": 2.1029, "step": 238350 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001636303782380547, "loss": 2.2154, "step": 238355 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001636289523838222, "loss": 2.0692, "step": 238360 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.0001636275265078529, "loss": 2.3228, "step": 238365 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016362610061014724, "loss": 2.0796, "step": 238370 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.0001636246746907058, "loss": 2.0486, "step": 238375 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.000163623248749529, "loss": 2.1616, "step": 238380 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016362182278661735, "loss": 2.2491, "step": 238385 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.0001636203968019713, "loss": 2.0726, "step": 238390 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016361897079559143, "loss": 2.0875, "step": 238395 }, { "epoch": 0.56, "grad_norm": 1.921875, "learning_rate": 0.00016361754476747813, "loss": 2.0064, "step": 238400 }, { "epoch": 0.56, "grad_norm": 2.578125, "learning_rate": 0.00016361611871763192, "loss": 2.1781, "step": 238405 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.0001636146926460533, "loss": 2.1864, "step": 238410 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016361326655274272, "loss": 2.1904, "step": 238415 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016361184043770072, "loss": 2.2261, "step": 238420 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016361041430092775, "loss": 1.94, "step": 238425 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.0001636089881424243, "loss": 2.2233, "step": 238430 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016360756196219091, "loss": 2.0303, "step": 238435 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.000163606135760228, "loss": 2.152, "step": 238440 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016360470953653605, "loss": 2.167, "step": 238445 }, { "epoch": 0.56, "grad_norm": 1.8046875, "learning_rate": 0.00016360328329111562, "loss": 1.9637, "step": 238450 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016360185702396716, "loss": 2.0937, "step": 238455 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016360043073509111, "loss": 2.132, "step": 238460 }, { "epoch": 0.56, "grad_norm": 2.640625, "learning_rate": 0.00016359900442448801, "loss": 2.0825, "step": 238465 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016359757809215837, "loss": 2.1178, "step": 238470 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016359615173810262, "loss": 2.1837, "step": 238475 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016359472536232127, "loss": 1.8291, "step": 238480 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016359329896481478, "loss": 2.0198, "step": 238485 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016359187254558373, "loss": 2.0515, "step": 238490 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016359044610462847, "loss": 2.1529, "step": 238495 }, { "epoch": 0.56, "grad_norm": 1.7421875, "learning_rate": 0.00016358901964194962, "loss": 2.1043, "step": 238500 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016358759315754758, "loss": 2.1454, "step": 238505 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016358616665142287, "loss": 2.0564, "step": 238510 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.000163584740123576, "loss": 2.0383, "step": 238515 }, { "epoch": 0.56, "grad_norm": 1.796875, "learning_rate": 0.00016358331357400739, "loss": 2.1467, "step": 238520 }, { "epoch": 0.56, "grad_norm": 2.5625, "learning_rate": 0.00016358188700271758, "loss": 2.0941, "step": 238525 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016358046040970705, "loss": 2.1416, "step": 238530 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.00016357903379497624, "loss": 2.2068, "step": 238535 }, { "epoch": 0.56, "grad_norm": 2.65625, "learning_rate": 0.00016357760715852575, "loss": 2.1271, "step": 238540 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016357618050035595, "loss": 2.2206, "step": 238545 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016357475382046738, "loss": 2.139, "step": 238550 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016357332711886056, "loss": 2.2122, "step": 238555 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016357190039553588, "loss": 2.0627, "step": 238560 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016357047365049392, "loss": 2.0626, "step": 238565 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.0001635690468837351, "loss": 2.1454, "step": 238570 }, { "epoch": 0.56, "grad_norm": 2.9375, "learning_rate": 0.00016356762009525996, "loss": 2.1829, "step": 238575 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.000163566193285069, "loss": 2.067, "step": 238580 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016356476645316262, "loss": 2.0457, "step": 238585 }, { "epoch": 0.56, "grad_norm": 2.625, "learning_rate": 0.00016356333959954142, "loss": 2.0139, "step": 238590 }, { "epoch": 0.56, "grad_norm": 1.9296875, "learning_rate": 0.00016356191272420581, "loss": 2.0257, "step": 238595 }, { "epoch": 0.56, "grad_norm": 1.8125, "learning_rate": 0.00016356048582715627, "loss": 1.9356, "step": 238600 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016355905890839332, "loss": 2.1168, "step": 238605 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016355763196791747, "loss": 1.9705, "step": 238610 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.0001635562050057292, "loss": 2.049, "step": 238615 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.00016355477802182893, "loss": 2.1427, "step": 238620 }, { "epoch": 0.56, "grad_norm": 1.734375, "learning_rate": 0.0001635533510162172, "loss": 2.0321, "step": 238625 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001635519239888945, "loss": 1.9431, "step": 238630 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.0001635504969398613, "loss": 2.156, "step": 238635 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016354906986911814, "loss": 1.8881, "step": 238640 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016354764277666544, "loss": 2.034, "step": 238645 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.0001635462156625037, "loss": 1.9955, "step": 238650 }, { "epoch": 0.56, "grad_norm": 1.53125, "learning_rate": 0.00016354478852663342, "loss": 1.9286, "step": 238655 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.0001635433613690551, "loss": 1.9269, "step": 238660 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016354193418976923, "loss": 2.0769, "step": 238665 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016354050698877626, "loss": 2.0954, "step": 238670 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001635390797660767, "loss": 2.1032, "step": 238675 }, { "epoch": 0.56, "grad_norm": 1.8828125, "learning_rate": 0.00016353765252167103, "loss": 1.9193, "step": 238680 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016353622525555977, "loss": 2.1083, "step": 238685 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016353479796774338, "loss": 1.9477, "step": 238690 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016353337065822234, "loss": 2.0656, "step": 238695 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016353194332699715, "loss": 1.9707, "step": 238700 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016353051597406827, "loss": 1.9857, "step": 238705 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.00016352908859943629, "loss": 2.2545, "step": 238710 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.00016352766120310156, "loss": 1.9626, "step": 238715 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016352623378506462, "loss": 2.1021, "step": 238720 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016352480634532597, "loss": 2.0535, "step": 238725 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016352337888388612, "loss": 2.2038, "step": 238730 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.00016352195140074553, "loss": 2.2423, "step": 238735 }, { "epoch": 0.56, "grad_norm": 2.515625, "learning_rate": 0.0001635205238959047, "loss": 2.017, "step": 238740 }, { "epoch": 0.56, "grad_norm": 1.828125, "learning_rate": 0.00016351909636936409, "loss": 1.8668, "step": 238745 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016351766882112423, "loss": 2.1624, "step": 238750 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016351624125118553, "loss": 2.1337, "step": 238755 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016351481365954856, "loss": 2.2479, "step": 238760 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016351338604621377, "loss": 2.1048, "step": 238765 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016351195841118166, "loss": 2.245, "step": 238770 }, { "epoch": 0.56, "grad_norm": 2.6875, "learning_rate": 0.00016351053075445271, "loss": 2.059, "step": 238775 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016350910307602742, "loss": 2.1981, "step": 238780 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.0001635076753759063, "loss": 1.9796, "step": 238785 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016350624765408973, "loss": 1.9187, "step": 238790 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016350481991057834, "loss": 2.1376, "step": 238795 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.0001635033921453725, "loss": 2.2581, "step": 238800 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016350196435847278, "loss": 2.2848, "step": 238805 }, { "epoch": 0.56, "grad_norm": 1.75, "learning_rate": 0.00016350053654987963, "loss": 2.0037, "step": 238810 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016349910871959352, "loss": 2.1735, "step": 238815 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.000163497680867615, "loss": 2.0147, "step": 238820 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016349625299394451, "loss": 2.0463, "step": 238825 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016349482509858256, "loss": 2.0478, "step": 238830 }, { "epoch": 0.56, "grad_norm": 1.8515625, "learning_rate": 0.00016349339718152962, "loss": 2.1505, "step": 238835 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.0001634919692427862, "loss": 1.9574, "step": 238840 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016349054128235273, "loss": 2.0515, "step": 238845 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016348911330022975, "loss": 2.1004, "step": 238850 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016348768529641776, "loss": 2.0757, "step": 238855 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.00016348625727091723, "loss": 2.138, "step": 238860 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016348482922372862, "loss": 1.9693, "step": 238865 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016348340115485244, "loss": 2.0505, "step": 238870 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.00016348197306428918, "loss": 2.0635, "step": 238875 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016348054495203936, "loss": 2.0754, "step": 238880 }, { "epoch": 0.56, "grad_norm": 1.671875, "learning_rate": 0.0001634791168181034, "loss": 2.1979, "step": 238885 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.0001634776886624818, "loss": 2.026, "step": 238890 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016347626048517511, "loss": 2.1005, "step": 238895 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016347483228618378, "loss": 2.1473, "step": 238900 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001634734040655083, "loss": 1.9545, "step": 238905 }, { "epoch": 0.56, "grad_norm": 1.734375, "learning_rate": 0.0001634719758231491, "loss": 2.0797, "step": 238910 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016347054755910679, "loss": 2.1178, "step": 238915 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016346911927338174, "loss": 2.1578, "step": 238920 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016346769096597453, "loss": 1.9078, "step": 238925 }, { "epoch": 0.56, "grad_norm": 2.546875, "learning_rate": 0.00016346626263688557, "loss": 2.1165, "step": 238930 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016346483428611542, "loss": 1.9991, "step": 238935 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.0001634634059136645, "loss": 2.1092, "step": 238940 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016346197751953334, "loss": 2.2007, "step": 238945 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016346054910372238, "loss": 2.0365, "step": 238950 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016345912066623218, "loss": 2.3508, "step": 238955 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016345769220706322, "loss": 2.037, "step": 238960 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016345626372621594, "loss": 2.1589, "step": 238965 }, { "epoch": 0.56, "grad_norm": 2.5, "learning_rate": 0.00016345483522369085, "loss": 2.087, "step": 238970 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.00016345340669948844, "loss": 2.1327, "step": 238975 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016345197815360917, "loss": 2.18, "step": 238980 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.00016345054958605358, "loss": 2.4007, "step": 238985 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016344912099682213, "loss": 1.9364, "step": 238990 }, { "epoch": 0.56, "grad_norm": 1.859375, "learning_rate": 0.0001634476923859153, "loss": 2.0408, "step": 238995 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016344626375333358, "loss": 2.1486, "step": 239000 }, { "epoch": 0.56, "grad_norm": 2.484375, "learning_rate": 0.00016344483509907746, "loss": 2.0172, "step": 239005 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016344340642314748, "loss": 2.0362, "step": 239010 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016344197772554404, "loss": 2.1235, "step": 239015 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001634405490062677, "loss": 2.1086, "step": 239020 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.0001634391202653189, "loss": 2.2, "step": 239025 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016343769150269815, "loss": 1.9334, "step": 239030 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016343626271840593, "loss": 1.879, "step": 239035 }, { "epoch": 0.56, "grad_norm": 2.53125, "learning_rate": 0.00016343483391244271, "loss": 2.1661, "step": 239040 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016343340508480903, "loss": 2.0944, "step": 239045 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.00016343197623550535, "loss": 2.2248, "step": 239050 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.00016343054736453214, "loss": 2.2037, "step": 239055 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016342911847188991, "loss": 2.166, "step": 239060 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016342768955757915, "loss": 2.164, "step": 239065 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016342626062160033, "loss": 2.0886, "step": 239070 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016342483166395397, "loss": 2.0798, "step": 239075 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.0001634234026846405, "loss": 2.0294, "step": 239080 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016342197368366047, "loss": 2.0206, "step": 239085 }, { "epoch": 0.56, "grad_norm": 1.8515625, "learning_rate": 0.00016342054466101435, "loss": 1.7766, "step": 239090 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016341911561670261, "loss": 2.0776, "step": 239095 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016341768655072576, "loss": 2.1722, "step": 239100 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.0001634162574630843, "loss": 2.1524, "step": 239105 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.00016341482835377863, "loss": 2.091, "step": 239110 }, { "epoch": 0.56, "grad_norm": 1.8671875, "learning_rate": 0.00016341339922280933, "loss": 2.0762, "step": 239115 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016341197007017688, "loss": 2.3187, "step": 239120 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016341054089588178, "loss": 2.0121, "step": 239125 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016340911169992442, "loss": 2.0865, "step": 239130 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016340768248230541, "loss": 1.9518, "step": 239135 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016340625324302516, "loss": 2.1509, "step": 239140 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001634048239820842, "loss": 2.0493, "step": 239145 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.000163403394699483, "loss": 2.0189, "step": 239150 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016340196539522205, "loss": 1.9825, "step": 239155 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.00016340053606930184, "loss": 2.1107, "step": 239160 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016339910672172282, "loss": 2.0644, "step": 239165 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016339767735248554, "loss": 2.1008, "step": 239170 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016339624796159048, "loss": 2.1276, "step": 239175 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016339481854903812, "loss": 2.143, "step": 239180 }, { "epoch": 0.56, "grad_norm": 2.640625, "learning_rate": 0.00016339338911482894, "loss": 1.9707, "step": 239185 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.0001633919596589634, "loss": 2.1675, "step": 239190 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.000163390530181442, "loss": 2.1525, "step": 239195 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001633891006822653, "loss": 2.263, "step": 239200 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.0001633876711614337, "loss": 2.2406, "step": 239205 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016338624161894773, "loss": 2.0247, "step": 239210 }, { "epoch": 0.56, "grad_norm": 2.53125, "learning_rate": 0.00016338481205480787, "loss": 2.1664, "step": 239215 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.0001633833824690146, "loss": 2.0554, "step": 239220 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016338195286156842, "loss": 2.0665, "step": 239225 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016338052323246983, "loss": 2.2443, "step": 239230 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.0001633790935817193, "loss": 2.1111, "step": 239235 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.0001633776639093173, "loss": 2.1051, "step": 239240 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.00016337623421526436, "loss": 2.1641, "step": 239245 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.00016337480449956092, "loss": 2.0968, "step": 239250 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.00016337337476220754, "loss": 2.0805, "step": 239255 }, { "epoch": 0.56, "grad_norm": 2.484375, "learning_rate": 0.00016337194500320464, "loss": 2.0613, "step": 239260 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016337051522255275, "loss": 2.017, "step": 239265 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.00016336908542025232, "loss": 2.0423, "step": 239270 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.0001633676555963039, "loss": 2.0425, "step": 239275 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.0001633662257507079, "loss": 2.0032, "step": 239280 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001633647958834649, "loss": 2.2365, "step": 239285 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016336336599457528, "loss": 2.0099, "step": 239290 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.00016336193608403957, "loss": 2.2175, "step": 239295 }, { "epoch": 0.56, "grad_norm": 2.796875, "learning_rate": 0.00016336050615185834, "loss": 2.0747, "step": 239300 }, { "epoch": 0.56, "grad_norm": 1.7890625, "learning_rate": 0.00016335907619803198, "loss": 2.1027, "step": 239305 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.000163357646222561, "loss": 2.1521, "step": 239310 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.0001633562162254459, "loss": 2.1634, "step": 239315 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016335478620668723, "loss": 2.0578, "step": 239320 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016335335616628533, "loss": 1.9706, "step": 239325 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016335192610424082, "loss": 2.2783, "step": 239330 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016335049602055413, "loss": 2.1295, "step": 239335 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.00016334906591522578, "loss": 2.1578, "step": 239340 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016334763578825624, "loss": 2.2323, "step": 239345 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016334620563964594, "loss": 2.1836, "step": 239350 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016334477546939548, "loss": 2.0804, "step": 239355 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016334334527750527, "loss": 2.2006, "step": 239360 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016334191506397585, "loss": 2.2051, "step": 239365 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016334048482880768, "loss": 2.0319, "step": 239370 }, { "epoch": 0.56, "grad_norm": 2.640625, "learning_rate": 0.00016333905457200122, "loss": 2.1246, "step": 239375 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016333762429355703, "loss": 2.108, "step": 239380 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016333619399347556, "loss": 2.1462, "step": 239385 }, { "epoch": 0.56, "grad_norm": 2.453125, "learning_rate": 0.00016333476367175727, "loss": 2.1408, "step": 239390 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.0001633333333284027, "loss": 1.9505, "step": 239395 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001633319029634123, "loss": 1.968, "step": 239400 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016333047257678656, "loss": 2.1882, "step": 239405 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.000163329042168526, "loss": 2.1446, "step": 239410 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016332761173863108, "loss": 2.0234, "step": 239415 }, { "epoch": 0.56, "grad_norm": 2.59375, "learning_rate": 0.00016332618128710234, "loss": 1.9732, "step": 239420 }, { "epoch": 0.56, "grad_norm": 2.859375, "learning_rate": 0.0001633247508139402, "loss": 2.0175, "step": 239425 }, { "epoch": 0.56, "grad_norm": 1.8203125, "learning_rate": 0.00016332332031914515, "loss": 2.0207, "step": 239430 }, { "epoch": 0.56, "grad_norm": 2.671875, "learning_rate": 0.00016332188980271772, "loss": 2.1324, "step": 239435 }, { "epoch": 0.56, "grad_norm": 2.53125, "learning_rate": 0.0001633204592646584, "loss": 2.118, "step": 239440 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016331902870496764, "loss": 2.0787, "step": 239445 }, { "epoch": 0.56, "grad_norm": 2.53125, "learning_rate": 0.00016331759812364597, "loss": 2.1308, "step": 239450 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016331616752069384, "loss": 2.1132, "step": 239455 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016331473689611177, "loss": 2.1263, "step": 239460 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016331330624990028, "loss": 2.0933, "step": 239465 }, { "epoch": 0.56, "grad_norm": 1.9921875, "learning_rate": 0.0001633118755820598, "loss": 2.2494, "step": 239470 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.0001633104448925908, "loss": 2.0077, "step": 239475 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.00016330901418149383, "loss": 1.9722, "step": 239480 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016330758344876932, "loss": 2.1803, "step": 239485 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.0001633061526944178, "loss": 2.1633, "step": 239490 }, { "epoch": 0.56, "grad_norm": 2.15625, "learning_rate": 0.00016330472191843976, "loss": 2.0772, "step": 239495 }, { "epoch": 0.56, "grad_norm": 1.8515625, "learning_rate": 0.0001633032911208357, "loss": 2.133, "step": 239500 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016330186030160605, "loss": 2.201, "step": 239505 }, { "epoch": 0.56, "grad_norm": 1.5625, "learning_rate": 0.00016330042946075137, "loss": 1.9784, "step": 239510 }, { "epoch": 0.56, "grad_norm": 3.078125, "learning_rate": 0.00016329899859827206, "loss": 2.1376, "step": 239515 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016329756771416872, "loss": 2.1766, "step": 239520 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016329613680844178, "loss": 2.0841, "step": 239525 }, { "epoch": 0.56, "grad_norm": 2.484375, "learning_rate": 0.00016329470588109172, "loss": 2.1042, "step": 239530 }, { "epoch": 0.56, "grad_norm": 1.9609375, "learning_rate": 0.00016329327493211905, "loss": 2.1163, "step": 239535 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.0001632918439615242, "loss": 1.9572, "step": 239540 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016329041296930775, "loss": 2.0014, "step": 239545 }, { "epoch": 0.56, "grad_norm": 1.9140625, "learning_rate": 0.00016328898195547016, "loss": 2.0719, "step": 239550 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001632875509200119, "loss": 1.9353, "step": 239555 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016328611986293344, "loss": 2.0705, "step": 239560 }, { "epoch": 0.56, "grad_norm": 2.453125, "learning_rate": 0.00016328468878423533, "loss": 2.2064, "step": 239565 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.000163283257683918, "loss": 1.994, "step": 239570 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016328182656198196, "loss": 2.0983, "step": 239575 }, { "epoch": 0.56, "grad_norm": 1.9375, "learning_rate": 0.0001632803954184277, "loss": 2.235, "step": 239580 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.00016327896425325574, "loss": 2.1964, "step": 239585 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016327753306646646, "loss": 2.0437, "step": 239590 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.0001632761018580605, "loss": 1.9947, "step": 239595 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016327467062803825, "loss": 2.0593, "step": 239600 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016327323937640023, "loss": 2.0482, "step": 239605 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016327180810314691, "loss": 1.9935, "step": 239610 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016327037680827883, "loss": 2.0424, "step": 239615 }, { "epoch": 0.56, "grad_norm": 1.7890625, "learning_rate": 0.0001632689454917964, "loss": 2.1087, "step": 239620 }, { "epoch": 0.56, "grad_norm": 2.25, "learning_rate": 0.00016326751415370017, "loss": 2.19, "step": 239625 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.0001632660827939906, "loss": 2.1051, "step": 239630 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001632646514126682, "loss": 2.1092, "step": 239635 }, { "epoch": 0.56, "grad_norm": 2.46875, "learning_rate": 0.00016326322000973347, "loss": 2.0704, "step": 239640 }, { "epoch": 0.56, "grad_norm": 2.421875, "learning_rate": 0.0001632617885851868, "loss": 2.0719, "step": 239645 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016326035713902886, "loss": 2.2074, "step": 239650 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016325892567125994, "loss": 2.1641, "step": 239655 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.0001632574941818807, "loss": 2.0342, "step": 239660 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.0001632560626708915, "loss": 2.0717, "step": 239665 }, { "epoch": 0.56, "grad_norm": 1.9140625, "learning_rate": 0.00016325463113829288, "loss": 2.2294, "step": 239670 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.00016325319958408535, "loss": 2.005, "step": 239675 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.0001632517680082694, "loss": 2.0495, "step": 239680 }, { "epoch": 0.56, "grad_norm": 2.6875, "learning_rate": 0.00016325033641084548, "loss": 2.0714, "step": 239685 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016324890479181412, "loss": 2.0304, "step": 239690 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016324747315117576, "loss": 1.9859, "step": 239695 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.0001632460414889309, "loss": 1.9387, "step": 239700 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016324460980508007, "loss": 2.1401, "step": 239705 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016324317809962374, "loss": 2.0068, "step": 239710 }, { "epoch": 0.56, "grad_norm": 1.9765625, "learning_rate": 0.00016324174637256236, "loss": 2.2251, "step": 239715 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.0001632403146238965, "loss": 2.0364, "step": 239720 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.00016323888285362656, "loss": 2.234, "step": 239725 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.0001632374510617531, "loss": 1.9669, "step": 239730 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016323601924827656, "loss": 2.1464, "step": 239735 }, { "epoch": 0.56, "grad_norm": 2.34375, "learning_rate": 0.00016323458741319746, "loss": 1.9105, "step": 239740 }, { "epoch": 0.56, "grad_norm": 1.859375, "learning_rate": 0.00016323315555651628, "loss": 2.0199, "step": 239745 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.0001632317236782335, "loss": 1.9066, "step": 239750 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.0001632302917783496, "loss": 2.2293, "step": 239755 }, { "epoch": 0.56, "grad_norm": 2.4375, "learning_rate": 0.00016322885985686512, "loss": 1.8936, "step": 239760 }, { "epoch": 0.56, "grad_norm": 1.890625, "learning_rate": 0.0001632274279137805, "loss": 1.9097, "step": 239765 }, { "epoch": 0.56, "grad_norm": 2.71875, "learning_rate": 0.00016322599594909625, "loss": 2.1727, "step": 239770 }, { "epoch": 0.56, "grad_norm": 1.9453125, "learning_rate": 0.00016322456396281285, "loss": 2.2617, "step": 239775 }, { "epoch": 0.56, "grad_norm": 1.90625, "learning_rate": 0.00016322313195493078, "loss": 1.9894, "step": 239780 }, { "epoch": 0.56, "grad_norm": 1.7734375, "learning_rate": 0.00016322169992545057, "loss": 2.1105, "step": 239785 }, { "epoch": 0.56, "grad_norm": 1.984375, "learning_rate": 0.00016322026787437264, "loss": 2.116, "step": 239790 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016321883580169754, "loss": 2.1512, "step": 239795 }, { "epoch": 0.56, "grad_norm": 1.7578125, "learning_rate": 0.00016321740370742576, "loss": 2.0025, "step": 239800 }, { "epoch": 0.56, "grad_norm": 2.046875, "learning_rate": 0.00016321597159155773, "loss": 2.275, "step": 239805 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016321453945409398, "loss": 1.9502, "step": 239810 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016321310729503502, "loss": 2.4019, "step": 239815 }, { "epoch": 0.56, "grad_norm": 2.109375, "learning_rate": 0.0001632116751143813, "loss": 2.0561, "step": 239820 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.00016321024291213334, "loss": 2.1417, "step": 239825 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.0001632088106882916, "loss": 2.0695, "step": 239830 }, { "epoch": 0.56, "grad_norm": 2.71875, "learning_rate": 0.00016320737844285657, "loss": 2.2442, "step": 239835 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.0001632059461758288, "loss": 2.1403, "step": 239840 }, { "epoch": 0.56, "grad_norm": 1.7890625, "learning_rate": 0.0001632045138872087, "loss": 2.1162, "step": 239845 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016320308157699681, "loss": 2.0564, "step": 239850 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.0001632016492451936, "loss": 2.125, "step": 239855 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016320021689179956, "loss": 2.1616, "step": 239860 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016319878451681515, "loss": 2.2194, "step": 239865 }, { "epoch": 0.56, "grad_norm": 2.8125, "learning_rate": 0.0001631973521202409, "loss": 2.2199, "step": 239870 }, { "epoch": 0.56, "grad_norm": 1.6953125, "learning_rate": 0.00016319591970207734, "loss": 2.0338, "step": 239875 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016319448726232486, "loss": 2.2206, "step": 239880 }, { "epoch": 0.56, "grad_norm": 2.203125, "learning_rate": 0.000163193054800984, "loss": 2.0828, "step": 239885 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.00016319162231805522, "loss": 2.2058, "step": 239890 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.0001631901898135391, "loss": 2.2438, "step": 239895 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.00016318875728743603, "loss": 2.0632, "step": 239900 }, { "epoch": 0.56, "grad_norm": 2.453125, "learning_rate": 0.00016318732473974654, "loss": 2.3137, "step": 239905 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016318589217047112, "loss": 1.9481, "step": 239910 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.00016318445957961018, "loss": 2.1655, "step": 239915 }, { "epoch": 0.56, "grad_norm": 2.21875, "learning_rate": 0.00016318302696716435, "loss": 2.0155, "step": 239920 }, { "epoch": 0.56, "grad_norm": 2.015625, "learning_rate": 0.00016318159433313407, "loss": 1.8922, "step": 239925 }, { "epoch": 0.56, "grad_norm": 2.0, "learning_rate": 0.00016318016167751978, "loss": 2.0108, "step": 239930 }, { "epoch": 0.56, "grad_norm": 2.375, "learning_rate": 0.00016317872900032204, "loss": 2.1395, "step": 239935 }, { "epoch": 0.56, "grad_norm": 2.3125, "learning_rate": 0.00016317729630154127, "loss": 2.2637, "step": 239940 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016317586358117797, "loss": 2.0879, "step": 239945 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016317443083923268, "loss": 2.1634, "step": 239950 }, { "epoch": 0.56, "grad_norm": 1.96875, "learning_rate": 0.00016317299807570585, "loss": 2.1402, "step": 239955 }, { "epoch": 0.56, "grad_norm": 2.078125, "learning_rate": 0.00016317156529059798, "loss": 2.1211, "step": 239960 }, { "epoch": 0.56, "grad_norm": 2.0625, "learning_rate": 0.00016317013248390953, "loss": 2.1819, "step": 239965 }, { "epoch": 0.56, "grad_norm": 2.328125, "learning_rate": 0.00016316869965564106, "loss": 2.0208, "step": 239970 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.00016316726680579299, "loss": 2.223, "step": 239975 }, { "epoch": 0.56, "grad_norm": 2.03125, "learning_rate": 0.00016316583393436584, "loss": 1.9659, "step": 239980 }, { "epoch": 0.56, "grad_norm": 2.359375, "learning_rate": 0.0001631644010413601, "loss": 2.0907, "step": 239985 }, { "epoch": 0.56, "grad_norm": 2.171875, "learning_rate": 0.00016316296812677624, "loss": 2.0439, "step": 239990 }, { "epoch": 0.56, "grad_norm": 2.296875, "learning_rate": 0.0001631615351906148, "loss": 1.9836, "step": 239995 }, { "epoch": 0.56, "grad_norm": 2.125, "learning_rate": 0.00016316010223287622, "loss": 2.0259, "step": 240000 }, { "epoch": 0.56, "grad_norm": 2.515625, "learning_rate": 0.000163158669253561, "loss": 2.0979, "step": 240005 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.00016315723625266962, "loss": 2.2108, "step": 240010 }, { "epoch": 0.56, "grad_norm": 1.7890625, "learning_rate": 0.0001631558032302026, "loss": 2.0766, "step": 240015 }, { "epoch": 0.56, "grad_norm": 2.234375, "learning_rate": 0.0001631543701861604, "loss": 2.3185, "step": 240020 }, { "epoch": 0.56, "grad_norm": 2.1875, "learning_rate": 0.0001631529371205435, "loss": 1.902, "step": 240025 }, { "epoch": 0.56, "grad_norm": 1.953125, "learning_rate": 0.00016315150403335243, "loss": 2.1473, "step": 240030 }, { "epoch": 0.56, "grad_norm": 2.09375, "learning_rate": 0.0001631500709245877, "loss": 1.9616, "step": 240035 }, { "epoch": 0.56, "grad_norm": 2.5, "learning_rate": 0.0001631486377942497, "loss": 2.1448, "step": 240040 }, { "epoch": 0.56, "grad_norm": 1.8984375, "learning_rate": 0.000163147204642339, "loss": 1.8804, "step": 240045 }, { "epoch": 0.56, "grad_norm": 1.84375, "learning_rate": 0.0001631457714688561, "loss": 2.1275, "step": 240050 }, { "epoch": 0.56, "grad_norm": 2.265625, "learning_rate": 0.00016314433827380143, "loss": 2.0522, "step": 240055 }, { "epoch": 0.56, "grad_norm": 1.8359375, "learning_rate": 0.00016314290505717554, "loss": 2.124, "step": 240060 }, { "epoch": 0.56, "grad_norm": 2.71875, "learning_rate": 0.00016314147181897885, "loss": 1.9543, "step": 240065 }, { "epoch": 0.56, "grad_norm": 2.40625, "learning_rate": 0.0001631400385592119, "loss": 2.1887, "step": 240070 }, { "epoch": 0.56, "grad_norm": 2.28125, "learning_rate": 0.00016313860527787517, "loss": 2.2244, "step": 240075 }, { "epoch": 0.56, "grad_norm": 2.140625, "learning_rate": 0.00016313717197496917, "loss": 2.0636, "step": 240080 }, { "epoch": 0.56, "grad_norm": 2.390625, "learning_rate": 0.00016313573865049433, "loss": 2.1254, "step": 240085 }, { "epoch": 0.57, "grad_norm": 1.8046875, "learning_rate": 0.0001631343053044512, "loss": 2.0815, "step": 240090 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016313287193684024, "loss": 2.0405, "step": 240095 }, { "epoch": 0.57, "grad_norm": 2.609375, "learning_rate": 0.00016313143854766192, "loss": 2.1361, "step": 240100 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016313000513691682, "loss": 2.1539, "step": 240105 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.0001631285717046053, "loss": 2.2306, "step": 240110 }, { "epoch": 0.57, "grad_norm": 2.671875, "learning_rate": 0.00016312713825072795, "loss": 2.0958, "step": 240115 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016312570477528522, "loss": 1.9962, "step": 240120 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.0001631242712782776, "loss": 2.0915, "step": 240125 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016312283775970555, "loss": 2.1307, "step": 240130 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016312140421956964, "loss": 2.2107, "step": 240135 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.0001631199706578703, "loss": 2.1942, "step": 240140 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016311853707460806, "loss": 2.0838, "step": 240145 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016311710346978336, "loss": 2.047, "step": 240150 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.0001631156698433967, "loss": 1.9797, "step": 240155 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016311423619544857, "loss": 2.1503, "step": 240160 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.0001631128025259395, "loss": 2.1654, "step": 240165 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016311136883486994, "loss": 2.1302, "step": 240170 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016310993512224042, "loss": 2.1618, "step": 240175 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.00016310850138805138, "loss": 2.0177, "step": 240180 }, { "epoch": 0.57, "grad_norm": 3.0625, "learning_rate": 0.0001631070676323033, "loss": 2.1108, "step": 240185 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016310563385499676, "loss": 2.0429, "step": 240190 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016310420005613216, "loss": 2.0223, "step": 240195 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016310276623571004, "loss": 1.9308, "step": 240200 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016310133239373084, "loss": 2.0869, "step": 240205 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016309989853019506, "loss": 1.9232, "step": 240210 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016309846464510327, "loss": 2.1588, "step": 240215 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016309703073845588, "loss": 2.254, "step": 240220 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016309559681025338, "loss": 2.1745, "step": 240225 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016309416286049632, "loss": 2.0807, "step": 240230 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.0001630927288891851, "loss": 1.9542, "step": 240235 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.0001630912948963203, "loss": 1.9314, "step": 240240 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016308986088190236, "loss": 2.2037, "step": 240245 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016308842684593176, "loss": 2.1533, "step": 240250 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016308699278840905, "loss": 2.0753, "step": 240255 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016308555870933464, "loss": 2.005, "step": 240260 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016308412460870907, "loss": 2.0246, "step": 240265 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016308269048653283, "loss": 2.0839, "step": 240270 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.0001630812563428064, "loss": 2.1116, "step": 240275 }, { "epoch": 0.57, "grad_norm": 1.828125, "learning_rate": 0.00016307982217753024, "loss": 2.1802, "step": 240280 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001630783879907049, "loss": 2.1905, "step": 240285 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016307695378233082, "loss": 2.2413, "step": 240290 }, { "epoch": 0.57, "grad_norm": 1.90625, "learning_rate": 0.00016307551955240852, "loss": 2.1144, "step": 240295 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016307408530093844, "loss": 2.0515, "step": 240300 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016307265102792116, "loss": 2.0556, "step": 240305 }, { "epoch": 0.57, "grad_norm": 3.09375, "learning_rate": 0.0001630712167333571, "loss": 2.162, "step": 240310 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016306978241724678, "loss": 2.11, "step": 240315 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016306834807959067, "loss": 2.1737, "step": 240320 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016306691372038924, "loss": 1.9687, "step": 240325 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016306547933964304, "loss": 2.1532, "step": 240330 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016306404493735251, "loss": 2.146, "step": 240335 }, { "epoch": 0.57, "grad_norm": 1.9453125, "learning_rate": 0.0001630626105135182, "loss": 2.1039, "step": 240340 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.0001630611760681405, "loss": 2.2102, "step": 240345 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.00016305974160122, "loss": 2.1747, "step": 240350 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016305830711275712, "loss": 1.9708, "step": 240355 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016305687260275242, "loss": 1.9312, "step": 240360 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016305543807120635, "loss": 2.286, "step": 240365 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016305400351811935, "loss": 2.1764, "step": 240370 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016305256894349197, "loss": 2.0657, "step": 240375 }, { "epoch": 0.57, "grad_norm": 1.8359375, "learning_rate": 0.0001630511343473247, "loss": 2.1541, "step": 240380 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016304969972961804, "loss": 2.0898, "step": 240385 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.0001630482650903724, "loss": 2.2495, "step": 240390 }, { "epoch": 0.57, "grad_norm": 2.71875, "learning_rate": 0.00016304683042958837, "loss": 2.1631, "step": 240395 }, { "epoch": 0.57, "grad_norm": 1.921875, "learning_rate": 0.0001630453957472664, "loss": 2.0967, "step": 240400 }, { "epoch": 0.57, "grad_norm": 1.6875, "learning_rate": 0.000163043961043407, "loss": 1.9774, "step": 240405 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.0001630425263180106, "loss": 2.2891, "step": 240410 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016304109157107773, "loss": 2.1295, "step": 240415 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001630396568026089, "loss": 1.9128, "step": 240420 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.0001630382220126046, "loss": 2.1375, "step": 240425 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016303678720106527, "loss": 2.1413, "step": 240430 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001630353523679914, "loss": 2.2072, "step": 240435 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016303391751338358, "loss": 2.0985, "step": 240440 }, { "epoch": 0.57, "grad_norm": 3.234375, "learning_rate": 0.0001630324826372422, "loss": 2.1182, "step": 240445 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016303104773956776, "loss": 2.0093, "step": 240450 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016302961282036078, "loss": 2.1874, "step": 240455 }, { "epoch": 0.57, "grad_norm": 1.828125, "learning_rate": 0.00016302817787962174, "loss": 2.0918, "step": 240460 }, { "epoch": 0.57, "grad_norm": 1.90625, "learning_rate": 0.00016302674291735118, "loss": 2.1312, "step": 240465 }, { "epoch": 0.57, "grad_norm": 2.390625, "learning_rate": 0.00016302530793354947, "loss": 2.0977, "step": 240470 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016302387292821722, "loss": 2.2606, "step": 240475 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016302243790135485, "loss": 2.0164, "step": 240480 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.00016302100285296288, "loss": 2.0206, "step": 240485 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016301956778304178, "loss": 2.1313, "step": 240490 }, { "epoch": 0.57, "grad_norm": 1.890625, "learning_rate": 0.0001630181326915921, "loss": 2.2047, "step": 240495 }, { "epoch": 0.57, "grad_norm": 2.671875, "learning_rate": 0.00016301669757861422, "loss": 2.0239, "step": 240500 }, { "epoch": 0.57, "grad_norm": 3.1875, "learning_rate": 0.0001630152624441087, "loss": 2.1588, "step": 240505 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.0001630138272880761, "loss": 2.1423, "step": 240510 }, { "epoch": 0.57, "grad_norm": 1.8671875, "learning_rate": 0.00016301239211051675, "loss": 1.906, "step": 240515 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016301095691143128, "loss": 2.2104, "step": 240520 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016300952169082006, "loss": 1.9507, "step": 240525 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016300808644868368, "loss": 2.163, "step": 240530 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016300665118502262, "loss": 1.9272, "step": 240535 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001630052158998373, "loss": 2.1216, "step": 240540 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016300378059312827, "loss": 2.0297, "step": 240545 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016300234526489603, "loss": 2.1696, "step": 240550 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016300090991514101, "loss": 2.3286, "step": 240555 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016299947454386375, "loss": 1.9007, "step": 240560 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016299803915106474, "loss": 2.1138, "step": 240565 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016299660373674443, "loss": 2.1915, "step": 240570 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016299516830090339, "loss": 2.1563, "step": 240575 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.000162993732843542, "loss": 2.2295, "step": 240580 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016299229736466085, "loss": 2.1479, "step": 240585 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.0001629908618642604, "loss": 1.8041, "step": 240590 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016298942634234106, "loss": 2.2716, "step": 240595 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016298799079890347, "loss": 2.159, "step": 240600 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016298655523394796, "loss": 1.9839, "step": 240605 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016298511964747515, "loss": 1.9177, "step": 240610 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016298368403948546, "loss": 1.9632, "step": 240615 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016298224840997943, "loss": 2.0903, "step": 240620 }, { "epoch": 0.57, "grad_norm": 1.8515625, "learning_rate": 0.0001629808127589575, "loss": 2.1219, "step": 240625 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.0001629793770864202, "loss": 2.1374, "step": 240630 }, { "epoch": 0.57, "grad_norm": 2.390625, "learning_rate": 0.00016297794139236797, "loss": 2.1772, "step": 240635 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016297650567680133, "loss": 2.1317, "step": 240640 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016297506993972083, "loss": 2.1561, "step": 240645 }, { "epoch": 0.57, "grad_norm": 1.796875, "learning_rate": 0.00016297363418112684, "loss": 2.1339, "step": 240650 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016297219840101995, "loss": 2.0196, "step": 240655 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016297076259940064, "loss": 2.1477, "step": 240660 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016296932677626935, "loss": 1.9701, "step": 240665 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016296789093162657, "loss": 2.1825, "step": 240670 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016296645506547286, "loss": 2.1227, "step": 240675 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016296501917780862, "loss": 2.1859, "step": 240680 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016296358326863445, "loss": 2.0263, "step": 240685 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.0001629621473379507, "loss": 1.9677, "step": 240690 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.000162960711385758, "loss": 2.1708, "step": 240695 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016295927541205675, "loss": 2.0436, "step": 240700 }, { "epoch": 0.57, "grad_norm": 1.7734375, "learning_rate": 0.00016295783941684748, "loss": 1.9738, "step": 240705 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016295640340013064, "loss": 2.0958, "step": 240710 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016295496736190682, "loss": 2.1187, "step": 240715 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016295353130217638, "loss": 2.2101, "step": 240720 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.0001629520952209399, "loss": 2.0852, "step": 240725 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016295065911819782, "loss": 1.9301, "step": 240730 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016294922299395069, "loss": 1.8519, "step": 240735 }, { "epoch": 0.57, "grad_norm": 1.78125, "learning_rate": 0.00016294778684819894, "loss": 2.0118, "step": 240740 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016294635068094307, "loss": 2.2016, "step": 240745 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.0001629449144921836, "loss": 2.0126, "step": 240750 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016294347828192101, "loss": 2.0438, "step": 240755 }, { "epoch": 0.57, "grad_norm": 1.9296875, "learning_rate": 0.00016294204205015577, "loss": 2.2381, "step": 240760 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.0001629406057968884, "loss": 2.2937, "step": 240765 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016293916952211937, "loss": 2.1748, "step": 240770 }, { "epoch": 0.57, "grad_norm": 2.859375, "learning_rate": 0.0001629377332258492, "loss": 2.1741, "step": 240775 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016293629690807836, "loss": 2.1286, "step": 240780 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016293486056880732, "loss": 2.1866, "step": 240785 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.0001629334242080366, "loss": 2.1209, "step": 240790 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016293198782576664, "loss": 2.0907, "step": 240795 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016293055142199803, "loss": 2.1485, "step": 240800 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016292911499673118, "loss": 2.0602, "step": 240805 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.0001629276785499666, "loss": 2.149, "step": 240810 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016292624208170478, "loss": 2.2313, "step": 240815 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016292480559194623, "loss": 2.0247, "step": 240820 }, { "epoch": 0.57, "grad_norm": 1.953125, "learning_rate": 0.00016292336908069142, "loss": 2.069, "step": 240825 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016292193254794084, "loss": 2.1907, "step": 240830 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016292049599369497, "loss": 2.1331, "step": 240835 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016291905941795437, "loss": 2.0912, "step": 240840 }, { "epoch": 0.57, "grad_norm": 2.6875, "learning_rate": 0.00016291762282071942, "loss": 2.1197, "step": 240845 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016291618620199069, "loss": 2.053, "step": 240850 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016291474956176867, "loss": 1.9875, "step": 240855 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.0001629133129000538, "loss": 2.1291, "step": 240860 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016291187621684662, "loss": 2.0708, "step": 240865 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.0001629104395121476, "loss": 1.9184, "step": 240870 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.0001629090027859572, "loss": 1.9138, "step": 240875 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.000162907566038276, "loss": 2.1355, "step": 240880 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016290612926910442, "loss": 2.2322, "step": 240885 }, { "epoch": 0.57, "grad_norm": 2.59375, "learning_rate": 0.00016290469247844293, "loss": 2.1623, "step": 240890 }, { "epoch": 0.57, "grad_norm": 1.8671875, "learning_rate": 0.0001629032556662921, "loss": 2.1757, "step": 240895 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016290181883265234, "loss": 1.9605, "step": 240900 }, { "epoch": 0.57, "grad_norm": 1.921875, "learning_rate": 0.00016290038197752418, "loss": 2.1228, "step": 240905 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016289894510090812, "loss": 2.2147, "step": 240910 }, { "epoch": 0.57, "grad_norm": 2.703125, "learning_rate": 0.00016289750820280466, "loss": 2.2452, "step": 240915 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016289607128321424, "loss": 1.9044, "step": 240920 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.0001628946343421374, "loss": 2.0122, "step": 240925 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.0001628931973795746, "loss": 2.2382, "step": 240930 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016289176039552635, "loss": 2.2092, "step": 240935 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016289032338999315, "loss": 2.2012, "step": 240940 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016288888636297545, "loss": 2.0318, "step": 240945 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016288744931447377, "loss": 1.9581, "step": 240950 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001628860122444886, "loss": 2.105, "step": 240955 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016288457515302045, "loss": 2.0398, "step": 240960 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016288313804006975, "loss": 2.1553, "step": 240965 }, { "epoch": 0.57, "grad_norm": 4.25, "learning_rate": 0.00016288170090563705, "loss": 1.9389, "step": 240970 }, { "epoch": 0.57, "grad_norm": 1.9921875, "learning_rate": 0.00016288026374972284, "loss": 1.9875, "step": 240975 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016287882657232755, "loss": 2.1486, "step": 240980 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016287738937345174, "loss": 2.283, "step": 240985 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016287595215309586, "loss": 1.9538, "step": 240990 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016287451491126044, "loss": 2.0556, "step": 240995 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.0001628730776479459, "loss": 2.1689, "step": 241000 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001628716403631528, "loss": 2.223, "step": 241005 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.0001628702030568816, "loss": 1.8622, "step": 241010 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016286876572913284, "loss": 2.187, "step": 241015 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016286732837990692, "loss": 2.0677, "step": 241020 }, { "epoch": 0.57, "grad_norm": 1.75, "learning_rate": 0.00016286589100920443, "loss": 2.0249, "step": 241025 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016286445361702576, "loss": 2.2167, "step": 241030 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016286301620337148, "loss": 2.0865, "step": 241035 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016286157876824203, "loss": 2.0493, "step": 241040 }, { "epoch": 0.57, "grad_norm": 1.9140625, "learning_rate": 0.00016286014131163792, "loss": 2.0523, "step": 241045 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016285870383355968, "loss": 2.1104, "step": 241050 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016285726633400778, "loss": 2.2748, "step": 241055 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016285582881298265, "loss": 2.081, "step": 241060 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016285439127048483, "loss": 1.8653, "step": 241065 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016285295370651484, "loss": 2.0063, "step": 241070 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016285151612107312, "loss": 1.9783, "step": 241075 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.0001628500785141602, "loss": 2.0367, "step": 241080 }, { "epoch": 0.57, "grad_norm": 2.90625, "learning_rate": 0.00016284864088577655, "loss": 2.2885, "step": 241085 }, { "epoch": 0.57, "grad_norm": 1.796875, "learning_rate": 0.00016284720323592265, "loss": 2.0339, "step": 241090 }, { "epoch": 0.57, "grad_norm": 1.953125, "learning_rate": 0.000162845765564599, "loss": 2.308, "step": 241095 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016284432787180614, "loss": 2.1815, "step": 241100 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.0001628428901575445, "loss": 2.0413, "step": 241105 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016284145242181458, "loss": 2.1536, "step": 241110 }, { "epoch": 0.57, "grad_norm": 1.8203125, "learning_rate": 0.00016284001466461686, "loss": 2.0941, "step": 241115 }, { "epoch": 0.57, "grad_norm": 1.90625, "learning_rate": 0.00016283857688595188, "loss": 2.1361, "step": 241120 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.0001628371390858201, "loss": 2.0521, "step": 241125 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.000162835701264222, "loss": 2.0266, "step": 241130 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016283426342115812, "loss": 2.1408, "step": 241135 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016283282555662885, "loss": 2.0972, "step": 241140 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.0001628313876706348, "loss": 2.202, "step": 241145 }, { "epoch": 0.57, "grad_norm": 2.6875, "learning_rate": 0.00016282994976317639, "loss": 2.1924, "step": 241150 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016282851183425413, "loss": 1.9726, "step": 241155 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.0001628270738838685, "loss": 2.1651, "step": 241160 }, { "epoch": 0.57, "grad_norm": 1.9921875, "learning_rate": 0.00016282563591202002, "loss": 1.9986, "step": 241165 }, { "epoch": 0.57, "grad_norm": 2.828125, "learning_rate": 0.00016282419791870914, "loss": 2.1805, "step": 241170 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016282275990393643, "loss": 2.1794, "step": 241175 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016282132186770228, "loss": 2.0696, "step": 241180 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016281988381000722, "loss": 2.0179, "step": 241185 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.0001628184457308518, "loss": 2.2027, "step": 241190 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016281700763023645, "loss": 2.1113, "step": 241195 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016281556950816162, "loss": 2.1552, "step": 241200 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016281413136462786, "loss": 2.1134, "step": 241205 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016281269319963569, "loss": 1.9964, "step": 241210 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016281125501318555, "loss": 2.1368, "step": 241215 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016280981680527793, "loss": 2.2225, "step": 241220 }, { "epoch": 0.57, "grad_norm": 2.75, "learning_rate": 0.00016280837857591337, "loss": 1.947, "step": 241225 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.0001628069403250923, "loss": 2.1382, "step": 241230 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016280550205281525, "loss": 2.0884, "step": 241235 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016280406375908268, "loss": 2.3719, "step": 241240 }, { "epoch": 0.57, "grad_norm": 1.9296875, "learning_rate": 0.00016280262544389514, "loss": 2.123, "step": 241245 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016280118710725305, "loss": 2.0799, "step": 241250 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016279974874915695, "loss": 2.3546, "step": 241255 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016279831036960734, "loss": 2.1315, "step": 241260 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016279687196860468, "loss": 2.0638, "step": 241265 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016279543354614945, "loss": 2.1174, "step": 241270 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016279399510224217, "loss": 2.1517, "step": 241275 }, { "epoch": 0.57, "grad_norm": 2.65625, "learning_rate": 0.0001627925566368833, "loss": 2.1414, "step": 241280 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016279111815007338, "loss": 2.1266, "step": 241285 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.0001627896796418129, "loss": 2.2835, "step": 241290 }, { "epoch": 0.57, "grad_norm": 1.671875, "learning_rate": 0.00016278824111210226, "loss": 2.1052, "step": 241295 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016278680256094207, "loss": 2.3478, "step": 241300 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016278536398833275, "loss": 2.1774, "step": 241305 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.0001627839253942748, "loss": 2.2355, "step": 241310 }, { "epoch": 0.57, "grad_norm": 2.546875, "learning_rate": 0.00016278248677876878, "loss": 2.0726, "step": 241315 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.00016278104814181508, "loss": 2.0544, "step": 241320 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016277960948341425, "loss": 2.0653, "step": 241325 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016277817080356672, "loss": 2.3017, "step": 241330 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016277673210227307, "loss": 1.9667, "step": 241335 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016277529337953373, "loss": 2.3555, "step": 241340 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016277385463534923, "loss": 2.1666, "step": 241345 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016277241586972006, "loss": 2.0503, "step": 241350 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.00016277097708264666, "loss": 2.0628, "step": 241355 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.0001627695382741296, "loss": 2.0161, "step": 241360 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016276809944416925, "loss": 2.0833, "step": 241365 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016276666059276623, "loss": 2.0144, "step": 241370 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016276522171992098, "loss": 2.1488, "step": 241375 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016276378282563397, "loss": 2.1852, "step": 241380 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016276234390990573, "loss": 2.147, "step": 241385 }, { "epoch": 0.57, "grad_norm": 2.46875, "learning_rate": 0.00016276090497273672, "loss": 2.0003, "step": 241390 }, { "epoch": 0.57, "grad_norm": 1.859375, "learning_rate": 0.0001627594660141275, "loss": 2.0704, "step": 241395 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016275802703407844, "loss": 2.2133, "step": 241400 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016275658803259016, "loss": 1.9811, "step": 241405 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016275514900966306, "loss": 2.1415, "step": 241410 }, { "epoch": 0.57, "grad_norm": 2.59375, "learning_rate": 0.00016275370996529764, "loss": 2.0747, "step": 241415 }, { "epoch": 0.57, "grad_norm": 1.8125, "learning_rate": 0.00016275227089949446, "loss": 2.0319, "step": 241420 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.00016275083181225392, "loss": 2.0862, "step": 241425 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.0001627493927035766, "loss": 1.8984, "step": 241430 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016274795357346293, "loss": 2.2727, "step": 241435 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.0001627465144219134, "loss": 2.1844, "step": 241440 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016274507524892856, "loss": 2.032, "step": 241445 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016274363605450884, "loss": 2.2855, "step": 241450 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016274219683865479, "loss": 2.053, "step": 241455 }, { "epoch": 0.57, "grad_norm": 1.8359375, "learning_rate": 0.00016274075760136682, "loss": 2.0278, "step": 241460 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.0001627393183426455, "loss": 2.174, "step": 241465 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016273787906249127, "loss": 2.0122, "step": 241470 }, { "epoch": 0.57, "grad_norm": 1.78125, "learning_rate": 0.00016273643976090466, "loss": 2.2298, "step": 241475 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016273500043788617, "loss": 2.083, "step": 241480 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016273356109343623, "loss": 2.1377, "step": 241485 }, { "epoch": 0.57, "grad_norm": 2.609375, "learning_rate": 0.00016273212172755538, "loss": 2.2188, "step": 241490 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016273068234024408, "loss": 2.126, "step": 241495 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016272924293150284, "loss": 2.0478, "step": 241500 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016272780350133216, "loss": 2.2236, "step": 241505 }, { "epoch": 0.57, "grad_norm": 1.796875, "learning_rate": 0.00016272636404973255, "loss": 2.1515, "step": 241510 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016272492457670445, "loss": 2.0877, "step": 241515 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016272348508224838, "loss": 2.0889, "step": 241520 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016272204556636485, "loss": 2.2513, "step": 241525 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016272060602905432, "loss": 2.1555, "step": 241530 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001627191664703173, "loss": 2.2146, "step": 241535 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016271772689015426, "loss": 1.9563, "step": 241540 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001627162872885657, "loss": 2.1215, "step": 241545 }, { "epoch": 0.57, "grad_norm": 1.8046875, "learning_rate": 0.00016271484766555214, "loss": 2.0255, "step": 241550 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016271340802111406, "loss": 2.0136, "step": 241555 }, { "epoch": 0.57, "grad_norm": 1.859375, "learning_rate": 0.00016271196835525192, "loss": 2.2132, "step": 241560 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016271052866796625, "loss": 2.0631, "step": 241565 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016270908895925753, "loss": 2.2026, "step": 241570 }, { "epoch": 0.57, "grad_norm": 1.8828125, "learning_rate": 0.00016270764922912621, "loss": 2.0804, "step": 241575 }, { "epoch": 0.57, "grad_norm": 2.546875, "learning_rate": 0.00016270620947757286, "loss": 2.052, "step": 241580 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.0001627047697045979, "loss": 2.1253, "step": 241585 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016270332991020188, "loss": 2.0399, "step": 241590 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.00016270189009438525, "loss": 2.2214, "step": 241595 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016270045025714854, "loss": 2.3083, "step": 241600 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.0001626990103984922, "loss": 2.1579, "step": 241605 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016269757051841675, "loss": 2.1187, "step": 241610 }, { "epoch": 0.57, "grad_norm": 2.5, "learning_rate": 0.00016269613061692265, "loss": 2.003, "step": 241615 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016269469069401043, "loss": 2.239, "step": 241620 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016269325074968056, "loss": 1.9506, "step": 241625 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016269181078393355, "loss": 2.1357, "step": 241630 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016269037079676986, "loss": 2.2055, "step": 241635 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016268893078819004, "loss": 1.9302, "step": 241640 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.0001626874907581945, "loss": 2.0306, "step": 241645 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016268605070678382, "loss": 2.1421, "step": 241650 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.0001626846106339584, "loss": 1.9397, "step": 241655 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.0001626831705397188, "loss": 1.9545, "step": 241660 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016268173042406555, "loss": 1.9875, "step": 241665 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.000162680290286999, "loss": 2.1954, "step": 241670 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016267885012851978, "loss": 2.0472, "step": 241675 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016267740994862828, "loss": 1.9668, "step": 241680 }, { "epoch": 0.57, "grad_norm": 2.390625, "learning_rate": 0.00016267596974732507, "loss": 1.968, "step": 241685 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016267452952461062, "loss": 2.1129, "step": 241690 }, { "epoch": 0.57, "grad_norm": 1.8046875, "learning_rate": 0.00016267308928048538, "loss": 2.0685, "step": 241695 }, { "epoch": 0.57, "grad_norm": 1.7578125, "learning_rate": 0.00016267164901494994, "loss": 2.0694, "step": 241700 }, { "epoch": 0.57, "grad_norm": 1.8046875, "learning_rate": 0.00016267020872800467, "loss": 2.0771, "step": 241705 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016266876841965013, "loss": 2.0944, "step": 241710 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.0001626673280898868, "loss": 1.9881, "step": 241715 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016266588773871518, "loss": 2.1764, "step": 241720 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016266444736613578, "loss": 1.9756, "step": 241725 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016266300697214905, "loss": 2.1182, "step": 241730 }, { "epoch": 0.57, "grad_norm": 2.953125, "learning_rate": 0.00016266156655675548, "loss": 1.8241, "step": 241735 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016266012611995562, "loss": 2.0864, "step": 241740 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016265868566174988, "loss": 2.1661, "step": 241745 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016265724518213883, "loss": 2.064, "step": 241750 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016265580468112294, "loss": 1.9569, "step": 241755 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016265436415870266, "loss": 2.0789, "step": 241760 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001626529236148785, "loss": 2.1066, "step": 241765 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016265148304965102, "loss": 2.0817, "step": 241770 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016265004246302062, "loss": 2.1285, "step": 241775 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016264860185498785, "loss": 2.0725, "step": 241780 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016264716122555317, "loss": 2.1089, "step": 241785 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016264572057471708, "loss": 2.0697, "step": 241790 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016264427990248006, "loss": 2.0488, "step": 241795 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016264283920884264, "loss": 2.1826, "step": 241800 }, { "epoch": 0.57, "grad_norm": 2.640625, "learning_rate": 0.0001626413984938053, "loss": 1.9772, "step": 241805 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016263995775736852, "loss": 2.1082, "step": 241810 }, { "epoch": 0.57, "grad_norm": 2.6875, "learning_rate": 0.00016263851699953275, "loss": 1.894, "step": 241815 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016263707622029855, "loss": 2.1519, "step": 241820 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001626356354196664, "loss": 2.1473, "step": 241825 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.0001626341945976368, "loss": 2.0061, "step": 241830 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016263275375421018, "loss": 2.184, "step": 241835 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016263131288938708, "loss": 2.1166, "step": 241840 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016262987200316804, "loss": 1.9282, "step": 241845 }, { "epoch": 0.57, "grad_norm": 2.46875, "learning_rate": 0.00016262843109555344, "loss": 2.0547, "step": 241850 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016262699016654386, "loss": 2.1323, "step": 241855 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016262554921613978, "loss": 2.3125, "step": 241860 }, { "epoch": 0.57, "grad_norm": 1.9453125, "learning_rate": 0.00016262410824434165, "loss": 2.1359, "step": 241865 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016262266725115, "loss": 2.1331, "step": 241870 }, { "epoch": 0.57, "grad_norm": 2.546875, "learning_rate": 0.00016262122623656532, "loss": 2.0478, "step": 241875 }, { "epoch": 0.57, "grad_norm": 1.890625, "learning_rate": 0.00016261978520058806, "loss": 2.1862, "step": 241880 }, { "epoch": 0.57, "grad_norm": 1.8828125, "learning_rate": 0.00016261834414321876, "loss": 2.0143, "step": 241885 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.0001626169030644579, "loss": 2.2747, "step": 241890 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016261546196430596, "loss": 1.9426, "step": 241895 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016261402084276347, "loss": 2.158, "step": 241900 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016261257969983088, "loss": 2.1513, "step": 241905 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.0001626111385355087, "loss": 1.984, "step": 241910 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016260969734979743, "loss": 2.0157, "step": 241915 }, { "epoch": 0.57, "grad_norm": 1.8828125, "learning_rate": 0.00016260825614269753, "loss": 2.1611, "step": 241920 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016260681491420953, "loss": 2.1545, "step": 241925 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.0001626053736643339, "loss": 2.0908, "step": 241930 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016260393239307113, "loss": 2.0077, "step": 241935 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016260249110042175, "loss": 2.0985, "step": 241940 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.0001626010497863862, "loss": 2.2619, "step": 241945 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.000162599608450965, "loss": 2.1337, "step": 241950 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016259816709415862, "loss": 2.1908, "step": 241955 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.0001625967257159676, "loss": 2.1741, "step": 241960 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016259528431639242, "loss": 2.0366, "step": 241965 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016259384289543354, "loss": 1.9519, "step": 241970 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016259240145309142, "loss": 2.208, "step": 241975 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016259095998936665, "loss": 2.1157, "step": 241980 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016258951850425967, "loss": 2.0659, "step": 241985 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016258807699777097, "loss": 2.0572, "step": 241990 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016258663546990106, "loss": 1.8807, "step": 241995 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016258519392065037, "loss": 2.1837, "step": 242000 }, { "epoch": 0.57, "grad_norm": 1.8671875, "learning_rate": 0.00016258375235001947, "loss": 1.9683, "step": 242005 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016258231075800885, "loss": 2.1674, "step": 242010 }, { "epoch": 0.57, "grad_norm": 1.796875, "learning_rate": 0.00016258086914461894, "loss": 2.188, "step": 242015 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016257942750985032, "loss": 2.1158, "step": 242020 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.0001625779858537034, "loss": 2.1515, "step": 242025 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.0001625765441761787, "loss": 2.2331, "step": 242030 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016257510247727673, "loss": 2.1572, "step": 242035 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016257366075699795, "loss": 2.1965, "step": 242040 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.0001625722190153429, "loss": 2.0738, "step": 242045 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016257077725231206, "loss": 2.0575, "step": 242050 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016256933546790584, "loss": 2.0542, "step": 242055 }, { "epoch": 0.57, "grad_norm": 2.46875, "learning_rate": 0.00016256789366212484, "loss": 2.0747, "step": 242060 }, { "epoch": 0.57, "grad_norm": 1.8125, "learning_rate": 0.00016256645183496953, "loss": 2.0324, "step": 242065 }, { "epoch": 0.57, "grad_norm": 1.9296875, "learning_rate": 0.00016256500998644037, "loss": 2.0323, "step": 242070 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016256356811653787, "loss": 2.122, "step": 242075 }, { "epoch": 0.57, "grad_norm": 1.8984375, "learning_rate": 0.00016256212622526252, "loss": 2.0975, "step": 242080 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.0001625606843126148, "loss": 2.2552, "step": 242085 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016255924237859522, "loss": 2.061, "step": 242090 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016255780042320427, "loss": 2.0407, "step": 242095 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016255635844644246, "loss": 2.1122, "step": 242100 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.00016255491644831028, "loss": 1.8113, "step": 242105 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016255347442880816, "loss": 2.1069, "step": 242110 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016255203238793665, "loss": 2.1302, "step": 242115 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016255059032569622, "loss": 2.1615, "step": 242120 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.0001625491482420874, "loss": 2.0137, "step": 242125 }, { "epoch": 0.57, "grad_norm": 2.390625, "learning_rate": 0.00016254770613711064, "loss": 2.0983, "step": 242130 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.00016254626401076644, "loss": 2.1165, "step": 242135 }, { "epoch": 0.57, "grad_norm": 1.953125, "learning_rate": 0.00016254482186305532, "loss": 2.128, "step": 242140 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016254337969397775, "loss": 2.133, "step": 242145 }, { "epoch": 0.57, "grad_norm": 2.546875, "learning_rate": 0.00016254193750353424, "loss": 2.0353, "step": 242150 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016254049529172528, "loss": 1.9892, "step": 242155 }, { "epoch": 0.57, "grad_norm": 1.890625, "learning_rate": 0.00016253905305855132, "loss": 2.0341, "step": 242160 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.0001625376108040129, "loss": 2.2581, "step": 242165 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016253616852811046, "loss": 1.8061, "step": 242170 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016253472623084457, "loss": 2.0126, "step": 242175 }, { "epoch": 0.57, "grad_norm": 1.9453125, "learning_rate": 0.0001625332839122157, "loss": 2.0093, "step": 242180 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.0001625318415722243, "loss": 2.1743, "step": 242185 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016253039921087088, "loss": 2.1917, "step": 242190 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016252895682815596, "loss": 2.2216, "step": 242195 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016252751442408, "loss": 1.9263, "step": 242200 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016252607199864353, "loss": 1.8795, "step": 242205 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.000162524629551847, "loss": 2.081, "step": 242210 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.0001625231870836909, "loss": 2.1312, "step": 242215 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.0001625217445941758, "loss": 2.2016, "step": 242220 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016252030208330212, "loss": 2.0121, "step": 242225 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016251885955107035, "loss": 2.196, "step": 242230 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016251741699748105, "loss": 2.0703, "step": 242235 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016251597442253464, "loss": 1.9404, "step": 242240 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001625145318262316, "loss": 2.218, "step": 242245 }, { "epoch": 0.57, "grad_norm": 1.9140625, "learning_rate": 0.00016251308920857248, "loss": 2.1882, "step": 242250 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.0001625116465695578, "loss": 2.1125, "step": 242255 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016251020390918797, "loss": 2.1707, "step": 242260 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016250876122746353, "loss": 2.1776, "step": 242265 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016250731852438498, "loss": 2.102, "step": 242270 }, { "epoch": 0.57, "grad_norm": 2.90625, "learning_rate": 0.00016250587579995276, "loss": 2.1425, "step": 242275 }, { "epoch": 0.57, "grad_norm": 1.9140625, "learning_rate": 0.00016250443305416742, "loss": 1.8357, "step": 242280 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016250299028702943, "loss": 1.9432, "step": 242285 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.0001625015474985393, "loss": 1.9897, "step": 242290 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016250010468869746, "loss": 1.9298, "step": 242295 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016249866185750447, "loss": 2.1031, "step": 242300 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016249721900496084, "loss": 2.2169, "step": 242305 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016249577613106702, "loss": 2.2817, "step": 242310 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001624943332358235, "loss": 2.2458, "step": 242315 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016249289031923075, "loss": 2.1845, "step": 242320 }, { "epoch": 0.57, "grad_norm": 1.9296875, "learning_rate": 0.0001624914473812893, "loss": 2.052, "step": 242325 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016249000442199968, "loss": 2.0592, "step": 242330 }, { "epoch": 0.57, "grad_norm": 2.734375, "learning_rate": 0.00016248856144136233, "loss": 2.1323, "step": 242335 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016248711843937775, "loss": 2.1365, "step": 242340 }, { "epoch": 0.57, "grad_norm": 1.859375, "learning_rate": 0.00016248567541604642, "loss": 1.981, "step": 242345 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016248423237136885, "loss": 2.2399, "step": 242350 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016248278930534556, "loss": 2.0516, "step": 242355 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.000162481346217977, "loss": 2.0117, "step": 242360 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016247990310926368, "loss": 2.2148, "step": 242365 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.0001624784599792061, "loss": 2.0355, "step": 242370 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016247701682780473, "loss": 2.0561, "step": 242375 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016247557365506006, "loss": 2.1555, "step": 242380 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016247413046097264, "loss": 2.1889, "step": 242385 }, { "epoch": 0.57, "grad_norm": 1.9453125, "learning_rate": 0.00016247268724554293, "loss": 1.8728, "step": 242390 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016247124400877138, "loss": 2.1358, "step": 242395 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016246980075065854, "loss": 2.0306, "step": 242400 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016246835747120488, "loss": 1.928, "step": 242405 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001624669141704109, "loss": 2.081, "step": 242410 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016246547084827707, "loss": 2.1761, "step": 242415 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016246402750480393, "loss": 2.1083, "step": 242420 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.0001624625841399919, "loss": 1.8799, "step": 242425 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016246114075384158, "loss": 2.0353, "step": 242430 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016245969734635338, "loss": 1.941, "step": 242435 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.0001624582539175278, "loss": 1.9624, "step": 242440 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016245681046736534, "loss": 2.0283, "step": 242445 }, { "epoch": 0.57, "grad_norm": 1.8515625, "learning_rate": 0.00016245536699586652, "loss": 2.1343, "step": 242450 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016245392350303182, "loss": 2.0604, "step": 242455 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.0001624524799888617, "loss": 2.1539, "step": 242460 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.0001624510364533567, "loss": 2.0564, "step": 242465 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016244959289651728, "loss": 2.187, "step": 242470 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016244814931834397, "loss": 2.2426, "step": 242475 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016244670571883725, "loss": 2.2746, "step": 242480 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016244526209799754, "loss": 2.0842, "step": 242485 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016244381845582546, "loss": 1.9698, "step": 242490 }, { "epoch": 0.57, "grad_norm": 3.921875, "learning_rate": 0.0001624423747923214, "loss": 2.0601, "step": 242495 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.0001624409311074859, "loss": 2.139, "step": 242500 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016243948740131945, "loss": 2.1183, "step": 242505 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016243804367382253, "loss": 1.9259, "step": 242510 }, { "epoch": 0.57, "grad_norm": 1.8828125, "learning_rate": 0.00016243659992499568, "loss": 1.9696, "step": 242515 }, { "epoch": 0.57, "grad_norm": 1.859375, "learning_rate": 0.0001624351561548393, "loss": 2.0051, "step": 242520 }, { "epoch": 0.57, "grad_norm": 1.921875, "learning_rate": 0.00016243371236335398, "loss": 2.0817, "step": 242525 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016243226855054016, "loss": 2.033, "step": 242530 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016243082471639837, "loss": 2.2324, "step": 242535 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016242938086092902, "loss": 2.115, "step": 242540 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.0001624279369841327, "loss": 1.81, "step": 242545 }, { "epoch": 0.57, "grad_norm": 1.921875, "learning_rate": 0.00016242649308600987, "loss": 2.0514, "step": 242550 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016242504916656102, "loss": 2.2077, "step": 242555 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.0001624236052257866, "loss": 2.319, "step": 242560 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016242216126368715, "loss": 2.1112, "step": 242565 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001624207172802632, "loss": 1.9317, "step": 242570 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016241927327551516, "loss": 1.9253, "step": 242575 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.0001624178292494436, "loss": 1.9451, "step": 242580 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016241638520204897, "loss": 2.1469, "step": 242585 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016241494113333174, "loss": 2.1879, "step": 242590 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016241349704329246, "loss": 2.1158, "step": 242595 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.0001624120529319316, "loss": 2.1432, "step": 242600 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016241060879924963, "loss": 2.2005, "step": 242605 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016240916464524706, "loss": 2.2013, "step": 242610 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016240772046992443, "loss": 2.0787, "step": 242615 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016240627627328214, "loss": 2.0622, "step": 242620 }, { "epoch": 0.57, "grad_norm": 2.46875, "learning_rate": 0.00016240483205532077, "loss": 2.001, "step": 242625 }, { "epoch": 0.57, "grad_norm": 2.390625, "learning_rate": 0.0001624033878160408, "loss": 2.1232, "step": 242630 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.00016240194355544266, "loss": 2.0576, "step": 242635 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016240049927352686, "loss": 2.2203, "step": 242640 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016239905497029394, "loss": 2.157, "step": 242645 }, { "epoch": 0.57, "grad_norm": 2.5, "learning_rate": 0.0001623976106457444, "loss": 2.1065, "step": 242650 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016239616629987872, "loss": 2.1397, "step": 242655 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.0001623947219326973, "loss": 2.2181, "step": 242660 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016239327754420077, "loss": 2.0974, "step": 242665 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016239183313438953, "loss": 2.09, "step": 242670 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016239038870326415, "loss": 2.0382, "step": 242675 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016238894425082506, "loss": 2.0483, "step": 242680 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016238749977707278, "loss": 2.0826, "step": 242685 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.0001623860552820078, "loss": 2.0282, "step": 242690 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.0001623846107656306, "loss": 2.1544, "step": 242695 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016238316622794167, "loss": 2.2494, "step": 242700 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016238172166894155, "loss": 2.1182, "step": 242705 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.0001623802770886307, "loss": 2.1892, "step": 242710 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.0001623788324870096, "loss": 2.1234, "step": 242715 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016237738786407878, "loss": 2.1379, "step": 242720 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.0001623759432198387, "loss": 2.0231, "step": 242725 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016237449855428988, "loss": 2.1204, "step": 242730 }, { "epoch": 0.57, "grad_norm": 1.8671875, "learning_rate": 0.00016237305386743276, "loss": 2.0601, "step": 242735 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016237160915926792, "loss": 2.0856, "step": 242740 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016237016442979577, "loss": 2.0219, "step": 242745 }, { "epoch": 0.57, "grad_norm": 1.890625, "learning_rate": 0.00016236871967901685, "loss": 1.9823, "step": 242750 }, { "epoch": 0.57, "grad_norm": 1.421875, "learning_rate": 0.00016236727490693168, "loss": 1.9979, "step": 242755 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016236583011354069, "loss": 1.9839, "step": 242760 }, { "epoch": 0.57, "grad_norm": 2.609375, "learning_rate": 0.0001623643852988444, "loss": 2.1642, "step": 242765 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016236294046284333, "loss": 2.01, "step": 242770 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016236149560553792, "loss": 2.179, "step": 242775 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.0001623600507269287, "loss": 2.1464, "step": 242780 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.0001623586058270161, "loss": 2.154, "step": 242785 }, { "epoch": 0.57, "grad_norm": 2.734375, "learning_rate": 0.00016235716090580077, "loss": 2.1262, "step": 242790 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016235571596328306, "loss": 2.1153, "step": 242795 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001623542709994635, "loss": 2.06, "step": 242800 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016235282601434258, "loss": 2.0054, "step": 242805 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016235138100792083, "loss": 2.0623, "step": 242810 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.0001623499359801987, "loss": 2.1492, "step": 242815 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.0001623484909311767, "loss": 2.1875, "step": 242820 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016234704586085534, "loss": 2.0022, "step": 242825 }, { "epoch": 0.57, "grad_norm": 1.8671875, "learning_rate": 0.00016234560076923508, "loss": 2.1736, "step": 242830 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016234415565631645, "loss": 2.0798, "step": 242835 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001623427105220999, "loss": 2.0267, "step": 242840 }, { "epoch": 0.57, "grad_norm": 3.421875, "learning_rate": 0.00016234126536658597, "loss": 2.299, "step": 242845 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016233982018977512, "loss": 2.0949, "step": 242850 }, { "epoch": 0.57, "grad_norm": 1.90625, "learning_rate": 0.0001623383749916679, "loss": 1.9831, "step": 242855 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.0001623369297722647, "loss": 2.0629, "step": 242860 }, { "epoch": 0.57, "grad_norm": 3.296875, "learning_rate": 0.0001623354845315661, "loss": 2.0812, "step": 242865 }, { "epoch": 0.57, "grad_norm": 1.8125, "learning_rate": 0.00016233403926957257, "loss": 2.0467, "step": 242870 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016233259398628461, "loss": 1.9761, "step": 242875 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001623311486817027, "loss": 2.3658, "step": 242880 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.0001623297033558273, "loss": 1.9623, "step": 242885 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.000162328258008659, "loss": 2.1964, "step": 242890 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016232681264019823, "loss": 2.2405, "step": 242895 }, { "epoch": 0.57, "grad_norm": 2.671875, "learning_rate": 0.00016232536725044547, "loss": 1.9885, "step": 242900 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016232392183940125, "loss": 2.0126, "step": 242905 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016232247640706601, "loss": 1.9778, "step": 242910 }, { "epoch": 0.57, "grad_norm": 2.921875, "learning_rate": 0.00016232103095344032, "loss": 2.0736, "step": 242915 }, { "epoch": 0.57, "grad_norm": 1.875, "learning_rate": 0.00016231958547852462, "loss": 1.9959, "step": 242920 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016231813998231944, "loss": 2.0487, "step": 242925 }, { "epoch": 0.57, "grad_norm": 2.578125, "learning_rate": 0.00016231669446482524, "loss": 2.0028, "step": 242930 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016231524892604252, "loss": 2.0135, "step": 242935 }, { "epoch": 0.57, "grad_norm": 1.9140625, "learning_rate": 0.00016231380336597179, "loss": 2.0432, "step": 242940 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.0001623123577846135, "loss": 2.0405, "step": 242945 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016231091218196823, "loss": 2.3028, "step": 242950 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.0001623094665580364, "loss": 1.8949, "step": 242955 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016230802091281855, "loss": 2.1375, "step": 242960 }, { "epoch": 0.57, "grad_norm": 2.71875, "learning_rate": 0.0001623065752463151, "loss": 2.052, "step": 242965 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016230512955852663, "loss": 2.1933, "step": 242970 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016230368384945358, "loss": 2.0923, "step": 242975 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016230223811909648, "loss": 2.0305, "step": 242980 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.0001623007923674558, "loss": 1.9883, "step": 242985 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016229934659453202, "loss": 2.0341, "step": 242990 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016229790080032568, "loss": 1.9074, "step": 242995 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016229645498483722, "loss": 2.1592, "step": 243000 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.0001622950091480672, "loss": 2.22, "step": 243005 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016229356329001604, "loss": 2.0933, "step": 243010 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016229211741068428, "loss": 1.9407, "step": 243015 }, { "epoch": 0.57, "grad_norm": 2.609375, "learning_rate": 0.0001622906715100724, "loss": 2.0646, "step": 243020 }, { "epoch": 0.57, "grad_norm": 1.9296875, "learning_rate": 0.0001622892255881809, "loss": 1.9113, "step": 243025 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016228777964501028, "loss": 2.2016, "step": 243030 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.000162286333680561, "loss": 2.2557, "step": 243035 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.0001622848876948336, "loss": 2.1419, "step": 243040 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016228344168782853, "loss": 2.1504, "step": 243045 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016228199565954636, "loss": 2.2832, "step": 243050 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016228054960998748, "loss": 1.9306, "step": 243055 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016227910353915247, "loss": 2.0384, "step": 243060 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016227765744704175, "loss": 2.121, "step": 243065 }, { "epoch": 0.57, "grad_norm": 2.515625, "learning_rate": 0.00016227621133365586, "loss": 2.0263, "step": 243070 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016227476519899533, "loss": 1.9886, "step": 243075 }, { "epoch": 0.57, "grad_norm": 1.8828125, "learning_rate": 0.00016227331904306055, "loss": 2.2237, "step": 243080 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016227187286585213, "loss": 1.9999, "step": 243085 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016227042666737046, "loss": 2.1867, "step": 243090 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.0001622689804476161, "loss": 2.1688, "step": 243095 }, { "epoch": 0.57, "grad_norm": 2.703125, "learning_rate": 0.00016226753420658954, "loss": 1.8996, "step": 243100 }, { "epoch": 0.57, "grad_norm": 1.9921875, "learning_rate": 0.00016226608794429125, "loss": 2.1317, "step": 243105 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016226464166072178, "loss": 2.2509, "step": 243110 }, { "epoch": 0.57, "grad_norm": 1.953125, "learning_rate": 0.00016226319535588154, "loss": 2.1757, "step": 243115 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016226174902977104, "loss": 2.1409, "step": 243120 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.0001622603026823908, "loss": 2.2809, "step": 243125 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016225885631374135, "loss": 2.0366, "step": 243130 }, { "epoch": 0.57, "grad_norm": 2.546875, "learning_rate": 0.00016225740992382312, "loss": 2.1708, "step": 243135 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016225596351263666, "loss": 1.923, "step": 243140 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.0001622545170801824, "loss": 1.9399, "step": 243145 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016225307062646088, "loss": 1.9361, "step": 243150 }, { "epoch": 0.57, "grad_norm": 1.9921875, "learning_rate": 0.00016225162415147256, "loss": 1.9725, "step": 243155 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.00016225017765521797, "loss": 1.9891, "step": 243160 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.0001622487311376976, "loss": 2.1264, "step": 243165 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016224728459891195, "loss": 1.9731, "step": 243170 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016224583803886147, "loss": 1.9731, "step": 243175 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016224439145754666, "loss": 2.0929, "step": 243180 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.0001622429448549681, "loss": 2.145, "step": 243185 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016224149823112618, "loss": 2.0461, "step": 243190 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016224005158602144, "loss": 2.1061, "step": 243195 }, { "epoch": 0.57, "grad_norm": 2.59375, "learning_rate": 0.0001622386049196544, "loss": 1.9086, "step": 243200 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.0001622371582320255, "loss": 2.2414, "step": 243205 }, { "epoch": 0.57, "grad_norm": 2.46875, "learning_rate": 0.00016223571152313525, "loss": 2.1832, "step": 243210 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016223426479298415, "loss": 1.9817, "step": 243215 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016223281804157271, "loss": 2.0647, "step": 243220 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016223137126890143, "loss": 2.0268, "step": 243225 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016222992447497073, "loss": 2.1603, "step": 243230 }, { "epoch": 0.57, "grad_norm": 1.5234375, "learning_rate": 0.0001622284776597812, "loss": 2.0324, "step": 243235 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016222703082333326, "loss": 1.877, "step": 243240 }, { "epoch": 0.57, "grad_norm": 2.59375, "learning_rate": 0.00016222558396562748, "loss": 2.3329, "step": 243245 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.0001622241370866643, "loss": 2.0018, "step": 243250 }, { "epoch": 0.57, "grad_norm": 2.828125, "learning_rate": 0.00016222269018644422, "loss": 1.9975, "step": 243255 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016222124326496773, "loss": 2.1787, "step": 243260 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016221979632223535, "loss": 2.0101, "step": 243265 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016221834935824757, "loss": 1.9894, "step": 243270 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016221690237300484, "loss": 2.0704, "step": 243275 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016221545536650772, "loss": 1.9397, "step": 243280 }, { "epoch": 0.57, "grad_norm": 3.671875, "learning_rate": 0.00016221400833875665, "loss": 2.183, "step": 243285 }, { "epoch": 0.57, "grad_norm": 1.8359375, "learning_rate": 0.00016221256128975216, "loss": 1.9678, "step": 243290 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016221111421949473, "loss": 2.0414, "step": 243295 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016220966712798484, "loss": 2.0509, "step": 243300 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016220822001522303, "loss": 1.9574, "step": 243305 }, { "epoch": 0.57, "grad_norm": 1.8515625, "learning_rate": 0.00016220677288120974, "loss": 2.1114, "step": 243310 }, { "epoch": 0.57, "grad_norm": 6.25, "learning_rate": 0.00016220532572594547, "loss": 2.0657, "step": 243315 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016220387854943077, "loss": 1.9917, "step": 243320 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016220243135166607, "loss": 1.9491, "step": 243325 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.0001622009841326519, "loss": 2.2193, "step": 243330 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016219953689238875, "loss": 1.9876, "step": 243335 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.0001621980896308771, "loss": 1.9722, "step": 243340 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016219664234811746, "loss": 2.0596, "step": 243345 }, { "epoch": 0.57, "grad_norm": 1.8984375, "learning_rate": 0.00016219519504411033, "loss": 1.8539, "step": 243350 }, { "epoch": 0.57, "grad_norm": 2.59375, "learning_rate": 0.00016219374771885622, "loss": 2.0212, "step": 243355 }, { "epoch": 0.57, "grad_norm": 2.703125, "learning_rate": 0.00016219230037235556, "loss": 2.1319, "step": 243360 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.0001621908530046089, "loss": 2.0707, "step": 243365 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016218940561561667, "loss": 2.0203, "step": 243370 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016218795820537943, "loss": 1.9577, "step": 243375 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.0001621865107738977, "loss": 2.1335, "step": 243380 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.0001621850633211719, "loss": 2.0427, "step": 243385 }, { "epoch": 0.57, "grad_norm": 1.8359375, "learning_rate": 0.00016218361584720254, "loss": 1.9967, "step": 243390 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016218216835199014, "loss": 2.0986, "step": 243395 }, { "epoch": 0.57, "grad_norm": 1.921875, "learning_rate": 0.00016218072083553516, "loss": 2.1421, "step": 243400 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016217927329783817, "loss": 2.1897, "step": 243405 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.0001621778257388996, "loss": 2.0803, "step": 243410 }, { "epoch": 0.57, "grad_norm": 1.6953125, "learning_rate": 0.00016217637815871993, "loss": 2.0056, "step": 243415 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016217493055729966, "loss": 2.0841, "step": 243420 }, { "epoch": 0.57, "grad_norm": 2.578125, "learning_rate": 0.00016217348293463938, "loss": 1.8939, "step": 243425 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016217203529073947, "loss": 2.2057, "step": 243430 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016217058762560047, "loss": 2.0085, "step": 243435 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016216913993922287, "loss": 2.1189, "step": 243440 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016216769223160714, "loss": 2.2421, "step": 243445 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001621662445027538, "loss": 1.9619, "step": 243450 }, { "epoch": 0.57, "grad_norm": 2.625, "learning_rate": 0.00016216479675266337, "loss": 2.1725, "step": 243455 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016216334898133634, "loss": 2.2154, "step": 243460 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016216190118877318, "loss": 2.111, "step": 243465 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016216045337497434, "loss": 2.0166, "step": 243470 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016215900553994038, "loss": 2.1515, "step": 243475 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.00016215755768367177, "loss": 2.0156, "step": 243480 }, { "epoch": 0.57, "grad_norm": 1.96875, "learning_rate": 0.000162156109806169, "loss": 2.1528, "step": 243485 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016215466190743264, "loss": 2.2043, "step": 243490 }, { "epoch": 0.57, "grad_norm": 2.578125, "learning_rate": 0.0001621532139874631, "loss": 1.8991, "step": 243495 }, { "epoch": 0.57, "grad_norm": 2.765625, "learning_rate": 0.00016215176604626082, "loss": 2.1131, "step": 243500 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016215031808382643, "loss": 2.3821, "step": 243505 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016214887010016036, "loss": 2.3339, "step": 243510 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.0001621474220952631, "loss": 1.9778, "step": 243515 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016214597406913515, "loss": 1.9066, "step": 243520 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016214452602177702, "loss": 2.1346, "step": 243525 }, { "epoch": 0.57, "grad_norm": 2.390625, "learning_rate": 0.0001621430779531892, "loss": 2.0968, "step": 243530 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016214162986337216, "loss": 2.1052, "step": 243535 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016214018175232645, "loss": 1.916, "step": 243540 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016213873362005247, "loss": 1.8045, "step": 243545 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.0001621372854665508, "loss": 1.9866, "step": 243550 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016213583729182189, "loss": 1.9016, "step": 243555 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001621343890958663, "loss": 2.1208, "step": 243560 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016213294087868442, "loss": 2.0597, "step": 243565 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016213149264027684, "loss": 2.1534, "step": 243570 }, { "epoch": 0.57, "grad_norm": 1.8203125, "learning_rate": 0.00016213004438064398, "loss": 2.0574, "step": 243575 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016212859609978638, "loss": 2.0418, "step": 243580 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016212714779770452, "loss": 2.1283, "step": 243585 }, { "epoch": 0.57, "grad_norm": 1.78125, "learning_rate": 0.00016212569947439892, "loss": 2.2303, "step": 243590 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016212425112987005, "loss": 1.993, "step": 243595 }, { "epoch": 0.57, "grad_norm": 2.6875, "learning_rate": 0.0001621228027641184, "loss": 2.0657, "step": 243600 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016212135437714448, "loss": 1.9216, "step": 243605 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016211990596894877, "loss": 2.0786, "step": 243610 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016211845753953178, "loss": 1.9651, "step": 243615 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016211700908889403, "loss": 2.0617, "step": 243620 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016211556061703592, "loss": 1.9852, "step": 243625 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.00016211411212395805, "loss": 2.0824, "step": 243630 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016211266360966086, "loss": 2.2345, "step": 243635 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016211121507414487, "loss": 2.1369, "step": 243640 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016210976651741054, "loss": 2.0844, "step": 243645 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.00016210831793945843, "loss": 2.184, "step": 243650 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.00016210686934028896, "loss": 2.1063, "step": 243655 }, { "epoch": 0.57, "grad_norm": 1.9609375, "learning_rate": 0.00016210542071990264, "loss": 2.0166, "step": 243660 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.0001621039720783, "loss": 1.9508, "step": 243665 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016210252341548152, "loss": 2.0953, "step": 243670 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001621010747314477, "loss": 2.0107, "step": 243675 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016209962602619903, "loss": 2.1169, "step": 243680 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016209817729973596, "loss": 2.2113, "step": 243685 }, { "epoch": 0.57, "grad_norm": 1.90625, "learning_rate": 0.00016209672855205907, "loss": 1.9652, "step": 243690 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016209527978316879, "loss": 2.0958, "step": 243695 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016209383099306565, "loss": 2.0709, "step": 243700 }, { "epoch": 0.57, "grad_norm": 3.0625, "learning_rate": 0.0001620923821817501, "loss": 1.9998, "step": 243705 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016209093334922269, "loss": 1.936, "step": 243710 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016208948449548388, "loss": 2.0817, "step": 243715 }, { "epoch": 0.57, "grad_norm": 2.5, "learning_rate": 0.0001620880356205342, "loss": 2.1067, "step": 243720 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016208658672437412, "loss": 2.1491, "step": 243725 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001620851378070041, "loss": 2.1145, "step": 243730 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016208368886842475, "loss": 2.1538, "step": 243735 }, { "epoch": 0.57, "grad_norm": 1.8125, "learning_rate": 0.0001620822399086364, "loss": 1.9922, "step": 243740 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.00016208079092763965, "loss": 2.1032, "step": 243745 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.000162079341925435, "loss": 2.0743, "step": 243750 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.00016207789290202288, "loss": 2.0773, "step": 243755 }, { "epoch": 0.57, "grad_norm": 2.828125, "learning_rate": 0.00016207644385740389, "loss": 2.0181, "step": 243760 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.0001620749947915784, "loss": 2.0924, "step": 243765 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016207354570454697, "loss": 2.1239, "step": 243770 }, { "epoch": 0.57, "grad_norm": 2.375, "learning_rate": 0.00016207209659631016, "loss": 2.2263, "step": 243775 }, { "epoch": 0.57, "grad_norm": 2.484375, "learning_rate": 0.00016207064746686834, "loss": 2.0985, "step": 243780 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016206919831622204, "loss": 2.1724, "step": 243785 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016206774914437184, "loss": 2.2547, "step": 243790 }, { "epoch": 0.57, "grad_norm": 1.859375, "learning_rate": 0.0001620662999513181, "loss": 2.0797, "step": 243795 }, { "epoch": 0.57, "grad_norm": 2.4375, "learning_rate": 0.00016206485073706146, "loss": 2.1272, "step": 243800 }, { "epoch": 0.57, "grad_norm": 2.671875, "learning_rate": 0.00016206340150160228, "loss": 2.0214, "step": 243805 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016206195224494115, "loss": 1.9523, "step": 243810 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016206050296707853, "loss": 2.0019, "step": 243815 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.0001620590536680149, "loss": 2.121, "step": 243820 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016205760434775078, "loss": 2.0177, "step": 243825 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016205615500628666, "loss": 2.0049, "step": 243830 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016205470564362303, "loss": 1.9818, "step": 243835 }, { "epoch": 0.57, "grad_norm": 2.015625, "learning_rate": 0.0001620532562597604, "loss": 2.0863, "step": 243840 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016205180685469923, "loss": 2.2781, "step": 243845 }, { "epoch": 0.57, "grad_norm": 1.9140625, "learning_rate": 0.00016205035742844004, "loss": 2.1818, "step": 243850 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016204890798098335, "loss": 2.205, "step": 243855 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016204745851232957, "loss": 2.0403, "step": 243860 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001620460090224793, "loss": 2.1052, "step": 243865 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.000162044559511433, "loss": 2.0559, "step": 243870 }, { "epoch": 0.57, "grad_norm": 3.421875, "learning_rate": 0.00016204310997919113, "loss": 2.0854, "step": 243875 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.0001620416604257542, "loss": 2.175, "step": 243880 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001620402108511227, "loss": 2.1419, "step": 243885 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.0001620387612552972, "loss": 1.9974, "step": 243890 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016203731163827808, "loss": 1.8637, "step": 243895 }, { "epoch": 0.57, "grad_norm": 1.953125, "learning_rate": 0.0001620358620000659, "loss": 2.0769, "step": 243900 }, { "epoch": 0.57, "grad_norm": 1.828125, "learning_rate": 0.00016203441234066116, "loss": 2.0284, "step": 243905 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.00016203296266006434, "loss": 2.0555, "step": 243910 }, { "epoch": 0.57, "grad_norm": 2.734375, "learning_rate": 0.0001620315129582759, "loss": 2.2678, "step": 243915 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.0001620300632352964, "loss": 2.1226, "step": 243920 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016202861349112632, "loss": 2.0532, "step": 243925 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.0001620271637257661, "loss": 2.0111, "step": 243930 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016202571393921628, "loss": 2.0061, "step": 243935 }, { "epoch": 0.57, "grad_norm": 2.328125, "learning_rate": 0.0001620242641314774, "loss": 2.1677, "step": 243940 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016202281430254987, "loss": 2.1236, "step": 243945 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.0001620213644524342, "loss": 2.1445, "step": 243950 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016201991458113097, "loss": 2.2612, "step": 243955 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016201846468864056, "loss": 2.0092, "step": 243960 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001620170147749635, "loss": 2.0305, "step": 243965 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016201556484010036, "loss": 2.1279, "step": 243970 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016201411488405156, "loss": 1.9355, "step": 243975 }, { "epoch": 0.57, "grad_norm": 1.75, "learning_rate": 0.0001620126649068176, "loss": 2.2214, "step": 243980 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.00016201121490839898, "loss": 2.1001, "step": 243985 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016200976488879625, "loss": 2.0858, "step": 243990 }, { "epoch": 0.57, "grad_norm": 1.84375, "learning_rate": 0.00016200831484800983, "loss": 2.1211, "step": 243995 }, { "epoch": 0.57, "grad_norm": 1.9296875, "learning_rate": 0.00016200686478604027, "loss": 2.1583, "step": 244000 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.000162005414702888, "loss": 2.2043, "step": 244005 }, { "epoch": 0.57, "grad_norm": 2.25, "learning_rate": 0.00016200396459855357, "loss": 2.1466, "step": 244010 }, { "epoch": 0.57, "grad_norm": 3.40625, "learning_rate": 0.00016200251447303745, "loss": 2.1168, "step": 244015 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016200106432634018, "loss": 1.9808, "step": 244020 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.0001619996141584622, "loss": 2.0198, "step": 244025 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.000161998163969404, "loss": 1.9645, "step": 244030 }, { "epoch": 0.57, "grad_norm": 1.9921875, "learning_rate": 0.00016199671375916616, "loss": 2.0652, "step": 244035 }, { "epoch": 0.57, "grad_norm": 1.9140625, "learning_rate": 0.00016199526352774908, "loss": 2.0821, "step": 244040 }, { "epoch": 0.57, "grad_norm": 1.921875, "learning_rate": 0.0001619938132751533, "loss": 2.0753, "step": 244045 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016199236300137933, "loss": 2.0366, "step": 244050 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.00016199091270642762, "loss": 2.1245, "step": 244055 }, { "epoch": 0.57, "grad_norm": 2.28125, "learning_rate": 0.0001619894623902987, "loss": 2.1949, "step": 244060 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016198801205299302, "loss": 1.9073, "step": 244065 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.0001619865616945112, "loss": 2.2828, "step": 244070 }, { "epoch": 0.57, "grad_norm": 1.9453125, "learning_rate": 0.00016198511131485356, "loss": 1.9337, "step": 244075 }, { "epoch": 0.57, "grad_norm": 1.9765625, "learning_rate": 0.0001619836609140207, "loss": 2.0147, "step": 244080 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.00016198221049201314, "loss": 2.2223, "step": 244085 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016198076004883126, "loss": 2.0928, "step": 244090 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.0001619793095844757, "loss": 2.0901, "step": 244095 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016197785909894685, "loss": 2.2035, "step": 244100 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016197640859224525, "loss": 2.1545, "step": 244105 }, { "epoch": 0.57, "grad_norm": 2.03125, "learning_rate": 0.00016197495806437139, "loss": 2.0926, "step": 244110 }, { "epoch": 0.57, "grad_norm": 2.046875, "learning_rate": 0.00016197350751532573, "loss": 2.0775, "step": 244115 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.0001619720569451088, "loss": 2.1513, "step": 244120 }, { "epoch": 0.57, "grad_norm": 2.0625, "learning_rate": 0.0001619706063537211, "loss": 2.0407, "step": 244125 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016196915574116316, "loss": 2.2966, "step": 244130 }, { "epoch": 0.57, "grad_norm": 1.78125, "learning_rate": 0.0001619677051074354, "loss": 2.3476, "step": 244135 }, { "epoch": 0.57, "grad_norm": 2.34375, "learning_rate": 0.00016196625445253835, "loss": 2.1523, "step": 244140 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.00016196480377647247, "loss": 2.1069, "step": 244145 }, { "epoch": 0.57, "grad_norm": 2.703125, "learning_rate": 0.00016196335307923834, "loss": 2.1929, "step": 244150 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016196190236083638, "loss": 2.1024, "step": 244155 }, { "epoch": 0.57, "grad_norm": 2.078125, "learning_rate": 0.0001619604516212671, "loss": 2.2931, "step": 244160 }, { "epoch": 0.57, "grad_norm": 2.5625, "learning_rate": 0.000161959000860531, "loss": 2.1808, "step": 244165 }, { "epoch": 0.57, "grad_norm": 1.7109375, "learning_rate": 0.0001619575500786286, "loss": 1.8822, "step": 244170 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.0001619560992755604, "loss": 2.225, "step": 244175 }, { "epoch": 0.57, "grad_norm": 2.125, "learning_rate": 0.00016195464845132683, "loss": 2.186, "step": 244180 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016195319760592847, "loss": 2.1865, "step": 244185 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016195174673936576, "loss": 2.2539, "step": 244190 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.0001619502958516392, "loss": 2.0785, "step": 244195 }, { "epoch": 0.57, "grad_norm": 1.78125, "learning_rate": 0.00016194884494274926, "loss": 2.0313, "step": 244200 }, { "epoch": 0.57, "grad_norm": 2.21875, "learning_rate": 0.0001619473940126965, "loss": 2.1133, "step": 244205 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016194594306148143, "loss": 2.1939, "step": 244210 }, { "epoch": 0.57, "grad_norm": 2.203125, "learning_rate": 0.00016194449208910446, "loss": 2.1998, "step": 244215 }, { "epoch": 0.57, "grad_norm": 2.359375, "learning_rate": 0.00016194304109556616, "loss": 2.0206, "step": 244220 }, { "epoch": 0.57, "grad_norm": 2.0, "learning_rate": 0.00016194159008086695, "loss": 2.1475, "step": 244225 }, { "epoch": 0.57, "grad_norm": 2.296875, "learning_rate": 0.0001619401390450074, "loss": 2.0961, "step": 244230 }, { "epoch": 0.57, "grad_norm": 2.265625, "learning_rate": 0.00016193868798798796, "loss": 2.0551, "step": 244235 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016193723690980914, "loss": 1.9822, "step": 244240 }, { "epoch": 0.57, "grad_norm": 2.53125, "learning_rate": 0.00016193578581047145, "loss": 2.2031, "step": 244245 }, { "epoch": 0.57, "grad_norm": 1.9375, "learning_rate": 0.00016193433468997536, "loss": 2.0945, "step": 244250 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016193288354832136, "loss": 2.1204, "step": 244255 }, { "epoch": 0.57, "grad_norm": 2.421875, "learning_rate": 0.00016193143238551, "loss": 2.0915, "step": 244260 }, { "epoch": 0.57, "grad_norm": 2.109375, "learning_rate": 0.00016192998120154173, "loss": 2.0794, "step": 244265 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016192852999641705, "loss": 2.273, "step": 244270 }, { "epoch": 0.57, "grad_norm": 2.09375, "learning_rate": 0.00016192707877013646, "loss": 2.122, "step": 244275 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016192562752270043, "loss": 1.9358, "step": 244280 }, { "epoch": 0.57, "grad_norm": 2.3125, "learning_rate": 0.00016192417625410954, "loss": 1.8256, "step": 244285 }, { "epoch": 0.57, "grad_norm": 1.859375, "learning_rate": 0.00016192272496436417, "loss": 2.1049, "step": 244290 }, { "epoch": 0.57, "grad_norm": 2.15625, "learning_rate": 0.00016192127365346494, "loss": 2.2172, "step": 244295 }, { "epoch": 0.57, "grad_norm": 2.40625, "learning_rate": 0.00016191982232141222, "loss": 2.0817, "step": 244300 }, { "epoch": 0.57, "grad_norm": 2.171875, "learning_rate": 0.00016191837096820658, "loss": 2.0416, "step": 244305 }, { "epoch": 0.57, "grad_norm": 2.453125, "learning_rate": 0.00016191691959384854, "loss": 2.142, "step": 244310 }, { "epoch": 0.57, "grad_norm": 2.234375, "learning_rate": 0.00016191546819833852, "loss": 2.1264, "step": 244315 }, { "epoch": 0.57, "grad_norm": 2.609375, "learning_rate": 0.00016191401678167705, "loss": 1.9854, "step": 244320 }, { "epoch": 0.57, "grad_norm": 2.1875, "learning_rate": 0.00016191256534386466, "loss": 1.9003, "step": 244325 }, { "epoch": 0.57, "grad_norm": 2.140625, "learning_rate": 0.0001619111138849018, "loss": 2.2064, "step": 244330 }, { "epoch": 0.57, "grad_norm": 1.984375, "learning_rate": 0.00016190966240478896, "loss": 2.1195, "step": 244335 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016190821090352668, "loss": 2.1748, "step": 244340 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.0001619067593811154, "loss": 2.1547, "step": 244345 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.0001619053078375557, "loss": 1.9497, "step": 244350 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016190385627284798, "loss": 2.2321, "step": 244355 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001619024046869928, "loss": 2.0406, "step": 244360 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016190095307999064, "loss": 2.1328, "step": 244365 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016189950145184198, "loss": 2.1921, "step": 244370 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016189804980254734, "loss": 2.0696, "step": 244375 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016189659813210718, "loss": 2.1017, "step": 244380 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016189514644052202, "loss": 2.2345, "step": 244385 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.0001618936947277924, "loss": 1.9192, "step": 244390 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016189224299391873, "loss": 2.0996, "step": 244395 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.00016189079123890156, "loss": 2.2466, "step": 244400 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016188933946274139, "loss": 1.9729, "step": 244405 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016188788766543868, "loss": 2.1324, "step": 244410 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016188643584699394, "loss": 2.1552, "step": 244415 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.00016188498400740767, "loss": 2.1038, "step": 244420 }, { "epoch": 0.58, "grad_norm": 1.8671875, "learning_rate": 0.00016188353214668038, "loss": 2.1589, "step": 244425 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016188208026481256, "loss": 1.9728, "step": 244430 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001618806283618047, "loss": 2.0987, "step": 244435 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.00016187917643765726, "loss": 1.9886, "step": 244440 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.0001618777244923708, "loss": 2.1912, "step": 244445 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016187627252594577, "loss": 2.0024, "step": 244450 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001618748205383827, "loss": 2.0312, "step": 244455 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016187336852968207, "loss": 2.0619, "step": 244460 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016187191649984434, "loss": 2.2478, "step": 244465 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.0001618704644488701, "loss": 2.0945, "step": 244470 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016186901237675975, "loss": 2.0433, "step": 244475 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.00016186756028351384, "loss": 2.2363, "step": 244480 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016186610816913284, "loss": 1.9911, "step": 244485 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016186465603361725, "loss": 2.2077, "step": 244490 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016186320387696757, "loss": 2.1612, "step": 244495 }, { "epoch": 0.58, "grad_norm": 2.703125, "learning_rate": 0.0001618617516991843, "loss": 2.1976, "step": 244500 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016186029950026796, "loss": 2.2004, "step": 244505 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.000161858847280219, "loss": 2.1969, "step": 244510 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016185739503903792, "loss": 2.2674, "step": 244515 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016185594277672523, "loss": 2.1393, "step": 244520 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016185449049328145, "loss": 1.9235, "step": 244525 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016185303818870702, "loss": 2.2119, "step": 244530 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016185158586300253, "loss": 1.9484, "step": 244535 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001618501335161684, "loss": 2.1013, "step": 244540 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.0001618486811482051, "loss": 2.0704, "step": 244545 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.0001618472287591132, "loss": 1.8286, "step": 244550 }, { "epoch": 0.58, "grad_norm": 2.6875, "learning_rate": 0.00016184577634889316, "loss": 2.1974, "step": 244555 }, { "epoch": 0.58, "grad_norm": 2.609375, "learning_rate": 0.0001618443239175455, "loss": 2.2088, "step": 244560 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016184287146507067, "loss": 2.1875, "step": 244565 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.0001618414189914692, "loss": 2.0851, "step": 244570 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016183996649674157, "loss": 2.1193, "step": 244575 }, { "epoch": 0.58, "grad_norm": 2.734375, "learning_rate": 0.00016183851398088832, "loss": 2.013, "step": 244580 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.0001618370614439099, "loss": 2.0402, "step": 244585 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001618356088858068, "loss": 2.0117, "step": 244590 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016183415630657955, "loss": 1.9771, "step": 244595 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016183270370622862, "loss": 2.0339, "step": 244600 }, { "epoch": 0.58, "grad_norm": 1.7578125, "learning_rate": 0.00016183125108475453, "loss": 1.9946, "step": 244605 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016182979844215774, "loss": 1.9832, "step": 244610 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016182834577843877, "loss": 2.2106, "step": 244615 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016182689309359813, "loss": 2.184, "step": 244620 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001618254403876363, "loss": 2.1178, "step": 244625 }, { "epoch": 0.58, "grad_norm": 2.609375, "learning_rate": 0.0001618239876605538, "loss": 2.1153, "step": 244630 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.0001618225349123511, "loss": 2.1642, "step": 244635 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016182108214302866, "loss": 2.0679, "step": 244640 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.00016181962935258706, "loss": 1.853, "step": 244645 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.0001618181765410267, "loss": 2.0919, "step": 244650 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001618167237083482, "loss": 2.0377, "step": 244655 }, { "epoch": 0.58, "grad_norm": 1.9609375, "learning_rate": 0.00016181527085455194, "loss": 1.9312, "step": 244660 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.0001618138179796385, "loss": 1.9311, "step": 244665 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.0001618123650836083, "loss": 1.9943, "step": 244670 }, { "epoch": 0.58, "grad_norm": 1.7109375, "learning_rate": 0.00016181091216646188, "loss": 1.9418, "step": 244675 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016180945922819975, "loss": 2.0679, "step": 244680 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016180800626882234, "loss": 2.0522, "step": 244685 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016180655328833026, "loss": 2.1064, "step": 244690 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016180510028672394, "loss": 1.9315, "step": 244695 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016180364726400384, "loss": 2.1551, "step": 244700 }, { "epoch": 0.58, "grad_norm": 1.8671875, "learning_rate": 0.0001618021942201705, "loss": 2.0132, "step": 244705 }, { "epoch": 0.58, "grad_norm": 1.609375, "learning_rate": 0.00016180074115522438, "loss": 2.0845, "step": 244710 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016179928806916606, "loss": 2.0617, "step": 244715 }, { "epoch": 0.58, "grad_norm": 1.765625, "learning_rate": 0.00016179783496199596, "loss": 2.0131, "step": 244720 }, { "epoch": 0.58, "grad_norm": 2.5625, "learning_rate": 0.0001617963818337146, "loss": 1.8438, "step": 244725 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016179492868432247, "loss": 2.1164, "step": 244730 }, { "epoch": 0.58, "grad_norm": 1.5390625, "learning_rate": 0.00016179347551382008, "loss": 2.0268, "step": 244735 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016179202232220792, "loss": 2.221, "step": 244740 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016179056910948646, "loss": 1.9749, "step": 244745 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.00016178911587565623, "loss": 2.1116, "step": 244750 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016178766262071775, "loss": 2.0811, "step": 244755 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016178620934467145, "loss": 2.0612, "step": 244760 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016178475604751785, "loss": 2.2229, "step": 244765 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.0001617833027292575, "loss": 2.1681, "step": 244770 }, { "epoch": 0.58, "grad_norm": 2.71875, "learning_rate": 0.00016178184938989084, "loss": 2.2352, "step": 244775 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016178039602941837, "loss": 2.2372, "step": 244780 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016177894264784063, "loss": 2.1863, "step": 244785 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.000161777489245158, "loss": 2.0823, "step": 244790 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016177603582137113, "loss": 2.0898, "step": 244795 }, { "epoch": 0.58, "grad_norm": 1.8046875, "learning_rate": 0.00016177458237648043, "loss": 2.1202, "step": 244800 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.0001617731289104864, "loss": 2.1084, "step": 244805 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016177167542338956, "loss": 2.1394, "step": 244810 }, { "epoch": 0.58, "grad_norm": 1.6875, "learning_rate": 0.00016177022191519037, "loss": 2.0114, "step": 244815 }, { "epoch": 0.58, "grad_norm": 1.9375, "learning_rate": 0.00016176876838588938, "loss": 2.1974, "step": 244820 }, { "epoch": 0.58, "grad_norm": 2.6875, "learning_rate": 0.00016176731483548705, "loss": 2.2128, "step": 244825 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.0001617658612639839, "loss": 2.1429, "step": 244830 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001617644076713804, "loss": 2.1899, "step": 244835 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016176295405767705, "loss": 2.0824, "step": 244840 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016176150042287437, "loss": 2.1317, "step": 244845 }, { "epoch": 0.58, "grad_norm": 1.7109375, "learning_rate": 0.00016176004676697283, "loss": 2.2175, "step": 244850 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016175859308997293, "loss": 2.1298, "step": 244855 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.0001617571393918752, "loss": 2.198, "step": 244860 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016175568567268012, "loss": 1.9294, "step": 244865 }, { "epoch": 0.58, "grad_norm": 7.90625, "learning_rate": 0.00016175423193238813, "loss": 2.1683, "step": 244870 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001617527781709998, "loss": 2.1178, "step": 244875 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016175132438851558, "loss": 2.1225, "step": 244880 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.00016174987058493602, "loss": 2.0036, "step": 244885 }, { "epoch": 0.58, "grad_norm": 1.8203125, "learning_rate": 0.0001617484167602616, "loss": 2.0881, "step": 244890 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016174696291449274, "loss": 2.1082, "step": 244895 }, { "epoch": 0.58, "grad_norm": 1.7890625, "learning_rate": 0.00016174550904763003, "loss": 2.0042, "step": 244900 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016174405515967394, "loss": 2.2027, "step": 244905 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016174260125062495, "loss": 2.0497, "step": 244910 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.0001617411473204836, "loss": 2.1086, "step": 244915 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016173969336925033, "loss": 2.0886, "step": 244920 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016173823939692565, "loss": 2.0612, "step": 244925 }, { "epoch": 0.58, "grad_norm": 1.671875, "learning_rate": 0.00016173678540351008, "loss": 2.0386, "step": 244930 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016173533138900408, "loss": 2.0625, "step": 244935 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016173387735340822, "loss": 2.0097, "step": 244940 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.0001617324232967229, "loss": 2.0537, "step": 244945 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016173096921894868, "loss": 2.2367, "step": 244950 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.00016172951512008607, "loss": 2.3426, "step": 244955 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.00016172806100013548, "loss": 2.1918, "step": 244960 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.0001617266068590975, "loss": 2.1682, "step": 244965 }, { "epoch": 0.58, "grad_norm": 2.625, "learning_rate": 0.00016172515269697262, "loss": 2.0194, "step": 244970 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016172369851376125, "loss": 2.1844, "step": 244975 }, { "epoch": 0.58, "grad_norm": 1.8359375, "learning_rate": 0.00016172224430946398, "loss": 1.9342, "step": 244980 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016172079008408127, "loss": 2.0615, "step": 244985 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016171933583761363, "loss": 2.2708, "step": 244990 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016171788157006155, "loss": 2.1431, "step": 244995 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016171642728142547, "loss": 2.1969, "step": 245000 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016171497297170597, "loss": 2.0116, "step": 245005 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.00016171351864090351, "loss": 2.2086, "step": 245010 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016171206428901863, "loss": 2.0548, "step": 245015 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016171060991605177, "loss": 2.1626, "step": 245020 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016170915552200343, "loss": 1.9222, "step": 245025 }, { "epoch": 0.58, "grad_norm": 2.546875, "learning_rate": 0.0001617077011068741, "loss": 1.9779, "step": 245030 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016170624667066435, "loss": 2.1609, "step": 245035 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016170479221337464, "loss": 2.0174, "step": 245040 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.0001617033377350054, "loss": 2.0852, "step": 245045 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001617018832355572, "loss": 2.0845, "step": 245050 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016170042871503052, "loss": 2.2371, "step": 245055 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016169897417342584, "loss": 1.9791, "step": 245060 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016169751961074368, "loss": 2.1609, "step": 245065 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.00016169606502698455, "loss": 2.2317, "step": 245070 }, { "epoch": 0.58, "grad_norm": 2.46875, "learning_rate": 0.0001616946104221489, "loss": 1.937, "step": 245075 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016169315579623726, "loss": 2.1228, "step": 245080 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016169170114925012, "loss": 1.959, "step": 245085 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016169024648118798, "loss": 2.0282, "step": 245090 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016168879179205133, "loss": 2.1022, "step": 245095 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016168733708184067, "loss": 2.1504, "step": 245100 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.0001616858823505565, "loss": 2.2325, "step": 245105 }, { "epoch": 0.58, "grad_norm": 4.03125, "learning_rate": 0.0001616844275981993, "loss": 2.0102, "step": 245110 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.0001616829728247696, "loss": 2.0851, "step": 245115 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.00016168151803026788, "loss": 1.9974, "step": 245120 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.0001616800632146946, "loss": 2.0659, "step": 245125 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016167860837805034, "loss": 2.2563, "step": 245130 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.0001616771535203355, "loss": 2.158, "step": 245135 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016167569864155068, "loss": 2.2661, "step": 245140 }, { "epoch": 0.58, "grad_norm": 2.5625, "learning_rate": 0.00016167424374169626, "loss": 2.1785, "step": 245145 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016167278882077285, "loss": 2.1581, "step": 245150 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016167133387878088, "loss": 1.9351, "step": 245155 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016166987891572088, "loss": 2.1496, "step": 245160 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.0001616684239315933, "loss": 1.9553, "step": 245165 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016166696892639868, "loss": 2.1932, "step": 245170 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016166551390013752, "loss": 1.7865, "step": 245175 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001616640588528103, "loss": 2.0491, "step": 245180 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001616626037844175, "loss": 2.1157, "step": 245185 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016166114869495962, "loss": 2.2046, "step": 245190 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.00016165969358443723, "loss": 1.9869, "step": 245195 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016165823845285072, "loss": 2.1504, "step": 245200 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016165678330020065, "loss": 2.1088, "step": 245205 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.0001616553281264875, "loss": 2.1813, "step": 245210 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.0001616538729317118, "loss": 2.0558, "step": 245215 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.000161652417715874, "loss": 2.2341, "step": 245220 }, { "epoch": 0.58, "grad_norm": 2.46875, "learning_rate": 0.0001616509624789746, "loss": 1.9652, "step": 245225 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016164950722101414, "loss": 2.055, "step": 245230 }, { "epoch": 0.58, "grad_norm": 2.625, "learning_rate": 0.00016164805194199307, "loss": 2.0241, "step": 245235 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016164659664191192, "loss": 2.0222, "step": 245240 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016164514132077117, "loss": 2.0121, "step": 245245 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016164368597857136, "loss": 2.104, "step": 245250 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.0001616422306153129, "loss": 2.0366, "step": 245255 }, { "epoch": 0.58, "grad_norm": 1.7578125, "learning_rate": 0.00016164077523099634, "loss": 2.1436, "step": 245260 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016163931982562222, "loss": 2.0681, "step": 245265 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016163786439919094, "loss": 2.0829, "step": 245270 }, { "epoch": 0.58, "grad_norm": 2.796875, "learning_rate": 0.00016163640895170304, "loss": 2.0823, "step": 245275 }, { "epoch": 0.58, "grad_norm": 2.703125, "learning_rate": 0.00016163495348315904, "loss": 1.9873, "step": 245280 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016163349799355945, "loss": 2.0033, "step": 245285 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001616320424829047, "loss": 1.9932, "step": 245290 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016163058695119534, "loss": 2.324, "step": 245295 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.00016162913139843184, "loss": 2.0862, "step": 245300 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.00016162767582461477, "loss": 2.1456, "step": 245305 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016162622022974452, "loss": 2.1234, "step": 245310 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.00016162476461382164, "loss": 2.094, "step": 245315 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016162330897684664, "loss": 2.1931, "step": 245320 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016162185331881996, "loss": 2.0115, "step": 245325 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016162039763974217, "loss": 2.3129, "step": 245330 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016161894193961372, "loss": 2.2363, "step": 245335 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016161748621843514, "loss": 2.0193, "step": 245340 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.0001616160304762069, "loss": 2.1529, "step": 245345 }, { "epoch": 0.58, "grad_norm": 1.671875, "learning_rate": 0.00016161457471292948, "loss": 1.9922, "step": 245350 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016161311892860344, "loss": 2.0387, "step": 245355 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016161166312322921, "loss": 2.0986, "step": 245360 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016161020729680737, "loss": 2.2833, "step": 245365 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016160875144933834, "loss": 2.1336, "step": 245370 }, { "epoch": 0.58, "grad_norm": 1.9609375, "learning_rate": 0.00016160729558082262, "loss": 2.3076, "step": 245375 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016160583969126075, "loss": 2.1306, "step": 245380 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.0001616043837806532, "loss": 1.8614, "step": 245385 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016160292784900051, "loss": 2.0802, "step": 245390 }, { "epoch": 0.58, "grad_norm": 1.8359375, "learning_rate": 0.00016160147189630308, "loss": 2.1224, "step": 245395 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.0001616000159225615, "loss": 2.1695, "step": 245400 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016159855992777623, "loss": 1.8952, "step": 245405 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.0001615971039119478, "loss": 2.1299, "step": 245410 }, { "epoch": 0.58, "grad_norm": 2.5, "learning_rate": 0.0001615956478750767, "loss": 1.8977, "step": 245415 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016159419181716335, "loss": 2.1234, "step": 245420 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016159273573820835, "loss": 2.0621, "step": 245425 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016159127963821213, "loss": 2.0523, "step": 245430 }, { "epoch": 0.58, "grad_norm": 1.84375, "learning_rate": 0.00016158982351717523, "loss": 1.9635, "step": 245435 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016158836737509811, "loss": 2.1854, "step": 245440 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.0001615869112119813, "loss": 2.2075, "step": 245445 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001615854550278253, "loss": 2.2519, "step": 245450 }, { "epoch": 0.58, "grad_norm": 2.5625, "learning_rate": 0.00016158399882263056, "loss": 2.079, "step": 245455 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016158254259639762, "loss": 2.0821, "step": 245460 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016158108634912696, "loss": 2.2457, "step": 245465 }, { "epoch": 0.58, "grad_norm": 1.875, "learning_rate": 0.00016157963008081908, "loss": 1.9545, "step": 245470 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016157817379147451, "loss": 2.1714, "step": 245475 }, { "epoch": 0.58, "grad_norm": 1.8515625, "learning_rate": 0.00016157671748109372, "loss": 2.2045, "step": 245480 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.00016157526114967717, "loss": 2.1908, "step": 245485 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016157380479722542, "loss": 1.9578, "step": 245490 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016157234842373893, "loss": 2.1834, "step": 245495 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016157089202921822, "loss": 2.0547, "step": 245500 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016156943561366376, "loss": 1.963, "step": 245505 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016156797917707606, "loss": 1.948, "step": 245510 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016156652271945564, "loss": 2.0811, "step": 245515 }, { "epoch": 0.58, "grad_norm": 1.734375, "learning_rate": 0.00016156506624080298, "loss": 2.0465, "step": 245520 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016156360974111858, "loss": 2.1993, "step": 245525 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016156215322040294, "loss": 1.9701, "step": 245530 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016156069667865651, "loss": 1.9402, "step": 245535 }, { "epoch": 0.58, "grad_norm": 1.71875, "learning_rate": 0.00016155924011587987, "loss": 2.1364, "step": 245540 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.0001615577835320735, "loss": 2.1232, "step": 245545 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016155632692723781, "loss": 2.1437, "step": 245550 }, { "epoch": 0.58, "grad_norm": 1.859375, "learning_rate": 0.0001615548703013734, "loss": 2.0292, "step": 245555 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016155341365448072, "loss": 1.9886, "step": 245560 }, { "epoch": 0.58, "grad_norm": 1.6640625, "learning_rate": 0.00016155195698656027, "loss": 2.0481, "step": 245565 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016155050029761258, "loss": 2.1079, "step": 245570 }, { "epoch": 0.58, "grad_norm": 3.3125, "learning_rate": 0.00016154904358763812, "loss": 2.2481, "step": 245575 }, { "epoch": 0.58, "grad_norm": 1.828125, "learning_rate": 0.00016154758685663735, "loss": 2.1309, "step": 245580 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.00016154613010461086, "loss": 2.1996, "step": 245585 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016154467333155906, "loss": 2.1353, "step": 245590 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.0001615432165374825, "loss": 2.1509, "step": 245595 }, { "epoch": 0.58, "grad_norm": 2.6875, "learning_rate": 0.00016154175972238165, "loss": 2.1992, "step": 245600 }, { "epoch": 0.58, "grad_norm": 2.46875, "learning_rate": 0.00016154030288625704, "loss": 2.0396, "step": 245605 }, { "epoch": 0.58, "grad_norm": 2.625, "learning_rate": 0.00016153884602910913, "loss": 2.1624, "step": 245610 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016153738915093842, "loss": 1.9738, "step": 245615 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016153593225174545, "loss": 2.167, "step": 245620 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.0001615344753315307, "loss": 2.1681, "step": 245625 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016153301839029463, "loss": 2.1329, "step": 245630 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016153156142803776, "loss": 2.1263, "step": 245635 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016153010444476063, "loss": 2.1456, "step": 245640 }, { "epoch": 0.58, "grad_norm": 2.46875, "learning_rate": 0.00016152864744046363, "loss": 2.1248, "step": 245645 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016152719041514737, "loss": 2.0554, "step": 245650 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.0001615257333688123, "loss": 2.0264, "step": 245655 }, { "epoch": 0.58, "grad_norm": 2.890625, "learning_rate": 0.00016152427630145897, "loss": 2.085, "step": 245660 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001615228192130878, "loss": 1.9379, "step": 245665 }, { "epoch": 0.58, "grad_norm": 1.765625, "learning_rate": 0.0001615213621036993, "loss": 2.1146, "step": 245670 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016151990497329397, "loss": 2.0741, "step": 245675 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016151844782187237, "loss": 2.1385, "step": 245680 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016151699064943494, "loss": 1.9357, "step": 245685 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016151553345598217, "loss": 2.044, "step": 245690 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.0001615140762415146, "loss": 1.9619, "step": 245695 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.0001615126190060327, "loss": 2.0911, "step": 245700 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016151116174953697, "loss": 2.1128, "step": 245705 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016150970447202791, "loss": 1.9678, "step": 245710 }, { "epoch": 0.58, "grad_norm": 1.9609375, "learning_rate": 0.00016150824717350607, "loss": 2.1697, "step": 245715 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.00016150678985397184, "loss": 2.0203, "step": 245720 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016150533251342578, "loss": 2.1254, "step": 245725 }, { "epoch": 0.58, "grad_norm": 1.7890625, "learning_rate": 0.0001615038751518684, "loss": 2.1905, "step": 245730 }, { "epoch": 0.58, "grad_norm": 1.75, "learning_rate": 0.00016150241776930016, "loss": 2.0769, "step": 245735 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.0001615009603657216, "loss": 2.1355, "step": 245740 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016149950294113316, "loss": 1.9965, "step": 245745 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.0001614980454955354, "loss": 2.2588, "step": 245750 }, { "epoch": 0.58, "grad_norm": 1.9375, "learning_rate": 0.00016149658802892879, "loss": 2.1492, "step": 245755 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016149513054131384, "loss": 2.1526, "step": 245760 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.00016149367303269104, "loss": 2.0189, "step": 245765 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016149221550306088, "loss": 2.037, "step": 245770 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016149075795242386, "loss": 2.14, "step": 245775 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.00016148930038078048, "loss": 1.9081, "step": 245780 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016148784278813123, "loss": 2.0503, "step": 245785 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.00016148638517447664, "loss": 2.0687, "step": 245790 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.00016148492753981718, "loss": 2.1589, "step": 245795 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.00016148346988415337, "loss": 2.1751, "step": 245800 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016148201220748566, "loss": 2.038, "step": 245805 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.0001614805545098146, "loss": 2.1336, "step": 245810 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016147909679114063, "loss": 2.1234, "step": 245815 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016147763905146435, "loss": 1.9703, "step": 245820 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016147618129078615, "loss": 2.1326, "step": 245825 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016147472350910657, "loss": 2.2606, "step": 245830 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.0001614732657064261, "loss": 2.0488, "step": 245835 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.0001614718078827453, "loss": 2.0926, "step": 245840 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016147035003806457, "loss": 2.1121, "step": 245845 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016146889217238445, "loss": 2.219, "step": 245850 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.00016146743428570548, "loss": 1.9972, "step": 245855 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016146597637802808, "loss": 2.1783, "step": 245860 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.0001614645184493528, "loss": 2.1919, "step": 245865 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016146306049968014, "loss": 2.0654, "step": 245870 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.0001614616025290106, "loss": 1.9791, "step": 245875 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016146014453734462, "loss": 2.147, "step": 245880 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016145868652468274, "loss": 2.0098, "step": 245885 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001614572284910255, "loss": 2.0366, "step": 245890 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.00016145577043637333, "loss": 2.0296, "step": 245895 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.00016145431236072675, "loss": 2.2234, "step": 245900 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016145285426408625, "loss": 2.1973, "step": 245905 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016145139614645238, "loss": 2.2995, "step": 245910 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016144993800782555, "loss": 2.1124, "step": 245915 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016144847984820635, "loss": 2.1455, "step": 245920 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.0001614470216675952, "loss": 2.187, "step": 245925 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016144556346599267, "loss": 2.1805, "step": 245930 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.0001614441052433992, "loss": 1.9228, "step": 245935 }, { "epoch": 0.58, "grad_norm": 1.921875, "learning_rate": 0.0001614426469998153, "loss": 2.0185, "step": 245940 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.0001614411887352415, "loss": 2.2561, "step": 245945 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016143973044967827, "loss": 2.1862, "step": 245950 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.0001614382721431261, "loss": 2.0879, "step": 245955 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.0001614368138155855, "loss": 2.1666, "step": 245960 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.000161435355467057, "loss": 2.034, "step": 245965 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016143389709754105, "loss": 2.1595, "step": 245970 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016143243870703815, "loss": 2.115, "step": 245975 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016143098029554884, "loss": 2.1472, "step": 245980 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016142952186307358, "loss": 2.0569, "step": 245985 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.0001614280634096129, "loss": 2.0236, "step": 245990 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016142660493516725, "loss": 2.0646, "step": 245995 }, { "epoch": 0.58, "grad_norm": 1.6640625, "learning_rate": 0.00016142514643973716, "loss": 2.1793, "step": 246000 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016142368792332315, "loss": 2.122, "step": 246005 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001614222293859257, "loss": 2.0557, "step": 246010 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016142077082754528, "loss": 2.1116, "step": 246015 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.0001614193122481824, "loss": 2.2457, "step": 246020 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.0001614178536478376, "loss": 2.0383, "step": 246025 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001614163950265113, "loss": 2.3221, "step": 246030 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016141493638420409, "loss": 1.9299, "step": 246035 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.0001614134777209164, "loss": 2.1349, "step": 246040 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016141201903664877, "loss": 2.2228, "step": 246045 }, { "epoch": 0.58, "grad_norm": 1.9375, "learning_rate": 0.00016141056033140168, "loss": 2.1134, "step": 246050 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.0001614091016051756, "loss": 2.1493, "step": 246055 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016140764285797111, "loss": 2.0756, "step": 246060 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.0001614061840897886, "loss": 2.1205, "step": 246065 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001614047253006287, "loss": 2.1553, "step": 246070 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016140326649049174, "loss": 2.3397, "step": 246075 }, { "epoch": 0.58, "grad_norm": 1.84375, "learning_rate": 0.00016140180765937835, "loss": 2.0659, "step": 246080 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.000161400348807289, "loss": 2.2222, "step": 246085 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016139888993422417, "loss": 2.1499, "step": 246090 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016139743104018437, "loss": 2.062, "step": 246095 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016139597212517007, "loss": 2.0593, "step": 246100 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016139451318918183, "loss": 2.1024, "step": 246105 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016139305423222008, "loss": 2.0931, "step": 246110 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016139159525428538, "loss": 2.123, "step": 246115 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016139013625537818, "loss": 2.1535, "step": 246120 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016138867723549898, "loss": 2.211, "step": 246125 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.0001613872181946483, "loss": 2.1362, "step": 246130 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016138575913282664, "loss": 2.0803, "step": 246135 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.0001613843000500345, "loss": 2.1387, "step": 246140 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016138284094627237, "loss": 2.126, "step": 246145 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016138138182154074, "loss": 2.0505, "step": 246150 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016137992267584012, "loss": 2.0681, "step": 246155 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016137846350917099, "loss": 2.199, "step": 246160 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.0001613770043215339, "loss": 2.1982, "step": 246165 }, { "epoch": 0.58, "grad_norm": 1.875, "learning_rate": 0.0001613755451129293, "loss": 1.9706, "step": 246170 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.00016137408588335768, "loss": 2.1779, "step": 246175 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.00016137262663281957, "loss": 2.1145, "step": 246180 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016137116736131545, "loss": 2.0188, "step": 246185 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016136970806884584, "loss": 2.0496, "step": 246190 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.0001613682487554112, "loss": 2.0513, "step": 246195 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016136678942101207, "loss": 1.9593, "step": 246200 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016136533006564892, "loss": 1.9083, "step": 246205 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016136387068932227, "loss": 2.0939, "step": 246210 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016136241129203262, "loss": 2.3025, "step": 246215 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016136095187378047, "loss": 2.0323, "step": 246220 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016135949243456627, "loss": 2.166, "step": 246225 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.0001613580329743906, "loss": 2.0957, "step": 246230 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016135657349325387, "loss": 2.1152, "step": 246235 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016135511399115662, "loss": 2.2261, "step": 246240 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016135365446809936, "loss": 2.1811, "step": 246245 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.0001613521949240826, "loss": 2.2211, "step": 246250 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.0001613507353591068, "loss": 2.1396, "step": 246255 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016134927577317248, "loss": 2.1177, "step": 246260 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016134781616628012, "loss": 2.1363, "step": 246265 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016134635653843025, "loss": 2.2313, "step": 246270 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.00016134489688962337, "loss": 2.1739, "step": 246275 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.00016134343721985994, "loss": 1.9735, "step": 246280 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016134197752914046, "loss": 2.0005, "step": 246285 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.00016134051781746544, "loss": 2.329, "step": 246290 }, { "epoch": 0.58, "grad_norm": 1.7578125, "learning_rate": 0.00016133905808483543, "loss": 2.2063, "step": 246295 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.0001613375983312509, "loss": 2.0296, "step": 246300 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001613361385567123, "loss": 2.1675, "step": 246305 }, { "epoch": 0.58, "grad_norm": 1.9609375, "learning_rate": 0.00016133467876122015, "loss": 2.0395, "step": 246310 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016133321894477498, "loss": 2.1656, "step": 246315 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016133175910737726, "loss": 1.9797, "step": 246320 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.0001613302992490275, "loss": 2.1696, "step": 246325 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001613288393697262, "loss": 2.0304, "step": 246330 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016132737946947384, "loss": 2.095, "step": 246335 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016132591954827097, "loss": 2.0681, "step": 246340 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016132445960611805, "loss": 2.0584, "step": 246345 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016132299964301557, "loss": 2.0948, "step": 246350 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016132153965896405, "loss": 1.9852, "step": 246355 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016132007965396396, "loss": 2.0895, "step": 246360 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.0001613186196280158, "loss": 1.9703, "step": 246365 }, { "epoch": 0.58, "grad_norm": 2.546875, "learning_rate": 0.00016131715958112012, "loss": 1.9652, "step": 246370 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016131569951327742, "loss": 2.1219, "step": 246375 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016131423942448812, "loss": 2.0528, "step": 246380 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016131277931475278, "loss": 2.0828, "step": 246385 }, { "epoch": 0.58, "grad_norm": 1.8046875, "learning_rate": 0.00016131131918407186, "loss": 2.1399, "step": 246390 }, { "epoch": 0.58, "grad_norm": 1.828125, "learning_rate": 0.0001613098590324459, "loss": 2.0998, "step": 246395 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016130839885987537, "loss": 2.1228, "step": 246400 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016130693866636076, "loss": 1.9298, "step": 246405 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016130547845190262, "loss": 2.0454, "step": 246410 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016130401821650144, "loss": 2.0709, "step": 246415 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016130255796015765, "loss": 2.0569, "step": 246420 }, { "epoch": 0.58, "grad_norm": 2.921875, "learning_rate": 0.0001613010976828718, "loss": 2.2877, "step": 246425 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.0001612996373846444, "loss": 2.0293, "step": 246430 }, { "epoch": 0.58, "grad_norm": 1.859375, "learning_rate": 0.00016129817706547593, "loss": 2.0176, "step": 246435 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.0001612967167253669, "loss": 2.0852, "step": 246440 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.0001612952563643178, "loss": 2.1889, "step": 246445 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016129379598232908, "loss": 1.9743, "step": 246450 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016129233557940134, "loss": 2.1706, "step": 246455 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.000161290875155535, "loss": 2.1751, "step": 246460 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.0001612894147107306, "loss": 1.9603, "step": 246465 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.0001612879542449886, "loss": 2.0959, "step": 246470 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016128649375830956, "loss": 2.0088, "step": 246475 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016128503325069392, "loss": 2.0685, "step": 246480 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.0001612835727221422, "loss": 2.0779, "step": 246485 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016128211217265493, "loss": 2.1524, "step": 246490 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.00016128065160223254, "loss": 2.0307, "step": 246495 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016127919101087559, "loss": 2.0501, "step": 246500 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016127773039858453, "loss": 2.0376, "step": 246505 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016127626976535992, "loss": 2.2103, "step": 246510 }, { "epoch": 0.58, "grad_norm": 3.65625, "learning_rate": 0.0001612748091112022, "loss": 2.264, "step": 246515 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016127334843611194, "loss": 2.0401, "step": 246520 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016127188774008955, "loss": 2.0886, "step": 246525 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016127042702313555, "loss": 2.1917, "step": 246530 }, { "epoch": 0.58, "grad_norm": 1.921875, "learning_rate": 0.0001612689662852505, "loss": 2.1701, "step": 246535 }, { "epoch": 0.58, "grad_norm": 2.5625, "learning_rate": 0.00016126750552643485, "loss": 2.1387, "step": 246540 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016126604474668913, "loss": 2.1697, "step": 246545 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016126458394601377, "loss": 2.182, "step": 246550 }, { "epoch": 0.58, "grad_norm": 2.671875, "learning_rate": 0.00016126312312440935, "loss": 2.3505, "step": 246555 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016126166228187634, "loss": 2.2176, "step": 246560 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016126020141841522, "loss": 2.0826, "step": 246565 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.0001612587405340265, "loss": 2.1347, "step": 246570 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016125727962871072, "loss": 2.2283, "step": 246575 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.0001612558187024683, "loss": 2.108, "step": 246580 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.0001612543577552998, "loss": 2.1674, "step": 246585 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.0001612528967872057, "loss": 2.1882, "step": 246590 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.0001612514357981865, "loss": 2.1483, "step": 246595 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.0001612499747882427, "loss": 2.0994, "step": 246600 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.0001612485137573748, "loss": 2.1155, "step": 246605 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016124705270558328, "loss": 2.0118, "step": 246610 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016124559163286866, "loss": 2.0018, "step": 246615 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016124413053923142, "loss": 2.1728, "step": 246620 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016124266942467213, "loss": 2.0487, "step": 246625 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.00016124120828919122, "loss": 2.0021, "step": 246630 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016123974713278915, "loss": 2.0051, "step": 246635 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016123828595546652, "loss": 1.9951, "step": 246640 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016123682475722374, "loss": 2.3749, "step": 246645 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016123536353806136, "loss": 2.3174, "step": 246650 }, { "epoch": 0.58, "grad_norm": 1.796875, "learning_rate": 0.0001612339022979799, "loss": 2.1442, "step": 246655 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016123244103697984, "loss": 2.1237, "step": 246660 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.0001612309797550616, "loss": 2.0505, "step": 246665 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.0001612295184522258, "loss": 2.1114, "step": 246670 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016122805712847287, "loss": 2.2151, "step": 246675 }, { "epoch": 0.58, "grad_norm": 2.65625, "learning_rate": 0.00016122659578380333, "loss": 1.9324, "step": 246680 }, { "epoch": 0.58, "grad_norm": 1.828125, "learning_rate": 0.00016122513441821766, "loss": 2.081, "step": 246685 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.0001612236730317164, "loss": 1.9975, "step": 246690 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001612222116243, "loss": 2.2418, "step": 246695 }, { "epoch": 0.58, "grad_norm": 2.46875, "learning_rate": 0.00016122075019596902, "loss": 2.1506, "step": 246700 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016121928874672387, "loss": 2.1867, "step": 246705 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001612178272765651, "loss": 2.1026, "step": 246710 }, { "epoch": 0.58, "grad_norm": 1.703125, "learning_rate": 0.00016121636578549325, "loss": 2.2934, "step": 246715 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016121490427350877, "loss": 2.139, "step": 246720 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016121344274061216, "loss": 2.0483, "step": 246725 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016121198118680395, "loss": 2.1374, "step": 246730 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016121051961208457, "loss": 2.0794, "step": 246735 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016120905801645462, "loss": 2.2831, "step": 246740 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.0001612075963999145, "loss": 2.0268, "step": 246745 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016120613476246475, "loss": 1.8638, "step": 246750 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016120467310410592, "loss": 2.1987, "step": 246755 }, { "epoch": 0.58, "grad_norm": 1.78125, "learning_rate": 0.00016120321142483845, "loss": 1.9756, "step": 246760 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016120174972466284, "loss": 2.2819, "step": 246765 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.0001612002880035796, "loss": 1.9758, "step": 246770 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016119882626158924, "loss": 2.0384, "step": 246775 }, { "epoch": 0.58, "grad_norm": 3.109375, "learning_rate": 0.00016119736449869224, "loss": 2.214, "step": 246780 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016119590271488913, "loss": 2.1647, "step": 246785 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016119444091018038, "loss": 2.0669, "step": 246790 }, { "epoch": 0.58, "grad_norm": 1.6953125, "learning_rate": 0.00016119297908456648, "loss": 1.9754, "step": 246795 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016119151723804797, "loss": 2.1502, "step": 246800 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016119005537062531, "loss": 2.2444, "step": 246805 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016118859348229907, "loss": 2.1128, "step": 246810 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016118713157306966, "loss": 2.0786, "step": 246815 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016118566964293759, "loss": 2.0698, "step": 246820 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016118420769190342, "loss": 2.0863, "step": 246825 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001611827457199676, "loss": 2.1351, "step": 246830 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.00016118128372713067, "loss": 2.2094, "step": 246835 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016117982171339309, "loss": 1.9099, "step": 246840 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016117835967875536, "loss": 1.9199, "step": 246845 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016117689762321799, "loss": 2.2062, "step": 246850 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001611754355467815, "loss": 2.1393, "step": 246855 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016117397344944635, "loss": 2.2367, "step": 246860 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016117251133121308, "loss": 2.0782, "step": 246865 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.0001611710491920822, "loss": 2.1023, "step": 246870 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016116958703205414, "loss": 2.1894, "step": 246875 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016116812485112943, "loss": 2.1254, "step": 246880 }, { "epoch": 0.58, "grad_norm": 1.7734375, "learning_rate": 0.0001611666626493086, "loss": 2.0242, "step": 246885 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016116520042659213, "loss": 2.2314, "step": 246890 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016116373818298048, "loss": 2.1352, "step": 246895 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.00016116227591847423, "loss": 2.2931, "step": 246900 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016116081363307382, "loss": 2.1227, "step": 246905 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016115935132677978, "loss": 2.0837, "step": 246910 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.0001611578889995926, "loss": 2.0367, "step": 246915 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016115642665151278, "loss": 2.0638, "step": 246920 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001611549642825408, "loss": 2.2167, "step": 246925 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016115350189267716, "loss": 2.2555, "step": 246930 }, { "epoch": 0.58, "grad_norm": 1.578125, "learning_rate": 0.00016115203948192242, "loss": 2.1266, "step": 246935 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.000161150577050277, "loss": 2.147, "step": 246940 }, { "epoch": 0.58, "grad_norm": 1.7890625, "learning_rate": 0.00016114911459774142, "loss": 2.1691, "step": 246945 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001611476521243162, "loss": 2.3334, "step": 246950 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016114618963000186, "loss": 2.2862, "step": 246955 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.00016114472711479887, "loss": 2.0922, "step": 246960 }, { "epoch": 0.58, "grad_norm": 2.796875, "learning_rate": 0.00016114326457870772, "loss": 1.987, "step": 246965 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016114180202172893, "loss": 2.0348, "step": 246970 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016114033944386298, "loss": 2.0774, "step": 246975 }, { "epoch": 0.58, "grad_norm": 1.8671875, "learning_rate": 0.0001611388768451104, "loss": 2.2096, "step": 246980 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016113741422547163, "loss": 2.0124, "step": 246985 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016113595158494723, "loss": 2.1586, "step": 246990 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001611344889235377, "loss": 2.0891, "step": 246995 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016113302624124352, "loss": 2.1283, "step": 247000 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016113156353806518, "loss": 2.165, "step": 247005 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016113010081400317, "loss": 2.1124, "step": 247010 }, { "epoch": 0.58, "grad_norm": 3.109375, "learning_rate": 0.00016112863806905803, "loss": 2.0618, "step": 247015 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.00016112717530323022, "loss": 2.0798, "step": 247020 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016112571251652029, "loss": 2.045, "step": 247025 }, { "epoch": 0.58, "grad_norm": 1.875, "learning_rate": 0.0001611242497089287, "loss": 2.0983, "step": 247030 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016112278688045596, "loss": 1.8506, "step": 247035 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.0001611213240311025, "loss": 2.1618, "step": 247040 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.000161119861160869, "loss": 2.2042, "step": 247045 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016111839826975577, "loss": 2.0715, "step": 247050 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016111693535776343, "loss": 2.0455, "step": 247055 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.0001611154724248924, "loss": 1.9512, "step": 247060 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001611140094711432, "loss": 2.0474, "step": 247065 }, { "epoch": 0.58, "grad_norm": 2.5, "learning_rate": 0.0001611125464965164, "loss": 2.0633, "step": 247070 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016111108350101242, "loss": 2.0703, "step": 247075 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.0001611096204846318, "loss": 1.9434, "step": 247080 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.000161108157447375, "loss": 2.0264, "step": 247085 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016110669438924253, "loss": 2.068, "step": 247090 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016110523131023495, "loss": 2.0439, "step": 247095 }, { "epoch": 0.58, "grad_norm": 1.5, "learning_rate": 0.0001611037682103527, "loss": 2.0038, "step": 247100 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.0001611023050895963, "loss": 1.8301, "step": 247105 }, { "epoch": 0.58, "grad_norm": 2.5, "learning_rate": 0.00016110084194796622, "loss": 2.1414, "step": 247110 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.000161099378785463, "loss": 2.0813, "step": 247115 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016109791560208711, "loss": 2.0878, "step": 247120 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001610964523978391, "loss": 2.4067, "step": 247125 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.0001610949891727194, "loss": 1.9606, "step": 247130 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016109352592672858, "loss": 2.2623, "step": 247135 }, { "epoch": 0.58, "grad_norm": 2.5625, "learning_rate": 0.00016109206265986706, "loss": 2.0056, "step": 247140 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.0001610905993721354, "loss": 2.1425, "step": 247145 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016108913606353409, "loss": 2.0653, "step": 247150 }, { "epoch": 0.58, "grad_norm": 3.25, "learning_rate": 0.0001610876727340636, "loss": 2.0839, "step": 247155 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.0001610862093837245, "loss": 1.9244, "step": 247160 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.0001610847460125172, "loss": 2.0724, "step": 247165 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016108328262044226, "loss": 2.1587, "step": 247170 }, { "epoch": 0.58, "grad_norm": 1.7734375, "learning_rate": 0.00016108181920750016, "loss": 2.1854, "step": 247175 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016108035577369143, "loss": 2.0331, "step": 247180 }, { "epoch": 0.58, "grad_norm": 1.828125, "learning_rate": 0.0001610788923190165, "loss": 1.9785, "step": 247185 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016107742884347595, "loss": 2.184, "step": 247190 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.0001610759653470702, "loss": 2.1114, "step": 247195 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.0001610745018297998, "loss": 2.1486, "step": 247200 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016107303829166527, "loss": 2.0008, "step": 247205 }, { "epoch": 0.58, "grad_norm": 1.78125, "learning_rate": 0.00016107157473266707, "loss": 1.8922, "step": 247210 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016107011115280574, "loss": 2.2083, "step": 247215 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.0001610686475520817, "loss": 1.9015, "step": 247220 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.00016106718393049554, "loss": 2.0233, "step": 247225 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016106572028804766, "loss": 2.0449, "step": 247230 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016106425662473873, "loss": 2.173, "step": 247235 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016106279294056906, "loss": 2.0917, "step": 247240 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016106132923553927, "loss": 1.993, "step": 247245 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.0001610598655096498, "loss": 2.1475, "step": 247250 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016105840176290116, "loss": 1.7746, "step": 247255 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016105693799529387, "loss": 2.1879, "step": 247260 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016105547420682846, "loss": 2.2091, "step": 247265 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.0001610540103975054, "loss": 2.1778, "step": 247270 }, { "epoch": 0.58, "grad_norm": 1.8359375, "learning_rate": 0.00016105254656732513, "loss": 2.035, "step": 247275 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016105108271628825, "loss": 2.0866, "step": 247280 }, { "epoch": 0.58, "grad_norm": 2.609375, "learning_rate": 0.00016104961884439516, "loss": 2.256, "step": 247285 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016104815495164644, "loss": 2.0897, "step": 247290 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016104669103804257, "loss": 2.173, "step": 247295 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016104522710358405, "loss": 2.1617, "step": 247300 }, { "epoch": 0.58, "grad_norm": 1.7265625, "learning_rate": 0.00016104376314827134, "loss": 1.9677, "step": 247305 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.000161042299172105, "loss": 2.3106, "step": 247310 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.0001610408351750855, "loss": 2.1628, "step": 247315 }, { "epoch": 0.58, "grad_norm": 1.71875, "learning_rate": 0.00016103937115721334, "loss": 1.9631, "step": 247320 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016103790711848902, "loss": 2.0536, "step": 247325 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016103644305891308, "loss": 2.2338, "step": 247330 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.00016103497897848593, "loss": 2.0575, "step": 247335 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016103351487720816, "loss": 1.9867, "step": 247340 }, { "epoch": 0.58, "grad_norm": 1.6875, "learning_rate": 0.0001610320507550802, "loss": 1.9875, "step": 247345 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016103058661210264, "loss": 2.0641, "step": 247350 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.0001610291224482759, "loss": 2.0937, "step": 247355 }, { "epoch": 0.58, "grad_norm": 1.921875, "learning_rate": 0.00016102765826360048, "loss": 1.9886, "step": 247360 }, { "epoch": 0.58, "grad_norm": 2.546875, "learning_rate": 0.0001610261940580769, "loss": 2.0352, "step": 247365 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016102472983170569, "loss": 2.2285, "step": 247370 }, { "epoch": 0.58, "grad_norm": 1.484375, "learning_rate": 0.00016102326558448734, "loss": 1.9859, "step": 247375 }, { "epoch": 0.58, "grad_norm": 1.7421875, "learning_rate": 0.0001610218013164223, "loss": 2.1853, "step": 247380 }, { "epoch": 0.58, "grad_norm": 1.65625, "learning_rate": 0.00016102033702751112, "loss": 1.9668, "step": 247385 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016101887271775431, "loss": 2.0374, "step": 247390 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016101740838715234, "loss": 2.2322, "step": 247395 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016101594403570568, "loss": 2.2458, "step": 247400 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.0001610144796634149, "loss": 2.0434, "step": 247405 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016101301527028045, "loss": 2.112, "step": 247410 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016101155085630284, "loss": 2.1233, "step": 247415 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.0001610100864214826, "loss": 2.1014, "step": 247420 }, { "epoch": 0.58, "grad_norm": 2.90625, "learning_rate": 0.0001610086219658202, "loss": 2.118, "step": 247425 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016100715748931617, "loss": 2.2006, "step": 247430 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016100569299197094, "loss": 2.1531, "step": 247435 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.0001610042284737851, "loss": 2.1201, "step": 247440 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.0001610027639347591, "loss": 2.1494, "step": 247445 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016100129937489345, "loss": 2.0908, "step": 247450 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.00016099983479418866, "loss": 2.2122, "step": 247455 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016099837019264518, "loss": 2.1997, "step": 247460 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016099690557026358, "loss": 2.1294, "step": 247465 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.0001609954409270443, "loss": 2.2915, "step": 247470 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.0001609939762629879, "loss": 2.1293, "step": 247475 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016099251157809487, "loss": 2.0805, "step": 247480 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016099104687236567, "loss": 2.043, "step": 247485 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016098958214580083, "loss": 1.8556, "step": 247490 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016098811739840083, "loss": 1.9687, "step": 247495 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.0001609866526301662, "loss": 2.0977, "step": 247500 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016098518784109743, "loss": 2.0405, "step": 247505 }, { "epoch": 0.58, "grad_norm": 1.8671875, "learning_rate": 0.000160983723031195, "loss": 2.1969, "step": 247510 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016098225820045938, "loss": 2.1512, "step": 247515 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016098079334889116, "loss": 2.01, "step": 247520 }, { "epoch": 0.58, "grad_norm": 2.484375, "learning_rate": 0.0001609793284764908, "loss": 1.9659, "step": 247525 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.0001609778635832588, "loss": 2.132, "step": 247530 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016097639866919562, "loss": 1.9157, "step": 247535 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016097493373430183, "loss": 2.0721, "step": 247540 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016097346877857785, "loss": 2.0258, "step": 247545 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001609720038020243, "loss": 2.1161, "step": 247550 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016097053880464155, "loss": 1.985, "step": 247555 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.00016096907378643018, "loss": 1.9653, "step": 247560 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016096760874739066, "loss": 2.102, "step": 247565 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001609661436875235, "loss": 2.277, "step": 247570 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016096467860682922, "loss": 2.1286, "step": 247575 }, { "epoch": 0.58, "grad_norm": 2.609375, "learning_rate": 0.0001609632135053083, "loss": 2.0481, "step": 247580 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.0001609617483829612, "loss": 2.106, "step": 247585 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.0001609602832397885, "loss": 1.9815, "step": 247590 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016095881807579065, "loss": 2.13, "step": 247595 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016095735289096817, "loss": 2.2701, "step": 247600 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.00016095588768532152, "loss": 1.9719, "step": 247605 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016095442245885129, "loss": 2.1195, "step": 247610 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.0001609529572115579, "loss": 2.1945, "step": 247615 }, { "epoch": 0.58, "grad_norm": 2.5625, "learning_rate": 0.00016095149194344185, "loss": 1.8866, "step": 247620 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.0001609500266545037, "loss": 2.0452, "step": 247625 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016094856134474386, "loss": 2.1501, "step": 247630 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016094709601416294, "loss": 2.1752, "step": 247635 }, { "epoch": 0.58, "grad_norm": 1.7265625, "learning_rate": 0.00016094563066276134, "loss": 1.9725, "step": 247640 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016094416529053966, "loss": 1.7923, "step": 247645 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016094269989749832, "loss": 2.1688, "step": 247650 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016094123448363786, "loss": 2.1611, "step": 247655 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016093976904895874, "loss": 2.2568, "step": 247660 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016093830359346152, "loss": 2.1338, "step": 247665 }, { "epoch": 0.58, "grad_norm": 2.546875, "learning_rate": 0.00016093683811714665, "loss": 1.9022, "step": 247670 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016093537262001467, "loss": 2.0523, "step": 247675 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016093390710206603, "loss": 2.003, "step": 247680 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.0001609324415633013, "loss": 2.0054, "step": 247685 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016093097600372092, "loss": 2.2685, "step": 247690 }, { "epoch": 0.58, "grad_norm": 1.9375, "learning_rate": 0.00016092951042332544, "loss": 2.0196, "step": 247695 }, { "epoch": 0.58, "grad_norm": 1.7890625, "learning_rate": 0.00016092804482211533, "loss": 2.1148, "step": 247700 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016092657920009108, "loss": 2.075, "step": 247705 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.0001609251135572532, "loss": 2.1215, "step": 247710 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001609236478936022, "loss": 2.0938, "step": 247715 }, { "epoch": 0.58, "grad_norm": 2.609375, "learning_rate": 0.0001609221822091386, "loss": 2.0825, "step": 247720 }, { "epoch": 0.58, "grad_norm": 7.1875, "learning_rate": 0.00016092071650386286, "loss": 2.3089, "step": 247725 }, { "epoch": 0.58, "grad_norm": 1.8515625, "learning_rate": 0.00016091925077777548, "loss": 1.9791, "step": 247730 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.000160917785030877, "loss": 2.0771, "step": 247735 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001609163192631679, "loss": 2.0777, "step": 247740 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.00016091485347464866, "loss": 2.0776, "step": 247745 }, { "epoch": 0.58, "grad_norm": 1.8359375, "learning_rate": 0.00016091338766531982, "loss": 2.0682, "step": 247750 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.00016091192183518186, "loss": 2.0908, "step": 247755 }, { "epoch": 0.58, "grad_norm": 1.875, "learning_rate": 0.0001609104559842353, "loss": 1.9719, "step": 247760 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016090899011248058, "loss": 2.1213, "step": 247765 }, { "epoch": 0.58, "grad_norm": 1.8125, "learning_rate": 0.00016090752421991827, "loss": 1.9131, "step": 247770 }, { "epoch": 0.58, "grad_norm": 2.640625, "learning_rate": 0.00016090605830654885, "loss": 2.0582, "step": 247775 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001609045923723728, "loss": 2.1974, "step": 247780 }, { "epoch": 0.58, "grad_norm": 2.609375, "learning_rate": 0.00016090312641739066, "loss": 2.1217, "step": 247785 }, { "epoch": 0.58, "grad_norm": 2.90625, "learning_rate": 0.00016090166044160288, "loss": 2.0225, "step": 247790 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016090019444501001, "loss": 2.0883, "step": 247795 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.00016089872842761253, "loss": 2.174, "step": 247800 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016089726238941094, "loss": 2.1074, "step": 247805 }, { "epoch": 0.58, "grad_norm": 1.953125, "learning_rate": 0.00016089579633040573, "loss": 2.0583, "step": 247810 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016089433025059743, "loss": 1.9963, "step": 247815 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016089286414998648, "loss": 2.1518, "step": 247820 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016089139802857345, "loss": 2.1605, "step": 247825 }, { "epoch": 0.58, "grad_norm": 1.7734375, "learning_rate": 0.00016088993188635882, "loss": 2.0261, "step": 247830 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001608884657233431, "loss": 2.0979, "step": 247835 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.00016088699953952673, "loss": 2.0194, "step": 247840 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016088553333491027, "loss": 2.1453, "step": 247845 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.00016088406710949423, "loss": 2.011, "step": 247850 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016088260086327908, "loss": 2.1975, "step": 247855 }, { "epoch": 0.58, "grad_norm": 1.8515625, "learning_rate": 0.00016088113459626532, "loss": 1.9943, "step": 247860 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.0001608796683084535, "loss": 2.1085, "step": 247865 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016087820199984403, "loss": 2.2104, "step": 247870 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.00016087673567043747, "loss": 1.9381, "step": 247875 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016087526932023435, "loss": 1.9957, "step": 247880 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.0001608738029492351, "loss": 2.2038, "step": 247885 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016087233655744028, "loss": 1.8612, "step": 247890 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.00016087087014485032, "loss": 1.9024, "step": 247895 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.0001608694037114658, "loss": 1.9625, "step": 247900 }, { "epoch": 0.58, "grad_norm": 1.8671875, "learning_rate": 0.00016086793725728717, "loss": 1.9925, "step": 247905 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.00016086647078231497, "loss": 2.3482, "step": 247910 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016086500428654968, "loss": 1.9693, "step": 247915 }, { "epoch": 0.58, "grad_norm": 1.84375, "learning_rate": 0.00016086353776999178, "loss": 2.0291, "step": 247920 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.0001608620712326418, "loss": 2.2282, "step": 247925 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016086060467450025, "loss": 2.0764, "step": 247930 }, { "epoch": 0.58, "grad_norm": 1.9296875, "learning_rate": 0.0001608591380955676, "loss": 2.0693, "step": 247935 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016085767149584437, "loss": 1.9975, "step": 247940 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016085620487533105, "loss": 1.9895, "step": 247945 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016085473823402815, "loss": 1.9593, "step": 247950 }, { "epoch": 0.58, "grad_norm": 2.46875, "learning_rate": 0.00016085327157193614, "loss": 2.2375, "step": 247955 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.00016085180488905557, "loss": 2.1231, "step": 247960 }, { "epoch": 0.58, "grad_norm": 1.6875, "learning_rate": 0.00016085033818538695, "loss": 1.9745, "step": 247965 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016084887146093074, "loss": 2.1764, "step": 247970 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001608474047156874, "loss": 2.2645, "step": 247975 }, { "epoch": 0.58, "grad_norm": 1.59375, "learning_rate": 0.00016084593794965754, "loss": 2.236, "step": 247980 }, { "epoch": 0.58, "grad_norm": 1.75, "learning_rate": 0.0001608444711628416, "loss": 1.93, "step": 247985 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016084300435524008, "loss": 1.9467, "step": 247990 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016084153752685346, "loss": 2.0753, "step": 247995 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016084007067768228, "loss": 2.1193, "step": 248000 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016083860380772702, "loss": 2.1874, "step": 248005 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001608371369169882, "loss": 2.1992, "step": 248010 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001608356700054663, "loss": 2.1647, "step": 248015 }, { "epoch": 0.58, "grad_norm": 1.9375, "learning_rate": 0.00016083420307316185, "loss": 2.0095, "step": 248020 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016083273612007534, "loss": 2.0374, "step": 248025 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016083126914620723, "loss": 1.9626, "step": 248030 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.0001608298021515581, "loss": 2.0285, "step": 248035 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.0001608283351361284, "loss": 2.1554, "step": 248040 }, { "epoch": 0.58, "grad_norm": 2.171875, "learning_rate": 0.0001608268680999186, "loss": 2.0565, "step": 248045 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016082540104292927, "loss": 2.0254, "step": 248050 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.00016082393396516084, "loss": 2.0874, "step": 248055 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.0001608224668666139, "loss": 2.1613, "step": 248060 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016082099974728888, "loss": 2.1105, "step": 248065 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.0001608195326071863, "loss": 2.0724, "step": 248070 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016081806544630668, "loss": 1.9995, "step": 248075 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016081659826465048, "loss": 2.0055, "step": 248080 }, { "epoch": 0.58, "grad_norm": 2.078125, "learning_rate": 0.00016081513106221823, "loss": 2.1348, "step": 248085 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016081366383901045, "loss": 2.2742, "step": 248090 }, { "epoch": 0.58, "grad_norm": 2.40625, "learning_rate": 0.0001608121965950276, "loss": 2.0474, "step": 248095 }, { "epoch": 0.58, "grad_norm": 2.21875, "learning_rate": 0.00016081072933027022, "loss": 2.1263, "step": 248100 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016080926204473878, "loss": 2.1559, "step": 248105 }, { "epoch": 0.58, "grad_norm": 2.53125, "learning_rate": 0.0001608077947384338, "loss": 2.1588, "step": 248110 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.00016080632741135578, "loss": 2.0285, "step": 248115 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.00016080486006350518, "loss": 2.1306, "step": 248120 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016080339269488258, "loss": 2.1592, "step": 248125 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016080192530548842, "loss": 2.0407, "step": 248130 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016080045789532323, "loss": 2.0934, "step": 248135 }, { "epoch": 0.58, "grad_norm": 2.578125, "learning_rate": 0.00016079899046438748, "loss": 1.9939, "step": 248140 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.0001607975230126817, "loss": 1.8946, "step": 248145 }, { "epoch": 0.58, "grad_norm": 2.5, "learning_rate": 0.0001607960555402064, "loss": 2.1501, "step": 248150 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016079458804696206, "loss": 1.8761, "step": 248155 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016079312053294919, "loss": 2.1116, "step": 248160 }, { "epoch": 0.58, "grad_norm": 1.9921875, "learning_rate": 0.00016079165299816826, "loss": 2.207, "step": 248165 }, { "epoch": 0.58, "grad_norm": 1.9140625, "learning_rate": 0.00016079018544261982, "loss": 2.213, "step": 248170 }, { "epoch": 0.58, "grad_norm": 2.359375, "learning_rate": 0.00016078871786630433, "loss": 2.1401, "step": 248175 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016078725026922236, "loss": 2.1103, "step": 248180 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.0001607857826513743, "loss": 2.1652, "step": 248185 }, { "epoch": 0.58, "grad_norm": 1.9453125, "learning_rate": 0.00016078431501276075, "loss": 2.1869, "step": 248190 }, { "epoch": 0.58, "grad_norm": 1.7265625, "learning_rate": 0.00016078284735338217, "loss": 1.9704, "step": 248195 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.0001607813796732391, "loss": 2.1672, "step": 248200 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016077991197233193, "loss": 2.1337, "step": 248205 }, { "epoch": 0.58, "grad_norm": 2.59375, "learning_rate": 0.00016077844425066128, "loss": 1.7952, "step": 248210 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.00016077697650822764, "loss": 2.2578, "step": 248215 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016077550874503145, "loss": 1.9921, "step": 248220 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016077404096107327, "loss": 1.9765, "step": 248225 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016077257315635354, "loss": 2.0089, "step": 248230 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.00016077110533087283, "loss": 1.9699, "step": 248235 }, { "epoch": 0.58, "grad_norm": 2.015625, "learning_rate": 0.00016076963748463158, "loss": 2.0287, "step": 248240 }, { "epoch": 0.58, "grad_norm": 2.515625, "learning_rate": 0.00016076816961763031, "loss": 1.9863, "step": 248245 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016076670172986956, "loss": 2.0949, "step": 248250 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.0001607652338213498, "loss": 2.2847, "step": 248255 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016076376589207154, "loss": 2.229, "step": 248260 }, { "epoch": 0.58, "grad_norm": 1.96875, "learning_rate": 0.00016076229794203528, "loss": 2.177, "step": 248265 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.0001607608299712415, "loss": 2.1497, "step": 248270 }, { "epoch": 0.58, "grad_norm": 1.8984375, "learning_rate": 0.00016075936197969073, "loss": 1.9829, "step": 248275 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016075789396738344, "loss": 2.0721, "step": 248280 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016075642593432017, "loss": 2.0636, "step": 248285 }, { "epoch": 0.58, "grad_norm": 2.3125, "learning_rate": 0.00016075495788050138, "loss": 2.2095, "step": 248290 }, { "epoch": 0.58, "grad_norm": 1.921875, "learning_rate": 0.0001607534898059276, "loss": 2.0251, "step": 248295 }, { "epoch": 0.58, "grad_norm": 2.640625, "learning_rate": 0.00016075202171059936, "loss": 1.9425, "step": 248300 }, { "epoch": 0.58, "grad_norm": 1.8828125, "learning_rate": 0.00016075055359451713, "loss": 2.0148, "step": 248305 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016074908545768137, "loss": 2.0183, "step": 248310 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016074761730009262, "loss": 2.1092, "step": 248315 }, { "epoch": 0.58, "grad_norm": 1.8828125, "learning_rate": 0.0001607461491217514, "loss": 1.9774, "step": 248320 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016074468092265817, "loss": 2.1695, "step": 248325 }, { "epoch": 0.58, "grad_norm": 1.9609375, "learning_rate": 0.0001607432127028135, "loss": 2.2284, "step": 248330 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001607417444622178, "loss": 2.2511, "step": 248335 }, { "epoch": 0.58, "grad_norm": 2.15625, "learning_rate": 0.00016074027620087167, "loss": 2.03, "step": 248340 }, { "epoch": 0.58, "grad_norm": 2.25, "learning_rate": 0.00016073880791877553, "loss": 2.0723, "step": 248345 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.0001607373396159299, "loss": 2.1333, "step": 248350 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001607358712923353, "loss": 2.1428, "step": 248355 }, { "epoch": 0.58, "grad_norm": 2.328125, "learning_rate": 0.00016073440294799224, "loss": 2.0563, "step": 248360 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016073293458290119, "loss": 2.1174, "step": 248365 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.0001607314661970627, "loss": 2.0692, "step": 248370 }, { "epoch": 0.58, "grad_norm": 1.78125, "learning_rate": 0.00016072999779047722, "loss": 2.0053, "step": 248375 }, { "epoch": 0.58, "grad_norm": 2.453125, "learning_rate": 0.00016072852936314523, "loss": 2.0848, "step": 248380 }, { "epoch": 0.58, "grad_norm": 3.203125, "learning_rate": 0.00016072706091506734, "loss": 2.1394, "step": 248385 }, { "epoch": 0.58, "grad_norm": 1.8671875, "learning_rate": 0.00016072559244624394, "loss": 1.9962, "step": 248390 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.0001607241239566756, "loss": 1.9117, "step": 248395 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 0.0001607226554463628, "loss": 2.2559, "step": 248400 }, { "epoch": 0.58, "grad_norm": 1.984375, "learning_rate": 0.000160721186915306, "loss": 2.1177, "step": 248405 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016071971836350576, "loss": 2.0991, "step": 248410 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.0001607182497909626, "loss": 2.1076, "step": 248415 }, { "epoch": 0.58, "grad_norm": 2.296875, "learning_rate": 0.00016071678119767695, "loss": 2.3141, "step": 248420 }, { "epoch": 0.58, "grad_norm": 2.234375, "learning_rate": 0.00016071531258364936, "loss": 2.1604, "step": 248425 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.00016071384394888028, "loss": 2.2029, "step": 248430 }, { "epoch": 0.58, "grad_norm": 1.890625, "learning_rate": 0.0001607123752933703, "loss": 2.2167, "step": 248435 }, { "epoch": 0.58, "grad_norm": 2.203125, "learning_rate": 0.00016071090661711986, "loss": 2.2181, "step": 248440 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016070943792012945, "loss": 2.086, "step": 248445 }, { "epoch": 0.58, "grad_norm": 2.03125, "learning_rate": 0.0001607079692023996, "loss": 2.1083, "step": 248450 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016070650046393087, "loss": 2.0791, "step": 248455 }, { "epoch": 0.58, "grad_norm": 2.421875, "learning_rate": 0.00016070503170472362, "loss": 2.1631, "step": 248460 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016070356292477843, "loss": 2.1122, "step": 248465 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016070209412409584, "loss": 2.0152, "step": 248470 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.00016070062530267634, "loss": 2.086, "step": 248475 }, { "epoch": 0.58, "grad_norm": 2.28125, "learning_rate": 0.00016069915646052034, "loss": 1.952, "step": 248480 }, { "epoch": 0.58, "grad_norm": 2.109375, "learning_rate": 0.00016069768759762845, "loss": 2.0803, "step": 248485 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001606962187140011, "loss": 2.1509, "step": 248490 }, { "epoch": 0.58, "grad_norm": 2.390625, "learning_rate": 0.00016069474980963885, "loss": 1.9769, "step": 248495 }, { "epoch": 0.58, "grad_norm": 1.90625, "learning_rate": 0.00016069328088454213, "loss": 1.9989, "step": 248500 }, { "epoch": 0.58, "grad_norm": 2.375, "learning_rate": 0.00016069181193871152, "loss": 2.0494, "step": 248505 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.0001606903429721475, "loss": 2.1036, "step": 248510 }, { "epoch": 0.58, "grad_norm": 2.09375, "learning_rate": 0.00016068887398485053, "loss": 2.0359, "step": 248515 }, { "epoch": 0.58, "grad_norm": 1.9765625, "learning_rate": 0.00016068740497682114, "loss": 2.0033, "step": 248520 }, { "epoch": 0.58, "grad_norm": 2.0, "learning_rate": 0.00016068593594805984, "loss": 2.1572, "step": 248525 }, { "epoch": 0.58, "grad_norm": 1.921875, "learning_rate": 0.0001606844668985671, "loss": 2.2693, "step": 248530 }, { "epoch": 0.58, "grad_norm": 1.5234375, "learning_rate": 0.00016068299782834352, "loss": 2.0517, "step": 248535 }, { "epoch": 0.58, "grad_norm": 2.125, "learning_rate": 0.00016068152873738943, "loss": 1.9254, "step": 248540 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016068005962570548, "loss": 2.2647, "step": 248545 }, { "epoch": 0.58, "grad_norm": 2.046875, "learning_rate": 0.00016067859049329213, "loss": 2.0522, "step": 248550 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016067712134014986, "loss": 2.1424, "step": 248555 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016067565216627917, "loss": 2.0142, "step": 248560 }, { "epoch": 0.58, "grad_norm": 2.4375, "learning_rate": 0.00016067418297168059, "loss": 2.1119, "step": 248565 }, { "epoch": 0.58, "grad_norm": 2.1875, "learning_rate": 0.0001606727137563546, "loss": 1.9854, "step": 248570 }, { "epoch": 0.58, "grad_norm": 2.34375, "learning_rate": 0.00016067124452030175, "loss": 1.9834, "step": 248575 }, { "epoch": 0.58, "grad_norm": 2.0625, "learning_rate": 0.00016066977526352244, "loss": 1.8993, "step": 248580 }, { "epoch": 0.58, "grad_norm": 2.140625, "learning_rate": 0.00016066830598601727, "loss": 2.1552, "step": 248585 }, { "epoch": 0.59, "grad_norm": 1.8046875, "learning_rate": 0.00016066683668778672, "loss": 2.0058, "step": 248590 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016066536736883123, "loss": 2.0888, "step": 248595 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.0001606638980291514, "loss": 2.2339, "step": 248600 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001606624286687477, "loss": 1.9342, "step": 248605 }, { "epoch": 0.59, "grad_norm": 2.59375, "learning_rate": 0.00016066095928762058, "loss": 2.2163, "step": 248610 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016065948988577055, "loss": 2.1218, "step": 248615 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00016065802046319816, "loss": 2.0826, "step": 248620 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.0001606565510199039, "loss": 2.1696, "step": 248625 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016065508155588824, "loss": 2.0538, "step": 248630 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016065361207115176, "loss": 2.0802, "step": 248635 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016065214256569482, "loss": 2.0705, "step": 248640 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016065067303951805, "loss": 2.2548, "step": 248645 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.0001606492034926219, "loss": 2.2293, "step": 248650 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.00016064773392500693, "loss": 2.2218, "step": 248655 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.00016064626433667356, "loss": 2.0443, "step": 248660 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.00016064479472762232, "loss": 2.0672, "step": 248665 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016064332509785372, "loss": 2.092, "step": 248670 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016064185544736827, "loss": 2.1311, "step": 248675 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016064038577616647, "loss": 1.9221, "step": 248680 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.0001606389160842488, "loss": 2.0726, "step": 248685 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016063744637161576, "loss": 2.1993, "step": 248690 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001606359766382679, "loss": 1.9611, "step": 248695 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016063450688420566, "loss": 2.1066, "step": 248700 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001606330371094296, "loss": 2.0923, "step": 248705 }, { "epoch": 0.59, "grad_norm": 1.8125, "learning_rate": 0.00016063156731394018, "loss": 2.1441, "step": 248710 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016063009749773795, "loss": 2.0643, "step": 248715 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.0001606286276608233, "loss": 2.1149, "step": 248720 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016062715780319688, "loss": 2.2516, "step": 248725 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.0001606256879248591, "loss": 2.021, "step": 248730 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016062421802581049, "loss": 2.2328, "step": 248735 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016062274810605155, "loss": 2.0717, "step": 248740 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00016062127816558278, "loss": 2.146, "step": 248745 }, { "epoch": 0.59, "grad_norm": 1.8125, "learning_rate": 0.0001606198082044047, "loss": 1.9262, "step": 248750 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016061833822251777, "loss": 2.1994, "step": 248755 }, { "epoch": 0.59, "grad_norm": 1.609375, "learning_rate": 0.00016061686821992253, "loss": 2.0111, "step": 248760 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016061539819661947, "loss": 1.9845, "step": 248765 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016061392815260907, "loss": 2.0238, "step": 248770 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016061245808789186, "loss": 1.9875, "step": 248775 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.00016061098800246833, "loss": 2.1798, "step": 248780 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016060951789633897, "loss": 2.0621, "step": 248785 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.00016060804776950435, "loss": 2.2202, "step": 248790 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001606065776219649, "loss": 2.0967, "step": 248795 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016060510745372116, "loss": 2.0976, "step": 248800 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016060363726477358, "loss": 2.0548, "step": 248805 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.0001606021670551227, "loss": 2.1374, "step": 248810 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016060069682476903, "loss": 2.1594, "step": 248815 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016059922657371306, "loss": 2.0964, "step": 248820 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016059775630195531, "loss": 2.0157, "step": 248825 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016059628600949627, "loss": 2.1175, "step": 248830 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016059481569633642, "loss": 2.0638, "step": 248835 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.0001605933453624763, "loss": 2.0382, "step": 248840 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.0001605918750079164, "loss": 2.1214, "step": 248845 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.0001605904046326572, "loss": 1.9501, "step": 248850 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00016058893423669923, "loss": 2.152, "step": 248855 }, { "epoch": 0.59, "grad_norm": 2.609375, "learning_rate": 0.00016058746382004296, "loss": 2.2877, "step": 248860 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016058599338268894, "loss": 2.1184, "step": 248865 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001605845229246376, "loss": 2.0839, "step": 248870 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016058305244588957, "loss": 2.0389, "step": 248875 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016058158194644522, "loss": 2.0057, "step": 248880 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016058011142630507, "loss": 2.0567, "step": 248885 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.0001605786408854697, "loss": 2.1571, "step": 248890 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00016057717032393954, "loss": 2.1189, "step": 248895 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016057569974171516, "loss": 1.901, "step": 248900 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00016057422913879702, "loss": 2.1568, "step": 248905 }, { "epoch": 0.59, "grad_norm": 1.984375, "learning_rate": 0.00016057275851518558, "loss": 2.2074, "step": 248910 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016057128787088143, "loss": 2.005, "step": 248915 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.000160569817205885, "loss": 2.0098, "step": 248920 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001605683465201968, "loss": 2.1705, "step": 248925 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016056687581381742, "loss": 2.054, "step": 248930 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016056540508674726, "loss": 2.1552, "step": 248935 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016056393433898686, "loss": 2.0593, "step": 248940 }, { "epoch": 0.59, "grad_norm": 2.578125, "learning_rate": 0.00016056246357053672, "loss": 2.1739, "step": 248945 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00016056099278139737, "loss": 2.0976, "step": 248950 }, { "epoch": 0.59, "grad_norm": 2.65625, "learning_rate": 0.00016055952197156924, "loss": 2.1171, "step": 248955 }, { "epoch": 0.59, "grad_norm": 2.625, "learning_rate": 0.00016055805114105294, "loss": 2.0115, "step": 248960 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016055658028984885, "loss": 2.0234, "step": 248965 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.00016055510941795757, "loss": 2.0711, "step": 248970 }, { "epoch": 0.59, "grad_norm": 2.625, "learning_rate": 0.00016055363852537954, "loss": 2.0739, "step": 248975 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016055216761211533, "loss": 2.1173, "step": 248980 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016055069667816538, "loss": 2.1431, "step": 248985 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.00016054922572353018, "loss": 2.2279, "step": 248990 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.0001605477547482103, "loss": 2.1159, "step": 248995 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.0001605462837522062, "loss": 2.0286, "step": 249000 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001605448127355184, "loss": 2.2871, "step": 249005 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016054334169814742, "loss": 1.9917, "step": 249010 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.0001605418706400937, "loss": 2.0696, "step": 249015 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016054039956135777, "loss": 2.2806, "step": 249020 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016053892846194017, "loss": 2.2152, "step": 249025 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.00016053745734184136, "loss": 2.0777, "step": 249030 }, { "epoch": 0.59, "grad_norm": 1.984375, "learning_rate": 0.00016053598620106188, "loss": 2.1223, "step": 249035 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.00016053451503960219, "loss": 1.9606, "step": 249040 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016053304385746278, "loss": 2.0868, "step": 249045 }, { "epoch": 0.59, "grad_norm": 2.9375, "learning_rate": 0.00016053157265464422, "loss": 1.9548, "step": 249050 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016053010143114697, "loss": 2.2131, "step": 249055 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016052863018697154, "loss": 2.042, "step": 249060 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00016052715892211846, "loss": 2.1218, "step": 249065 }, { "epoch": 0.59, "grad_norm": 1.7421875, "learning_rate": 0.00016052568763658817, "loss": 2.032, "step": 249070 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.0001605242163303812, "loss": 2.1738, "step": 249075 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.0001605227450034981, "loss": 1.9398, "step": 249080 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.0001605212736559393, "loss": 2.077, "step": 249085 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.00016051980228770535, "loss": 2.2556, "step": 249090 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.0001605183308987967, "loss": 1.9039, "step": 249095 }, { "epoch": 0.59, "grad_norm": 3.0, "learning_rate": 0.00016051685948921394, "loss": 2.0746, "step": 249100 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016051538805895752, "loss": 2.0183, "step": 249105 }, { "epoch": 0.59, "grad_norm": 1.78125, "learning_rate": 0.00016051391660802794, "loss": 2.069, "step": 249110 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016051244513642568, "loss": 2.044, "step": 249115 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.0001605109736441513, "loss": 1.9984, "step": 249120 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.00016050950213120525, "loss": 2.0901, "step": 249125 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016050803059758806, "loss": 2.1297, "step": 249130 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00016050655904330027, "loss": 2.1109, "step": 249135 }, { "epoch": 0.59, "grad_norm": 2.578125, "learning_rate": 0.0001605050874683423, "loss": 2.0318, "step": 249140 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00016050361587271472, "loss": 2.0749, "step": 249145 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00016050214425641798, "loss": 2.2674, "step": 249150 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00016050067261945262, "loss": 1.9969, "step": 249155 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016049920096181914, "loss": 2.1555, "step": 249160 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00016049772928351805, "loss": 2.0551, "step": 249165 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.00016049625758454982, "loss": 2.0911, "step": 249170 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016049478586491498, "loss": 2.1649, "step": 249175 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.000160493314124614, "loss": 2.0401, "step": 249180 }, { "epoch": 0.59, "grad_norm": 2.546875, "learning_rate": 0.00016049184236364744, "loss": 2.1519, "step": 249185 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.00016049037058201576, "loss": 2.0636, "step": 249190 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016048889877971946, "loss": 2.2381, "step": 249195 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016048742695675907, "loss": 2.1087, "step": 249200 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00016048595511313507, "loss": 1.9338, "step": 249205 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.00016048448324884794, "loss": 2.0491, "step": 249210 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016048301136389827, "loss": 1.9614, "step": 249215 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00016048153945828647, "loss": 2.1174, "step": 249220 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.0001604800675320131, "loss": 1.9734, "step": 249225 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016047859558507863, "loss": 2.0769, "step": 249230 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016047712361748356, "loss": 2.149, "step": 249235 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00016047565162922845, "loss": 2.1214, "step": 249240 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016047417962031374, "loss": 2.2332, "step": 249245 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016047270759073995, "loss": 2.1648, "step": 249250 }, { "epoch": 0.59, "grad_norm": 1.8984375, "learning_rate": 0.0001604712355405076, "loss": 1.9586, "step": 249255 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00016046976346961714, "loss": 2.0441, "step": 249260 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016046829137806917, "loss": 2.2019, "step": 249265 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016046681926586408, "loss": 2.1113, "step": 249270 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016046534713300243, "loss": 2.1292, "step": 249275 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016046387497948477, "loss": 2.1654, "step": 249280 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016046240280531154, "loss": 2.0695, "step": 249285 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016046093061048322, "loss": 2.0406, "step": 249290 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016045945839500037, "loss": 2.197, "step": 249295 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016045798615886344, "loss": 2.2299, "step": 249300 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.000160456513902073, "loss": 2.1044, "step": 249305 }, { "epoch": 0.59, "grad_norm": 1.8125, "learning_rate": 0.00016045504162462955, "loss": 2.1436, "step": 249310 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.00016045356932653353, "loss": 2.0971, "step": 249315 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016045209700778546, "loss": 2.1791, "step": 249320 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016045062466838586, "loss": 2.0852, "step": 249325 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00016044915230833525, "loss": 2.1677, "step": 249330 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.0001604476799276341, "loss": 2.2167, "step": 249335 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016044620752628293, "loss": 2.1572, "step": 249340 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00016044473510428225, "loss": 2.0579, "step": 249345 }, { "epoch": 0.59, "grad_norm": 1.6015625, "learning_rate": 0.0001604432626616325, "loss": 2.0304, "step": 249350 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001604417901983343, "loss": 2.0036, "step": 249355 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.0001604403177143881, "loss": 2.1266, "step": 249360 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00016043884520979432, "loss": 2.1585, "step": 249365 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016043737268455356, "loss": 1.9957, "step": 249370 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016043590013866633, "loss": 2.1728, "step": 249375 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.0001604344275721331, "loss": 2.2423, "step": 249380 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016043295498495433, "loss": 2.1849, "step": 249385 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016043148237713059, "loss": 2.158, "step": 249390 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016043000974866236, "loss": 2.2215, "step": 249395 }, { "epoch": 0.59, "grad_norm": 2.515625, "learning_rate": 0.00016042853709955014, "loss": 2.15, "step": 249400 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016042706442979444, "loss": 2.0304, "step": 249405 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.00016042559173939575, "loss": 1.9935, "step": 249410 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.0001604241190283546, "loss": 1.9113, "step": 249415 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.00016042264629667144, "loss": 2.0372, "step": 249420 }, { "epoch": 0.59, "grad_norm": 1.7265625, "learning_rate": 0.00016042117354434684, "loss": 2.1402, "step": 249425 }, { "epoch": 0.59, "grad_norm": 1.7734375, "learning_rate": 0.00016041970077138127, "loss": 2.0624, "step": 249430 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016041822797777523, "loss": 2.0918, "step": 249435 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.0001604167551635292, "loss": 2.0405, "step": 249440 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.00016041528232864377, "loss": 2.1522, "step": 249445 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.00016041380947311932, "loss": 2.2112, "step": 249450 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00016041233659695644, "loss": 2.1128, "step": 249455 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016041086370015563, "loss": 2.0143, "step": 249460 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016040939078271739, "loss": 1.9098, "step": 249465 }, { "epoch": 0.59, "grad_norm": 1.8515625, "learning_rate": 0.00016040791784464217, "loss": 2.0713, "step": 249470 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016040644488593049, "loss": 2.0719, "step": 249475 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.00016040497190658291, "loss": 2.0313, "step": 249480 }, { "epoch": 0.59, "grad_norm": 2.609375, "learning_rate": 0.0001604034989065999, "loss": 2.2916, "step": 249485 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00016040202588598192, "loss": 2.2233, "step": 249490 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00016040055284472955, "loss": 2.068, "step": 249495 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016039907978284324, "loss": 2.1139, "step": 249500 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001603976067003235, "loss": 1.97, "step": 249505 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016039613359717086, "loss": 2.1373, "step": 249510 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.0001603946604733858, "loss": 2.2411, "step": 249515 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00016039318732896885, "loss": 2.1849, "step": 249520 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016039171416392045, "loss": 2.134, "step": 249525 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016039024097824113, "loss": 2.1018, "step": 249530 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016038876777193145, "loss": 2.1119, "step": 249535 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.0001603872945449919, "loss": 1.9932, "step": 249540 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.0001603858212974229, "loss": 1.851, "step": 249545 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016038434802922503, "loss": 2.1092, "step": 249550 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016038287474039878, "loss": 2.1743, "step": 249555 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016038140143094463, "loss": 2.1459, "step": 249560 }, { "epoch": 0.59, "grad_norm": 2.828125, "learning_rate": 0.0001603799281008631, "loss": 2.0802, "step": 249565 }, { "epoch": 0.59, "grad_norm": 1.8359375, "learning_rate": 0.0001603784547501547, "loss": 1.9944, "step": 249570 }, { "epoch": 0.59, "grad_norm": 1.8984375, "learning_rate": 0.00016037698137881992, "loss": 2.1417, "step": 249575 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.00016037550798685926, "loss": 2.0784, "step": 249580 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016037403457427323, "loss": 1.9103, "step": 249585 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016037256114106234, "loss": 2.1568, "step": 249590 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016037108768722708, "loss": 2.1673, "step": 249595 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.000160369614212768, "loss": 1.9508, "step": 249600 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00016036814071768553, "loss": 2.1264, "step": 249605 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00016036666720198022, "loss": 1.8894, "step": 249610 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.00016036519366565254, "loss": 2.0404, "step": 249615 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016036372010870306, "loss": 2.212, "step": 249620 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.0001603622465311322, "loss": 2.0184, "step": 249625 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00016036077293294051, "loss": 2.0803, "step": 249630 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.00016035929931412847, "loss": 2.1017, "step": 249635 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00016035782567469663, "loss": 2.0681, "step": 249640 }, { "epoch": 0.59, "grad_norm": 1.640625, "learning_rate": 0.00016035635201464547, "loss": 2.1519, "step": 249645 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016035487833397543, "loss": 1.9718, "step": 249650 }, { "epoch": 0.59, "grad_norm": 3.03125, "learning_rate": 0.00016035340463268713, "loss": 2.1328, "step": 249655 }, { "epoch": 0.59, "grad_norm": 1.90625, "learning_rate": 0.000160351930910781, "loss": 2.1878, "step": 249660 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.0001603504571682575, "loss": 2.0418, "step": 249665 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016034898340511726, "loss": 2.1732, "step": 249670 }, { "epoch": 0.59, "grad_norm": 1.875, "learning_rate": 0.00016034750962136068, "loss": 2.1108, "step": 249675 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.0001603460358169883, "loss": 1.9359, "step": 249680 }, { "epoch": 0.59, "grad_norm": 3.0, "learning_rate": 0.00016034456199200063, "loss": 2.2812, "step": 249685 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.00016034308814639813, "loss": 2.176, "step": 249690 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016034161428018137, "loss": 2.0936, "step": 249695 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00016034014039335083, "loss": 2.1356, "step": 249700 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016033866648590696, "loss": 2.0725, "step": 249705 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016033719255785034, "loss": 2.2689, "step": 249710 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016033571860918144, "loss": 2.0581, "step": 249715 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016033424463990077, "loss": 2.159, "step": 249720 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.0001603327706500088, "loss": 2.0794, "step": 249725 }, { "epoch": 0.59, "grad_norm": 2.609375, "learning_rate": 0.00016033129663950608, "loss": 2.0933, "step": 249730 }, { "epoch": 0.59, "grad_norm": 3.109375, "learning_rate": 0.0001603298226083931, "loss": 2.0949, "step": 249735 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016032834855667034, "loss": 2.2813, "step": 249740 }, { "epoch": 0.59, "grad_norm": 2.6875, "learning_rate": 0.00016032687448433832, "loss": 2.0293, "step": 249745 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.00016032540039139756, "loss": 1.9951, "step": 249750 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016032392627784856, "loss": 1.978, "step": 249755 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.0001603224521436918, "loss": 2.0945, "step": 249760 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016032097798892778, "loss": 2.0237, "step": 249765 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016031950381355703, "loss": 1.9892, "step": 249770 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016031802961758004, "loss": 2.1538, "step": 249775 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00016031655540099733, "loss": 2.2499, "step": 249780 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.0001603150811638094, "loss": 2.1395, "step": 249785 }, { "epoch": 0.59, "grad_norm": 1.8125, "learning_rate": 0.00016031360690601674, "loss": 1.9864, "step": 249790 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016031213262761984, "loss": 2.2, "step": 249795 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016031065832861923, "loss": 1.963, "step": 249800 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.0001603091840090154, "loss": 2.0679, "step": 249805 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00016030770966880885, "loss": 2.0643, "step": 249810 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.00016030623530800014, "loss": 1.9657, "step": 249815 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016030476092658967, "loss": 1.9648, "step": 249820 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016030328652457803, "loss": 1.9104, "step": 249825 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016030181210196568, "loss": 2.1826, "step": 249830 }, { "epoch": 0.59, "grad_norm": 1.8515625, "learning_rate": 0.00016030033765875316, "loss": 2.0452, "step": 249835 }, { "epoch": 0.59, "grad_norm": 1.7578125, "learning_rate": 0.00016029886319494092, "loss": 2.0998, "step": 249840 }, { "epoch": 0.59, "grad_norm": 1.734375, "learning_rate": 0.0001602973887105295, "loss": 1.9772, "step": 249845 }, { "epoch": 0.59, "grad_norm": 2.546875, "learning_rate": 0.00016029591420551942, "loss": 2.1459, "step": 249850 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016029443967991114, "loss": 2.0353, "step": 249855 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00016029296513370522, "loss": 2.2153, "step": 249860 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.00016029149056690208, "loss": 2.1019, "step": 249865 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016029001597950233, "loss": 2.1027, "step": 249870 }, { "epoch": 0.59, "grad_norm": 2.734375, "learning_rate": 0.00016028854137150637, "loss": 2.1016, "step": 249875 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016028706674291478, "loss": 2.153, "step": 249880 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016028559209372802, "loss": 2.2048, "step": 249885 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001602841174239466, "loss": 2.0387, "step": 249890 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016028264273357107, "loss": 1.9376, "step": 249895 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00016028116802260187, "loss": 2.1332, "step": 249900 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.0001602796932910395, "loss": 2.076, "step": 249905 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.00016027821853888452, "loss": 1.8904, "step": 249910 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00016027674376613743, "loss": 2.0167, "step": 249915 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.0001602752689727987, "loss": 2.0937, "step": 249920 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016027379415886882, "loss": 2.0648, "step": 249925 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.0001602723193243483, "loss": 2.0933, "step": 249930 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016027084446923774, "loss": 2.313, "step": 249935 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016026936959353752, "loss": 2.0699, "step": 249940 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.0001602678946972482, "loss": 2.0615, "step": 249945 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00016026641978037027, "loss": 2.0708, "step": 249950 }, { "epoch": 0.59, "grad_norm": 2.6875, "learning_rate": 0.00016026494484290424, "loss": 2.0403, "step": 249955 }, { "epoch": 0.59, "grad_norm": 1.8671875, "learning_rate": 0.0001602634698848506, "loss": 1.981, "step": 249960 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016026199490620986, "loss": 2.1843, "step": 249965 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016026051990698255, "loss": 2.1733, "step": 249970 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016025904488716918, "loss": 1.9295, "step": 249975 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.0001602575698467702, "loss": 2.1136, "step": 249980 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.0001602560947857861, "loss": 1.9829, "step": 249985 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00016025461970421744, "loss": 1.9359, "step": 249990 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00016025314460206476, "loss": 2.1187, "step": 249995 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00016025166947932848, "loss": 2.1338, "step": 250000 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016025019433600914, "loss": 2.2105, "step": 250005 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016024871917210724, "loss": 1.9185, "step": 250010 }, { "epoch": 0.59, "grad_norm": 3.015625, "learning_rate": 0.0001602472439876233, "loss": 2.2134, "step": 250015 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016024576878255776, "loss": 2.0198, "step": 250020 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016024429355691123, "loss": 2.0509, "step": 250025 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016024281831068416, "loss": 1.8983, "step": 250030 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.00016024134304387698, "loss": 1.9043, "step": 250035 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016023986775649034, "loss": 2.1442, "step": 250040 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016023839244852463, "loss": 2.2606, "step": 250045 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016023691711998043, "loss": 2.1158, "step": 250050 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016023544177085815, "loss": 2.1233, "step": 250055 }, { "epoch": 0.59, "grad_norm": 1.8671875, "learning_rate": 0.00016023396640115837, "loss": 2.0581, "step": 250060 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001602324910108816, "loss": 1.9962, "step": 250065 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.00016023101560002826, "loss": 2.2773, "step": 250070 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.00016022954016859896, "loss": 2.1946, "step": 250075 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016022806471659417, "loss": 1.9394, "step": 250080 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016022658924401436, "loss": 2.0726, "step": 250085 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00016022511375086005, "loss": 1.8969, "step": 250090 }, { "epoch": 0.59, "grad_norm": 4.15625, "learning_rate": 0.0001602236382371318, "loss": 2.2985, "step": 250095 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00016022216270282998, "loss": 2.0598, "step": 250100 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016022068714795522, "loss": 2.0493, "step": 250105 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016021921157250799, "loss": 2.1306, "step": 250110 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016021773597648875, "loss": 2.2886, "step": 250115 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00016021626035989806, "loss": 2.1275, "step": 250120 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.0001602147847227364, "loss": 2.0832, "step": 250125 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.00016021330906500428, "loss": 2.1481, "step": 250130 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00016021183338670223, "loss": 1.996, "step": 250135 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001602103576878307, "loss": 1.945, "step": 250140 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.0001602088819683902, "loss": 1.9862, "step": 250145 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.00016020740622838128, "loss": 2.021, "step": 250150 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.0001602059304678044, "loss": 2.0781, "step": 250155 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00016020445468666012, "loss": 2.1532, "step": 250160 }, { "epoch": 0.59, "grad_norm": 1.90625, "learning_rate": 0.0001602029788849489, "loss": 2.3043, "step": 250165 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.0001602015030626712, "loss": 2.0232, "step": 250170 }, { "epoch": 0.59, "grad_norm": 2.75, "learning_rate": 0.0001602000272198276, "loss": 2.1717, "step": 250175 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.0001601985513564186, "loss": 1.962, "step": 250180 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016019707547244467, "loss": 2.0157, "step": 250185 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.0001601955995679063, "loss": 2.0215, "step": 250190 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016019412364280406, "loss": 2.0977, "step": 250195 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.0001601926476971384, "loss": 2.0626, "step": 250200 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016019117173090983, "loss": 2.1539, "step": 250205 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00016018969574411887, "loss": 2.1123, "step": 250210 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00016018821973676602, "loss": 2.2606, "step": 250215 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016018674370885181, "loss": 1.9981, "step": 250220 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.0001601852676603767, "loss": 2.3355, "step": 250225 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016018379159134122, "loss": 2.1771, "step": 250230 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016018231550174583, "loss": 2.1496, "step": 250235 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016018083939159107, "loss": 2.1358, "step": 250240 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.0001601793632608775, "loss": 2.2026, "step": 250245 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00016017788710960554, "loss": 2.0996, "step": 250250 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.0001601764109377757, "loss": 1.9684, "step": 250255 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016017493474538853, "loss": 2.092, "step": 250260 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00016017345853244446, "loss": 2.1885, "step": 250265 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.0001601719822989441, "loss": 2.0989, "step": 250270 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016017050604488788, "loss": 2.0168, "step": 250275 }, { "epoch": 0.59, "grad_norm": 2.515625, "learning_rate": 0.00016016902977027635, "loss": 2.0401, "step": 250280 }, { "epoch": 0.59, "grad_norm": 1.875, "learning_rate": 0.00016016755347510997, "loss": 2.0849, "step": 250285 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00016016607715938925, "loss": 2.1426, "step": 250290 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001601646008231147, "loss": 2.0303, "step": 250295 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016016312446628686, "loss": 2.2843, "step": 250300 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.0001601616480889062, "loss": 2.0593, "step": 250305 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.00016016017169097322, "loss": 1.8645, "step": 250310 }, { "epoch": 0.59, "grad_norm": 1.8046875, "learning_rate": 0.00016015869527248846, "loss": 2.0658, "step": 250315 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00016015721883345235, "loss": 1.8915, "step": 250320 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001601557423738655, "loss": 2.1027, "step": 250325 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016015426589372834, "loss": 2.1438, "step": 250330 }, { "epoch": 0.59, "grad_norm": 1.9453125, "learning_rate": 0.00016015278939304135, "loss": 2.1778, "step": 250335 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.0001601513128718051, "loss": 2.1842, "step": 250340 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.0001601498363300201, "loss": 2.0766, "step": 250345 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001601483597676868, "loss": 1.9505, "step": 250350 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.00016014688318480573, "loss": 1.8011, "step": 250355 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.0001601454065813774, "loss": 2.0081, "step": 250360 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016014392995740225, "loss": 2.0576, "step": 250365 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.00016014245331288094, "loss": 2.2054, "step": 250370 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00016014097664781383, "loss": 2.2231, "step": 250375 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016013949996220147, "loss": 2.2874, "step": 250380 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00016013802325604438, "loss": 2.13, "step": 250385 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016013654652934304, "loss": 2.0168, "step": 250390 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00016013506978209796, "loss": 2.0548, "step": 250395 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00016013359301430966, "loss": 2.1646, "step": 250400 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.0001601321162259786, "loss": 2.1881, "step": 250405 }, { "epoch": 0.59, "grad_norm": 2.53125, "learning_rate": 0.00016013063941710536, "loss": 2.2139, "step": 250410 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.0001601291625876904, "loss": 2.0586, "step": 250415 }, { "epoch": 0.59, "grad_norm": 1.765625, "learning_rate": 0.0001601276857377342, "loss": 1.9314, "step": 250420 }, { "epoch": 0.59, "grad_norm": 2.90625, "learning_rate": 0.00016012620886723733, "loss": 2.1044, "step": 250425 }, { "epoch": 0.59, "grad_norm": 1.984375, "learning_rate": 0.00016012473197620026, "loss": 2.0021, "step": 250430 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.00016012325506462345, "loss": 2.2389, "step": 250435 }, { "epoch": 0.59, "grad_norm": 1.7578125, "learning_rate": 0.00016012177813250745, "loss": 2.03, "step": 250440 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.0001601203011798528, "loss": 2.064, "step": 250445 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.00016011882420665993, "loss": 2.1517, "step": 250450 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.0001601173472129294, "loss": 2.0504, "step": 250455 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016011587019866167, "loss": 2.1684, "step": 250460 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016011439316385728, "loss": 2.1439, "step": 250465 }, { "epoch": 0.59, "grad_norm": 1.8359375, "learning_rate": 0.00016011291610851672, "loss": 2.072, "step": 250470 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.0001601114390326405, "loss": 2.3016, "step": 250475 }, { "epoch": 0.59, "grad_norm": 2.578125, "learning_rate": 0.00016010996193622913, "loss": 2.1856, "step": 250480 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016010848481928311, "loss": 2.2848, "step": 250485 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.00016010700768180294, "loss": 2.089, "step": 250490 }, { "epoch": 0.59, "grad_norm": 1.8515625, "learning_rate": 0.0001601055305237891, "loss": 2.0027, "step": 250495 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016010405334524215, "loss": 2.1037, "step": 250500 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016010257614616256, "loss": 1.9858, "step": 250505 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00016010109892655082, "loss": 2.0746, "step": 250510 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016009962168640749, "loss": 1.9858, "step": 250515 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.00016009814442573302, "loss": 2.1086, "step": 250520 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016009666714452792, "loss": 2.116, "step": 250525 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00016009518984279274, "loss": 2.1338, "step": 250530 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016009371252052792, "loss": 2.0435, "step": 250535 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016009223517773403, "loss": 1.9589, "step": 250540 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.00016009075781441153, "loss": 2.1084, "step": 250545 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001600892804305609, "loss": 2.0904, "step": 250550 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00016008780302618271, "loss": 1.969, "step": 250555 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016008632560127745, "loss": 2.0995, "step": 250560 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001600848481558456, "loss": 2.1454, "step": 250565 }, { "epoch": 0.59, "grad_norm": 2.59375, "learning_rate": 0.0001600833706898877, "loss": 2.1804, "step": 250570 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00016008189320340419, "loss": 2.118, "step": 250575 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00016008041569639563, "loss": 1.9901, "step": 250580 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00016007893816886252, "loss": 1.9929, "step": 250585 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00016007746062080537, "loss": 2.0063, "step": 250590 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016007598305222464, "loss": 2.1477, "step": 250595 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016007450546312086, "loss": 2.0159, "step": 250600 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00016007302785349457, "loss": 2.0822, "step": 250605 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.0001600715502233462, "loss": 2.017, "step": 250610 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00016007007257267633, "loss": 2.0676, "step": 250615 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00016006859490148547, "loss": 1.9321, "step": 250620 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016006711720977404, "loss": 2.0931, "step": 250625 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.0001600656394975426, "loss": 2.264, "step": 250630 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00016006416176479163, "loss": 2.328, "step": 250635 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00016006268401152173, "loss": 2.0508, "step": 250640 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00016006120623773324, "loss": 1.9165, "step": 250645 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016005972844342677, "loss": 2.0327, "step": 250650 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00016005825062860284, "loss": 2.3587, "step": 250655 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001600567727932619, "loss": 2.1044, "step": 250660 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.00016005529493740448, "loss": 2.1216, "step": 250665 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00016005381706103112, "loss": 1.8123, "step": 250670 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00016005233916414224, "loss": 2.0185, "step": 250675 }, { "epoch": 0.59, "grad_norm": 2.515625, "learning_rate": 0.00016005086124673838, "loss": 2.1875, "step": 250680 }, { "epoch": 0.59, "grad_norm": 1.8671875, "learning_rate": 0.0001600493833088201, "loss": 1.9561, "step": 250685 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016004790535038783, "loss": 2.0948, "step": 250690 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00016004642737144211, "loss": 1.8907, "step": 250695 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016004494937198345, "loss": 1.9109, "step": 250700 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00016004347135201237, "loss": 2.1522, "step": 250705 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.0001600419933115293, "loss": 2.0585, "step": 250710 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00016004051525053483, "loss": 2.0651, "step": 250715 }, { "epoch": 0.59, "grad_norm": 1.7890625, "learning_rate": 0.00016003903716902944, "loss": 2.218, "step": 250720 }, { "epoch": 0.59, "grad_norm": 1.7265625, "learning_rate": 0.0001600375590670136, "loss": 1.7863, "step": 250725 }, { "epoch": 0.59, "grad_norm": 2.6875, "learning_rate": 0.00016003608094448786, "loss": 2.1892, "step": 250730 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.0001600346028014527, "loss": 2.0713, "step": 250735 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00016003312463790863, "loss": 2.076, "step": 250740 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016003164645385614, "loss": 2.1764, "step": 250745 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001600301682492958, "loss": 2.0751, "step": 250750 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.000160028690024228, "loss": 2.2925, "step": 250755 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.00016002721177865335, "loss": 2.0972, "step": 250760 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001600257335125723, "loss": 2.2228, "step": 250765 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00016002425522598538, "loss": 2.1749, "step": 250770 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.00016002277691889308, "loss": 2.0432, "step": 250775 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00016002129859129594, "loss": 1.9521, "step": 250780 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.0001600198202431944, "loss": 1.9846, "step": 250785 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.000160018341874589, "loss": 2.3014, "step": 250790 }, { "epoch": 0.59, "grad_norm": 2.59375, "learning_rate": 0.00016001686348548027, "loss": 1.9832, "step": 250795 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00016001538507586869, "loss": 2.204, "step": 250800 }, { "epoch": 0.59, "grad_norm": 1.8984375, "learning_rate": 0.00016001390664575476, "loss": 1.9668, "step": 250805 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00016001242819513895, "loss": 2.039, "step": 250810 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00016001094972402185, "loss": 2.0433, "step": 250815 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00016000947123240392, "loss": 2.149, "step": 250820 }, { "epoch": 0.59, "grad_norm": 3.40625, "learning_rate": 0.00016000799272028566, "loss": 2.0236, "step": 250825 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00016000651418766757, "loss": 2.0125, "step": 250830 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.0001600050356345502, "loss": 2.0323, "step": 250835 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00016000355706093398, "loss": 2.0678, "step": 250840 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00016000207846681946, "loss": 2.1788, "step": 250845 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00016000059985220717, "loss": 2.0195, "step": 250850 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00015999912121709758, "loss": 2.0635, "step": 250855 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.0001599976425614912, "loss": 2.0795, "step": 250860 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015999616388538853, "loss": 2.1605, "step": 250865 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001599946851887901, "loss": 1.8528, "step": 250870 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.0001599932064716964, "loss": 1.9879, "step": 250875 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001599917277341079, "loss": 2.1989, "step": 250880 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00015999024897602512, "loss": 2.1593, "step": 250885 }, { "epoch": 0.59, "grad_norm": 2.578125, "learning_rate": 0.00015998877019744864, "loss": 2.1961, "step": 250890 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015998729139837887, "loss": 2.2013, "step": 250895 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015998581257881637, "loss": 2.0338, "step": 250900 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015998433373876165, "loss": 2.0584, "step": 250905 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015998285487821515, "loss": 2.227, "step": 250910 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.00015998137599717747, "loss": 2.1877, "step": 250915 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.00015997989709564902, "loss": 2.0329, "step": 250920 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015997841817363035, "loss": 2.1278, "step": 250925 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00015997693923112198, "loss": 2.1937, "step": 250930 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.0001599754602681244, "loss": 1.9799, "step": 250935 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00015997398128463812, "loss": 2.058, "step": 250940 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00015997250228066363, "loss": 2.1719, "step": 250945 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00015997102325620143, "loss": 2.0926, "step": 250950 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00015996954421125206, "loss": 2.1463, "step": 250955 }, { "epoch": 0.59, "grad_norm": 1.703125, "learning_rate": 0.000159968065145816, "loss": 1.9402, "step": 250960 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00015996658605989378, "loss": 2.0698, "step": 250965 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.0001599651069534859, "loss": 2.2511, "step": 250970 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001599636278265928, "loss": 2.2597, "step": 250975 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00015996214867921506, "loss": 2.0918, "step": 250980 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015996066951135318, "loss": 2.2762, "step": 250985 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015995919032300764, "loss": 1.9907, "step": 250990 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00015995771111417892, "loss": 2.0677, "step": 250995 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.00015995623188486762, "loss": 2.2815, "step": 251000 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.0001599547526350741, "loss": 2.1566, "step": 251005 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.000159953273364799, "loss": 2.1967, "step": 251010 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001599517940740428, "loss": 2.1588, "step": 251015 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015995031476280595, "loss": 2.0009, "step": 251020 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00015994883543108898, "loss": 1.9773, "step": 251025 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015994735607889238, "loss": 2.1914, "step": 251030 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00015994587670621673, "loss": 2.1263, "step": 251035 }, { "epoch": 0.59, "grad_norm": 2.546875, "learning_rate": 0.00015994439731306247, "loss": 2.092, "step": 251040 }, { "epoch": 0.59, "grad_norm": 2.515625, "learning_rate": 0.0001599429178994301, "loss": 2.205, "step": 251045 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015994143846532013, "loss": 2.172, "step": 251050 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.00015993995901073308, "loss": 2.1918, "step": 251055 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015993847953566946, "loss": 2.0036, "step": 251060 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.0001599370000401298, "loss": 1.9257, "step": 251065 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00015993552052411453, "loss": 2.2296, "step": 251070 }, { "epoch": 0.59, "grad_norm": 2.75, "learning_rate": 0.00015993404098762421, "loss": 2.0322, "step": 251075 }, { "epoch": 0.59, "grad_norm": 1.8359375, "learning_rate": 0.00015993256143065937, "loss": 1.9281, "step": 251080 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.00015993108185322042, "loss": 2.1584, "step": 251085 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.00015992960225530797, "loss": 2.2082, "step": 251090 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015992812263692247, "loss": 2.1195, "step": 251095 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015992664299806445, "loss": 2.2512, "step": 251100 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00015992516333873438, "loss": 2.0015, "step": 251105 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.00015992368365893276, "loss": 2.1152, "step": 251110 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001599222039586602, "loss": 2.1487, "step": 251115 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015992072423791703, "loss": 2.317, "step": 251120 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00015991924449670393, "loss": 1.9772, "step": 251125 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001599177647350213, "loss": 2.2225, "step": 251130 }, { "epoch": 0.59, "grad_norm": 1.59375, "learning_rate": 0.00015991628495286967, "loss": 1.9591, "step": 251135 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015991480515024957, "loss": 1.9652, "step": 251140 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.0001599133253271615, "loss": 2.0559, "step": 251145 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00015991184548360592, "loss": 2.0872, "step": 251150 }, { "epoch": 0.59, "grad_norm": 2.71875, "learning_rate": 0.00015991036561958336, "loss": 2.1731, "step": 251155 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00015990888573509437, "loss": 2.1721, "step": 251160 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.0001599074058301394, "loss": 1.9429, "step": 251165 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.00015990592590471895, "loss": 2.1037, "step": 251170 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.0001599044459588336, "loss": 2.0053, "step": 251175 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015990296599248377, "loss": 2.1393, "step": 251180 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00015990148600567, "loss": 2.0736, "step": 251185 }, { "epoch": 0.59, "grad_norm": 2.5625, "learning_rate": 0.0001599000059983928, "loss": 2.1589, "step": 251190 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001598985259706527, "loss": 2.0988, "step": 251195 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.0001598970459224501, "loss": 1.9904, "step": 251200 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015989556585378563, "loss": 2.0738, "step": 251205 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00015989408576465978, "loss": 2.1321, "step": 251210 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.000159892605655073, "loss": 2.126, "step": 251215 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015989112552502582, "loss": 2.1969, "step": 251220 }, { "epoch": 0.59, "grad_norm": 2.640625, "learning_rate": 0.00015988964537451872, "loss": 1.917, "step": 251225 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00015988816520355227, "loss": 2.0158, "step": 251230 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015988668501212692, "loss": 1.9792, "step": 251235 }, { "epoch": 0.59, "grad_norm": 2.515625, "learning_rate": 0.0001598852048002432, "loss": 2.0751, "step": 251240 }, { "epoch": 0.59, "grad_norm": 1.6640625, "learning_rate": 0.0001598837245679016, "loss": 1.9554, "step": 251245 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015988224431510263, "loss": 2.0419, "step": 251250 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.0001598807640418468, "loss": 2.034, "step": 251255 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.0001598792837481346, "loss": 2.0715, "step": 251260 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015987780343396657, "loss": 1.9942, "step": 251265 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.0001598763230993432, "loss": 2.1517, "step": 251270 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015987484274426498, "loss": 2.0768, "step": 251275 }, { "epoch": 0.59, "grad_norm": 2.65625, "learning_rate": 0.00015987336236873242, "loss": 1.8083, "step": 251280 }, { "epoch": 0.59, "grad_norm": 1.78125, "learning_rate": 0.00015987188197274604, "loss": 2.1357, "step": 251285 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00015987040155630638, "loss": 2.0794, "step": 251290 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00015986892111941387, "loss": 1.9357, "step": 251295 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00015986744066206904, "loss": 2.0238, "step": 251300 }, { "epoch": 0.59, "grad_norm": 2.53125, "learning_rate": 0.00015986596018427243, "loss": 2.0771, "step": 251305 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.0001598644796860245, "loss": 1.9867, "step": 251310 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015986299916732582, "loss": 2.2, "step": 251315 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.0001598615186281768, "loss": 2.0483, "step": 251320 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.000159860038068578, "loss": 2.2034, "step": 251325 }, { "epoch": 0.59, "grad_norm": 1.78125, "learning_rate": 0.00015985855748852995, "loss": 2.071, "step": 251330 }, { "epoch": 0.59, "grad_norm": 1.78125, "learning_rate": 0.00015985707688803314, "loss": 2.0951, "step": 251335 }, { "epoch": 0.59, "grad_norm": 1.90625, "learning_rate": 0.00015985559626708807, "loss": 2.1206, "step": 251340 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00015985411562569523, "loss": 2.1137, "step": 251345 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001598526349638551, "loss": 2.1325, "step": 251350 }, { "epoch": 0.59, "grad_norm": 1.8125, "learning_rate": 0.00015985115428156829, "loss": 2.0657, "step": 251355 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.0001598496735788352, "loss": 1.9893, "step": 251360 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001598481928556564, "loss": 2.1188, "step": 251365 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00015984671211203237, "loss": 2.0944, "step": 251370 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001598452313479636, "loss": 1.9673, "step": 251375 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015984375056345062, "loss": 2.2881, "step": 251380 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00015984226975849394, "loss": 1.9345, "step": 251385 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00015984078893309405, "loss": 2.1798, "step": 251390 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00015983930808725146, "loss": 2.0492, "step": 251395 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.00015983782722096666, "loss": 2.0017, "step": 251400 }, { "epoch": 0.59, "grad_norm": 1.8515625, "learning_rate": 0.00015983634633424018, "loss": 1.792, "step": 251405 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015983486542707254, "loss": 2.2011, "step": 251410 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001598333844994642, "loss": 2.1411, "step": 251415 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015983190355141572, "loss": 2.1668, "step": 251420 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015983042258292755, "loss": 2.0741, "step": 251425 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00015982894159400024, "loss": 2.1152, "step": 251430 }, { "epoch": 0.59, "grad_norm": 2.625, "learning_rate": 0.00015982746058463426, "loss": 2.1734, "step": 251435 }, { "epoch": 0.59, "grad_norm": 1.875, "learning_rate": 0.00015982597955483018, "loss": 2.1049, "step": 251440 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.0001598244985045884, "loss": 1.93, "step": 251445 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.0001598230174339095, "loss": 2.2901, "step": 251450 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.000159821536342794, "loss": 1.9958, "step": 251455 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015982005523124235, "loss": 2.0649, "step": 251460 }, { "epoch": 0.59, "grad_norm": 1.53125, "learning_rate": 0.00015981857409925508, "loss": 2.2142, "step": 251465 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015981709294683274, "loss": 1.9441, "step": 251470 }, { "epoch": 0.59, "grad_norm": 2.5625, "learning_rate": 0.00015981561177397577, "loss": 2.0636, "step": 251475 }, { "epoch": 0.59, "grad_norm": 2.71875, "learning_rate": 0.0001598141305806847, "loss": 1.8519, "step": 251480 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015981264936696003, "loss": 1.9629, "step": 251485 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.0001598111681328023, "loss": 2.1785, "step": 251490 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015980968687821196, "loss": 1.8992, "step": 251495 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015980820560318958, "loss": 2.118, "step": 251500 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.0001598067243077356, "loss": 2.0815, "step": 251505 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00015980524299185058, "loss": 2.0673, "step": 251510 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.000159803761655535, "loss": 1.9509, "step": 251515 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00015980228029878937, "loss": 2.0835, "step": 251520 }, { "epoch": 0.59, "grad_norm": 3.796875, "learning_rate": 0.0001598007989216142, "loss": 2.2168, "step": 251525 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015979931752401, "loss": 1.9919, "step": 251530 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015979783610597725, "loss": 2.0032, "step": 251535 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001597963546675165, "loss": 2.0387, "step": 251540 }, { "epoch": 0.59, "grad_norm": 2.625, "learning_rate": 0.00015979487320862818, "loss": 2.251, "step": 251545 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.0001597933917293129, "loss": 2.2321, "step": 251550 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015979191022957108, "loss": 2.1001, "step": 251555 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00015979042870940328, "loss": 2.2615, "step": 251560 }, { "epoch": 0.59, "grad_norm": 2.65625, "learning_rate": 0.00015978894716880997, "loss": 2.2584, "step": 251565 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00015978746560779168, "loss": 2.1185, "step": 251570 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.0001597859840263489, "loss": 1.9933, "step": 251575 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00015978450242448215, "loss": 2.0986, "step": 251580 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015978302080219193, "loss": 2.0861, "step": 251585 }, { "epoch": 0.59, "grad_norm": 1.859375, "learning_rate": 0.00015978153915947873, "loss": 2.0833, "step": 251590 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001597800574963431, "loss": 2.071, "step": 251595 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015977857581278551, "loss": 2.0, "step": 251600 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00015977709410880647, "loss": 2.0191, "step": 251605 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.00015977561238440647, "loss": 2.2386, "step": 251610 }, { "epoch": 0.59, "grad_norm": 2.5625, "learning_rate": 0.0001597741306395861, "loss": 2.3438, "step": 251615 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015977264887434572, "loss": 2.0363, "step": 251620 }, { "epoch": 0.59, "grad_norm": 1.5234375, "learning_rate": 0.00015977116708868597, "loss": 1.9905, "step": 251625 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.0001597696852826073, "loss": 2.1649, "step": 251630 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.0001597682034561102, "loss": 2.1689, "step": 251635 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015976672160919524, "loss": 2.0708, "step": 251640 }, { "epoch": 0.59, "grad_norm": 2.5625, "learning_rate": 0.0001597652397418628, "loss": 2.1665, "step": 251645 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.00015976375785411355, "loss": 2.096, "step": 251650 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015976227594594787, "loss": 2.1197, "step": 251655 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015976079401736637, "loss": 2.1138, "step": 251660 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015975931206836945, "loss": 2.1659, "step": 251665 }, { "epoch": 0.59, "grad_norm": 1.578125, "learning_rate": 0.00015975783009895768, "loss": 1.9413, "step": 251670 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00015975634810913155, "loss": 2.0923, "step": 251675 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.0001597548660988916, "loss": 2.0469, "step": 251680 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00015975338406823826, "loss": 2.1641, "step": 251685 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.0001597519020171721, "loss": 1.9993, "step": 251690 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015975041994569357, "loss": 2.1409, "step": 251695 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.00015974893785380325, "loss": 2.277, "step": 251700 }, { "epoch": 0.59, "grad_norm": 2.609375, "learning_rate": 0.0001597474557415016, "loss": 2.3297, "step": 251705 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00015974597360878914, "loss": 2.0414, "step": 251710 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015974449145566636, "loss": 1.9488, "step": 251715 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00015974300928213382, "loss": 1.9796, "step": 251720 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00015974152708819194, "loss": 2.0274, "step": 251725 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015974004487384125, "loss": 2.0158, "step": 251730 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015973856263908233, "loss": 2.1004, "step": 251735 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015973708038391563, "loss": 1.9583, "step": 251740 }, { "epoch": 0.59, "grad_norm": 2.859375, "learning_rate": 0.00015973559810834165, "loss": 1.9657, "step": 251745 }, { "epoch": 0.59, "grad_norm": 1.8828125, "learning_rate": 0.0001597341158123609, "loss": 1.9195, "step": 251750 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015973263349597388, "loss": 2.1277, "step": 251755 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015973115115918113, "loss": 2.1766, "step": 251760 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.00015972966880198312, "loss": 1.9001, "step": 251765 }, { "epoch": 0.59, "grad_norm": 2.53125, "learning_rate": 0.0001597281864243804, "loss": 2.0187, "step": 251770 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00015972670402637342, "loss": 2.103, "step": 251775 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.0001597252216079627, "loss": 2.101, "step": 251780 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.0001597237391691488, "loss": 2.1551, "step": 251785 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00015972225670993217, "loss": 2.1075, "step": 251790 }, { "epoch": 0.59, "grad_norm": 2.375, "learning_rate": 0.00015972077423031332, "loss": 2.1685, "step": 251795 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015971929173029282, "loss": 1.9976, "step": 251800 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00015971780920987104, "loss": 2.3116, "step": 251805 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00015971632666904865, "loss": 2.1115, "step": 251810 }, { "epoch": 0.59, "grad_norm": 1.8359375, "learning_rate": 0.00015971484410782605, "loss": 2.1678, "step": 251815 }, { "epoch": 0.59, "grad_norm": 1.7734375, "learning_rate": 0.0001597133615262038, "loss": 2.006, "step": 251820 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015971187892418237, "loss": 2.1649, "step": 251825 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.00015971039630176227, "loss": 2.0805, "step": 251830 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00015970891365894402, "loss": 2.194, "step": 251835 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.0001597074309957281, "loss": 2.0472, "step": 251840 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001597059483121151, "loss": 1.993, "step": 251845 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.0001597044656081054, "loss": 2.155, "step": 251850 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.0001597029828836996, "loss": 2.2526, "step": 251855 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015970150013889817, "loss": 2.0008, "step": 251860 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00015970001737370163, "loss": 2.0977, "step": 251865 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.0001596985345881105, "loss": 1.9942, "step": 251870 }, { "epoch": 0.59, "grad_norm": 1.7890625, "learning_rate": 0.00015969705178212523, "loss": 2.0752, "step": 251875 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.0001596955689557464, "loss": 2.2065, "step": 251880 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00015969408610897446, "loss": 2.1156, "step": 251885 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015969260324180993, "loss": 2.1147, "step": 251890 }, { "epoch": 0.59, "grad_norm": 2.59375, "learning_rate": 0.00015969112035425336, "loss": 2.1721, "step": 251895 }, { "epoch": 0.59, "grad_norm": 1.9140625, "learning_rate": 0.00015968963744630518, "loss": 2.0941, "step": 251900 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015968815451796595, "loss": 1.9414, "step": 251905 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.00015968667156923618, "loss": 1.913, "step": 251910 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00015968518860011633, "loss": 2.1832, "step": 251915 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.00015968370561060698, "loss": 2.238, "step": 251920 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015968222260070856, "loss": 2.1188, "step": 251925 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015968073957042163, "loss": 1.9834, "step": 251930 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001596792565197467, "loss": 1.9456, "step": 251935 }, { "epoch": 0.59, "grad_norm": 2.75, "learning_rate": 0.00015967777344868416, "loss": 2.118, "step": 251940 }, { "epoch": 0.59, "grad_norm": 1.6953125, "learning_rate": 0.00015967629035723468, "loss": 1.9601, "step": 251945 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.0001596748072453987, "loss": 2.0772, "step": 251950 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015967332411317671, "loss": 2.1303, "step": 251955 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00015967184096056923, "loss": 2.0719, "step": 251960 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00015967035778757677, "loss": 2.0221, "step": 251965 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015966887459419984, "loss": 1.9819, "step": 251970 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015966739138043894, "loss": 1.9628, "step": 251975 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00015966590814629457, "loss": 1.9712, "step": 251980 }, { "epoch": 0.59, "grad_norm": 2.640625, "learning_rate": 0.00015966442489176723, "loss": 2.1492, "step": 251985 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.00015966294161685746, "loss": 1.8165, "step": 251990 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00015966145832156575, "loss": 2.0329, "step": 251995 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.0001596599750058926, "loss": 2.0888, "step": 252000 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00015965849166983852, "loss": 2.099, "step": 252005 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.000159657008313404, "loss": 2.139, "step": 252010 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.0001596555249365896, "loss": 2.1556, "step": 252015 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.00015965404153939574, "loss": 2.0652, "step": 252020 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.000159652558121823, "loss": 1.9825, "step": 252025 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00015965107468387187, "loss": 2.1332, "step": 252030 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00015964959122554285, "loss": 2.1126, "step": 252035 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015964810774683646, "loss": 2.1855, "step": 252040 }, { "epoch": 0.59, "grad_norm": 2.9375, "learning_rate": 0.00015964662424775318, "loss": 2.2262, "step": 252045 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.00015964514072829353, "loss": 2.1727, "step": 252050 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.000159643657188458, "loss": 2.2923, "step": 252055 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015964217362824718, "loss": 2.0987, "step": 252060 }, { "epoch": 0.59, "grad_norm": 2.515625, "learning_rate": 0.00015964069004766146, "loss": 2.2015, "step": 252065 }, { "epoch": 0.59, "grad_norm": 1.96875, "learning_rate": 0.00015963920644670142, "loss": 2.2311, "step": 252070 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.0001596377228253675, "loss": 1.9827, "step": 252075 }, { "epoch": 0.59, "grad_norm": 1.90625, "learning_rate": 0.00015963623918366032, "loss": 2.0656, "step": 252080 }, { "epoch": 0.59, "grad_norm": 2.546875, "learning_rate": 0.00015963475552158027, "loss": 2.2417, "step": 252085 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00015963327183912793, "loss": 1.9916, "step": 252090 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015963178813630378, "loss": 2.0208, "step": 252095 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015963030441310832, "loss": 2.2356, "step": 252100 }, { "epoch": 0.59, "grad_norm": 2.484375, "learning_rate": 0.00015962882066954205, "loss": 2.3867, "step": 252105 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001596273369056055, "loss": 1.9624, "step": 252110 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001596258531212992, "loss": 2.1091, "step": 252115 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00015962436931662364, "loss": 2.0265, "step": 252120 }, { "epoch": 0.59, "grad_norm": 1.8046875, "learning_rate": 0.00015962288549157926, "loss": 2.0649, "step": 252125 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.00015962140164616667, "loss": 2.0196, "step": 252130 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.00015961991778038628, "loss": 2.1189, "step": 252135 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.00015961843389423867, "loss": 1.9697, "step": 252140 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.00015961694998772432, "loss": 2.0704, "step": 252145 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015961546606084375, "loss": 2.1739, "step": 252150 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.00015961398211359744, "loss": 2.1838, "step": 252155 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015961249814598592, "loss": 2.0774, "step": 252160 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015961101415800969, "loss": 2.0923, "step": 252165 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015960953014966924, "loss": 2.013, "step": 252170 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015960804612096511, "loss": 2.0533, "step": 252175 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.0001596065620718978, "loss": 2.1012, "step": 252180 }, { "epoch": 0.59, "grad_norm": 1.859375, "learning_rate": 0.00015960507800246777, "loss": 2.1684, "step": 252185 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.0001596035939126756, "loss": 2.1392, "step": 252190 }, { "epoch": 0.59, "grad_norm": 1.90625, "learning_rate": 0.00015960210980252176, "loss": 2.1521, "step": 252195 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00015960062567200675, "loss": 2.0447, "step": 252200 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.0001595991415211311, "loss": 2.0847, "step": 252205 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00015959765734989526, "loss": 2.1361, "step": 252210 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001595961731582998, "loss": 2.1502, "step": 252215 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015959468894634522, "loss": 2.12, "step": 252220 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.000159593204714032, "loss": 2.2033, "step": 252225 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00015959172046136068, "loss": 2.1311, "step": 252230 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.0001595902361883317, "loss": 2.1387, "step": 252235 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015958875189494567, "loss": 2.0924, "step": 252240 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.000159587267581203, "loss": 2.0361, "step": 252245 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00015958578324710428, "loss": 2.0267, "step": 252250 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00015958429889264996, "loss": 2.0533, "step": 252255 }, { "epoch": 0.59, "grad_norm": 1.984375, "learning_rate": 0.00015958281451784054, "loss": 2.0961, "step": 252260 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015958133012267657, "loss": 2.0326, "step": 252265 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00015957984570715854, "loss": 2.0999, "step": 252270 }, { "epoch": 0.59, "grad_norm": 2.578125, "learning_rate": 0.00015957836127128696, "loss": 2.2405, "step": 252275 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015957687681506232, "loss": 2.0859, "step": 252280 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015957539233848512, "loss": 2.2768, "step": 252285 }, { "epoch": 0.59, "grad_norm": 2.40625, "learning_rate": 0.0001595739078415559, "loss": 2.1692, "step": 252290 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015957242332427515, "loss": 1.9057, "step": 252295 }, { "epoch": 0.59, "grad_norm": 3.125, "learning_rate": 0.00015957093878664339, "loss": 2.0784, "step": 252300 }, { "epoch": 0.59, "grad_norm": 2.453125, "learning_rate": 0.00015956945422866112, "loss": 2.0512, "step": 252305 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015956796965032886, "loss": 1.9347, "step": 252310 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015956648505164705, "loss": 2.1072, "step": 252315 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001595650004326163, "loss": 2.19, "step": 252320 }, { "epoch": 0.59, "grad_norm": 2.1875, "learning_rate": 0.00015956351579323702, "loss": 2.1033, "step": 252325 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001595620311335098, "loss": 2.0666, "step": 252330 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001595605464534351, "loss": 2.0586, "step": 252335 }, { "epoch": 0.59, "grad_norm": 1.9921875, "learning_rate": 0.00015955906175301344, "loss": 2.1552, "step": 252340 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015955757703224529, "loss": 2.1183, "step": 252345 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015955609229113121, "loss": 2.1198, "step": 252350 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.0001595546075296717, "loss": 2.1536, "step": 252355 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.00015955312274786726, "loss": 2.08, "step": 252360 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015955163794571838, "loss": 1.9176, "step": 252365 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.0001595501531232256, "loss": 2.1442, "step": 252370 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.0001595486682803894, "loss": 2.2035, "step": 252375 }, { "epoch": 0.59, "grad_norm": 2.671875, "learning_rate": 0.00015954718341721028, "loss": 2.1096, "step": 252380 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00015954569853368877, "loss": 2.0678, "step": 252385 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015954421362982538, "loss": 2.1821, "step": 252390 }, { "epoch": 0.59, "grad_norm": 2.578125, "learning_rate": 0.0001595427287056206, "loss": 2.0127, "step": 252395 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015954124376107496, "loss": 2.1446, "step": 252400 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.00015953975879618893, "loss": 2.1725, "step": 252405 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00015953827381096303, "loss": 2.0704, "step": 252410 }, { "epoch": 0.59, "grad_norm": 2.5, "learning_rate": 0.0001595367888053978, "loss": 2.37, "step": 252415 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015953530377949368, "loss": 1.9053, "step": 252420 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00015953381873325126, "loss": 2.0475, "step": 252425 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.000159532333666671, "loss": 2.0109, "step": 252430 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015953084857975343, "loss": 2.1404, "step": 252435 }, { "epoch": 0.59, "grad_norm": 2.015625, "learning_rate": 0.00015952936347249903, "loss": 1.8649, "step": 252440 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001595278783449083, "loss": 2.1713, "step": 252445 }, { "epoch": 0.59, "grad_norm": 2.359375, "learning_rate": 0.00015952639319698176, "loss": 2.164, "step": 252450 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00015952490802871995, "loss": 2.0598, "step": 252455 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00015952342284012336, "loss": 2.0701, "step": 252460 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001595219376311925, "loss": 2.1155, "step": 252465 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015952045240192787, "loss": 2.004, "step": 252470 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.0001595189671523299, "loss": 2.094, "step": 252475 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00015951748188239924, "loss": 2.1287, "step": 252480 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001595159965921363, "loss": 2.0485, "step": 252485 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015951451128154163, "loss": 2.2208, "step": 252490 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00015951302595061572, "loss": 2.0216, "step": 252495 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.0001595115405993591, "loss": 2.1909, "step": 252500 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015951005522777222, "loss": 2.0451, "step": 252505 }, { "epoch": 0.59, "grad_norm": 1.9609375, "learning_rate": 0.00015950856983585566, "loss": 2.1315, "step": 252510 }, { "epoch": 0.59, "grad_norm": 1.8984375, "learning_rate": 0.00015950708442360987, "loss": 1.9218, "step": 252515 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.0001595055989910354, "loss": 1.9114, "step": 252520 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00015950411353813274, "loss": 2.2, "step": 252525 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015950262806490234, "loss": 1.9687, "step": 252530 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.0001595011425713448, "loss": 2.0158, "step": 252535 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00015949965705746062, "loss": 2.1369, "step": 252540 }, { "epoch": 0.59, "grad_norm": 2.109375, "learning_rate": 0.00015949817152325024, "loss": 2.0184, "step": 252545 }, { "epoch": 0.59, "grad_norm": 2.140625, "learning_rate": 0.00015949668596871422, "loss": 1.9434, "step": 252550 }, { "epoch": 0.59, "grad_norm": 2.234375, "learning_rate": 0.00015949520039385307, "loss": 2.0965, "step": 252555 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015949371479866725, "loss": 1.9684, "step": 252560 }, { "epoch": 0.59, "grad_norm": 2.078125, "learning_rate": 0.00015949222918315733, "loss": 2.1162, "step": 252565 }, { "epoch": 0.59, "grad_norm": 2.296875, "learning_rate": 0.00015949074354732376, "loss": 2.0943, "step": 252570 }, { "epoch": 0.59, "grad_norm": 1.8671875, "learning_rate": 0.0001594892578911671, "loss": 2.2006, "step": 252575 }, { "epoch": 0.59, "grad_norm": 2.46875, "learning_rate": 0.00015948777221468777, "loss": 2.1777, "step": 252580 }, { "epoch": 0.59, "grad_norm": 1.953125, "learning_rate": 0.0001594862865178864, "loss": 2.1881, "step": 252585 }, { "epoch": 0.59, "grad_norm": 1.671875, "learning_rate": 0.0001594848008007634, "loss": 2.0449, "step": 252590 }, { "epoch": 0.59, "grad_norm": 1.6875, "learning_rate": 0.00015948331506331935, "loss": 2.2308, "step": 252595 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.0001594818293055547, "loss": 2.1885, "step": 252600 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.00015948034352746996, "loss": 2.0264, "step": 252605 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015947885772906568, "loss": 2.0211, "step": 252610 }, { "epoch": 0.59, "grad_norm": 2.265625, "learning_rate": 0.00015947737191034233, "loss": 2.1013, "step": 252615 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015947588607130044, "loss": 2.1049, "step": 252620 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.00015947440021194052, "loss": 1.9822, "step": 252625 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.00015947291433226305, "loss": 2.2905, "step": 252630 }, { "epoch": 0.59, "grad_norm": 2.03125, "learning_rate": 0.00015947142843226854, "loss": 2.0553, "step": 252635 }, { "epoch": 0.59, "grad_norm": 1.8828125, "learning_rate": 0.00015946994251195756, "loss": 1.936, "step": 252640 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001594684565713305, "loss": 2.0142, "step": 252645 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.000159466970610388, "loss": 2.1442, "step": 252650 }, { "epoch": 0.59, "grad_norm": 1.984375, "learning_rate": 0.00015946548462913046, "loss": 2.1392, "step": 252655 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00015946399862755848, "loss": 1.9438, "step": 252660 }, { "epoch": 0.59, "grad_norm": 2.046875, "learning_rate": 0.00015946251260567245, "loss": 2.1439, "step": 252665 }, { "epoch": 0.59, "grad_norm": 2.09375, "learning_rate": 0.000159461026563473, "loss": 2.03, "step": 252670 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.0001594595405009606, "loss": 1.9713, "step": 252675 }, { "epoch": 0.59, "grad_norm": 2.4375, "learning_rate": 0.0001594580544181357, "loss": 1.9498, "step": 252680 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015945656831499886, "loss": 2.0775, "step": 252685 }, { "epoch": 0.59, "grad_norm": 2.0, "learning_rate": 0.00015945508219155057, "loss": 1.9094, "step": 252690 }, { "epoch": 0.59, "grad_norm": 2.125, "learning_rate": 0.00015945359604779138, "loss": 2.0307, "step": 252695 }, { "epoch": 0.59, "grad_norm": 1.9375, "learning_rate": 0.00015945210988372175, "loss": 2.2087, "step": 252700 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.00015945062369934217, "loss": 1.8994, "step": 252705 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015944913749465323, "loss": 2.1025, "step": 252710 }, { "epoch": 0.59, "grad_norm": 2.15625, "learning_rate": 0.00015944765126965532, "loss": 2.1389, "step": 252715 }, { "epoch": 0.59, "grad_norm": 2.171875, "learning_rate": 0.00015944616502434905, "loss": 2.2125, "step": 252720 }, { "epoch": 0.59, "grad_norm": 1.921875, "learning_rate": 0.00015944467875873491, "loss": 2.0692, "step": 252725 }, { "epoch": 0.59, "grad_norm": 2.421875, "learning_rate": 0.00015944319247281337, "loss": 2.1247, "step": 252730 }, { "epoch": 0.59, "grad_norm": 2.546875, "learning_rate": 0.00015944170616658497, "loss": 2.0503, "step": 252735 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.0001594402198400502, "loss": 2.0037, "step": 252740 }, { "epoch": 0.59, "grad_norm": 2.25, "learning_rate": 0.00015943873349320956, "loss": 2.1196, "step": 252745 }, { "epoch": 0.59, "grad_norm": 1.8203125, "learning_rate": 0.00015943724712606358, "loss": 2.3367, "step": 252750 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015943576073861276, "loss": 2.1636, "step": 252755 }, { "epoch": 0.59, "grad_norm": 1.984375, "learning_rate": 0.0001594342743308576, "loss": 1.9997, "step": 252760 }, { "epoch": 0.59, "grad_norm": 2.34375, "learning_rate": 0.00015943278790279863, "loss": 2.2046, "step": 252765 }, { "epoch": 0.59, "grad_norm": 1.84375, "learning_rate": 0.0001594313014544363, "loss": 1.9881, "step": 252770 }, { "epoch": 0.59, "grad_norm": 1.890625, "learning_rate": 0.00015942981498577122, "loss": 1.9309, "step": 252775 }, { "epoch": 0.59, "grad_norm": 2.3125, "learning_rate": 0.0001594283284968038, "loss": 2.1146, "step": 252780 }, { "epoch": 0.59, "grad_norm": 2.390625, "learning_rate": 0.00015942684198753463, "loss": 2.0507, "step": 252785 }, { "epoch": 0.59, "grad_norm": 2.0625, "learning_rate": 0.0001594253554579641, "loss": 1.9488, "step": 252790 }, { "epoch": 0.59, "grad_norm": 2.28125, "learning_rate": 0.0001594238689080928, "loss": 1.8954, "step": 252795 }, { "epoch": 0.59, "grad_norm": 1.9296875, "learning_rate": 0.00015942238233792127, "loss": 2.2077, "step": 252800 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00015942089574744997, "loss": 1.9944, "step": 252805 }, { "epoch": 0.59, "grad_norm": 2.328125, "learning_rate": 0.0001594194091366794, "loss": 2.315, "step": 252810 }, { "epoch": 0.59, "grad_norm": 2.203125, "learning_rate": 0.0001594179225056101, "loss": 2.0331, "step": 252815 }, { "epoch": 0.59, "grad_norm": 1.875, "learning_rate": 0.00015941643585424254, "loss": 2.1678, "step": 252820 }, { "epoch": 0.59, "grad_norm": 2.21875, "learning_rate": 0.00015941494918257724, "loss": 2.1828, "step": 252825 }, { "epoch": 0.59, "grad_norm": 1.9765625, "learning_rate": 0.00015941346249061475, "loss": 1.855, "step": 252830 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001594119757783555, "loss": 2.174, "step": 252835 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.0001594104890458001, "loss": 2.0387, "step": 252840 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015940900229294898, "loss": 2.1073, "step": 252845 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015940751551980263, "loss": 2.1289, "step": 252850 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015940602872636163, "loss": 1.9286, "step": 252855 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015940454191262643, "loss": 2.1298, "step": 252860 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015940305507859757, "loss": 2.1769, "step": 252865 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015940156822427556, "loss": 1.9893, "step": 252870 }, { "epoch": 0.6, "grad_norm": 1.6328125, "learning_rate": 0.00015940008134966092, "loss": 1.99, "step": 252875 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015939859445475408, "loss": 2.1068, "step": 252880 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015939710753955564, "loss": 2.2404, "step": 252885 }, { "epoch": 0.6, "grad_norm": 1.7109375, "learning_rate": 0.00015939562060406605, "loss": 1.977, "step": 252890 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015939413364828586, "loss": 2.194, "step": 252895 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.00015939264667221554, "loss": 2.0628, "step": 252900 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.0001593911596758556, "loss": 1.928, "step": 252905 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001593896726592066, "loss": 2.2035, "step": 252910 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.000159388185622269, "loss": 2.0418, "step": 252915 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001593866985650433, "loss": 2.0935, "step": 252920 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015938521148753005, "loss": 2.0702, "step": 252925 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015938372438972968, "loss": 2.0256, "step": 252930 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.0001593822372716428, "loss": 2.0374, "step": 252935 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015938075013326988, "loss": 2.2381, "step": 252940 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.0001593792629746114, "loss": 1.9277, "step": 252945 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.0001593777757956679, "loss": 2.1623, "step": 252950 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015937628859643988, "loss": 2.0046, "step": 252955 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015937480137692778, "loss": 2.0531, "step": 252960 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015937331413713225, "loss": 2.081, "step": 252965 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.0001593718268770537, "loss": 2.1196, "step": 252970 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015937033959669266, "loss": 1.9931, "step": 252975 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.0001593688522960496, "loss": 2.0939, "step": 252980 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015936736497512507, "loss": 2.1748, "step": 252985 }, { "epoch": 0.6, "grad_norm": 1.9296875, "learning_rate": 0.0001593658776339196, "loss": 2.0575, "step": 252990 }, { "epoch": 0.6, "grad_norm": 2.578125, "learning_rate": 0.00015936439027243367, "loss": 1.9551, "step": 252995 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015936290289066777, "loss": 2.0004, "step": 253000 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015936141548862244, "loss": 2.2452, "step": 253005 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015935992806629816, "loss": 2.1059, "step": 253010 }, { "epoch": 0.6, "grad_norm": 1.703125, "learning_rate": 0.00015935844062369542, "loss": 2.1556, "step": 253015 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015935695316081483, "loss": 2.3003, "step": 253020 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.0001593554656776568, "loss": 1.9721, "step": 253025 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015935397817422184, "loss": 2.1363, "step": 253030 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.0001593524906505105, "loss": 2.1288, "step": 253035 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.0001593510031065233, "loss": 2.0586, "step": 253040 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.0001593495155422607, "loss": 2.1084, "step": 253045 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.0001593480279577232, "loss": 2.1553, "step": 253050 }, { "epoch": 0.6, "grad_norm": 1.8515625, "learning_rate": 0.00015934654035291138, "loss": 2.1358, "step": 253055 }, { "epoch": 0.6, "grad_norm": 1.7421875, "learning_rate": 0.00015934505272782569, "loss": 2.0576, "step": 253060 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015934356508246665, "loss": 2.0052, "step": 253065 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015934207741683478, "loss": 2.0748, "step": 253070 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015934058973093057, "loss": 2.1363, "step": 253075 }, { "epoch": 0.6, "grad_norm": 2.90625, "learning_rate": 0.00015933910202475454, "loss": 2.1329, "step": 253080 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015933761429830722, "loss": 2.1316, "step": 253085 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015933612655158908, "loss": 1.9727, "step": 253090 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.00015933463878460064, "loss": 2.071, "step": 253095 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001593331509973424, "loss": 2.232, "step": 253100 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.0001593316631898149, "loss": 2.1291, "step": 253105 }, { "epoch": 0.6, "grad_norm": 2.65625, "learning_rate": 0.00015933017536201862, "loss": 2.0094, "step": 253110 }, { "epoch": 0.6, "grad_norm": 2.65625, "learning_rate": 0.00015932868751395408, "loss": 2.1302, "step": 253115 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015932719964562177, "loss": 2.0145, "step": 253120 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015932571175702223, "loss": 2.0578, "step": 253125 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015932422384815593, "loss": 2.2259, "step": 253130 }, { "epoch": 0.6, "grad_norm": 1.7890625, "learning_rate": 0.00015932273591902344, "loss": 1.9961, "step": 253135 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015932124796962517, "loss": 2.1295, "step": 253140 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.00015931975999996172, "loss": 2.1677, "step": 253145 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015931827201003357, "loss": 2.1821, "step": 253150 }, { "epoch": 0.6, "grad_norm": 1.8515625, "learning_rate": 0.0001593167839998412, "loss": 1.9363, "step": 253155 }, { "epoch": 0.6, "grad_norm": 1.7890625, "learning_rate": 0.00015931529596938514, "loss": 2.0836, "step": 253160 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015931380791866593, "loss": 2.1414, "step": 253165 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015931231984768403, "loss": 2.0924, "step": 253170 }, { "epoch": 0.6, "grad_norm": 2.734375, "learning_rate": 0.00015931083175643995, "loss": 1.9795, "step": 253175 }, { "epoch": 0.6, "grad_norm": 2.5, "learning_rate": 0.0001593093436449342, "loss": 2.0491, "step": 253180 }, { "epoch": 0.6, "grad_norm": 1.8671875, "learning_rate": 0.00015930785551316739, "loss": 2.0745, "step": 253185 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015930636736113986, "loss": 2.2482, "step": 253190 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015930487918885223, "loss": 2.1397, "step": 253195 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015930339099630496, "loss": 2.0561, "step": 253200 }, { "epoch": 0.6, "grad_norm": 1.8671875, "learning_rate": 0.0001593019027834986, "loss": 2.1013, "step": 253205 }, { "epoch": 0.6, "grad_norm": 1.921875, "learning_rate": 0.0001593004145504336, "loss": 2.2289, "step": 253210 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.0001592989262971105, "loss": 2.0299, "step": 253215 }, { "epoch": 0.6, "grad_norm": 2.765625, "learning_rate": 0.00015929743802352989, "loss": 2.0393, "step": 253220 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.0001592959497296921, "loss": 2.0277, "step": 253225 }, { "epoch": 0.6, "grad_norm": 2.828125, "learning_rate": 0.0001592944614155978, "loss": 2.1147, "step": 253230 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015929297308124742, "loss": 2.1113, "step": 253235 }, { "epoch": 0.6, "grad_norm": 1.828125, "learning_rate": 0.0001592914847266415, "loss": 2.1338, "step": 253240 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.0001592899963517805, "loss": 1.9428, "step": 253245 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015928850795666498, "loss": 2.0581, "step": 253250 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015928701954129543, "loss": 1.992, "step": 253255 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015928553110567236, "loss": 2.036, "step": 253260 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015928404264979628, "loss": 2.0118, "step": 253265 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015928255417366769, "loss": 2.0734, "step": 253270 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015928106567728712, "loss": 2.0615, "step": 253275 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015927957716065503, "loss": 2.1898, "step": 253280 }, { "epoch": 0.6, "grad_norm": 1.75, "learning_rate": 0.00015927808862377198, "loss": 2.0725, "step": 253285 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015927660006663847, "loss": 2.083, "step": 253290 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.000159275111489255, "loss": 1.8864, "step": 253295 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015927362289162208, "loss": 2.0295, "step": 253300 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015927213427374017, "loss": 1.9445, "step": 253305 }, { "epoch": 0.6, "grad_norm": 3.015625, "learning_rate": 0.00015927064563560985, "loss": 1.8842, "step": 253310 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.0001592691569772316, "loss": 2.0881, "step": 253315 }, { "epoch": 0.6, "grad_norm": 2.796875, "learning_rate": 0.00015926766829860595, "loss": 2.1906, "step": 253320 }, { "epoch": 0.6, "grad_norm": 2.625, "learning_rate": 0.0001592661795997334, "loss": 2.1834, "step": 253325 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001592646908806144, "loss": 1.982, "step": 253330 }, { "epoch": 0.6, "grad_norm": 1.7890625, "learning_rate": 0.00015926320214124955, "loss": 1.863, "step": 253335 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015926171338163928, "loss": 2.2061, "step": 253340 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015926022460178416, "loss": 2.0964, "step": 253345 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.00015925873580168468, "loss": 2.2603, "step": 253350 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015925724698134133, "loss": 2.0179, "step": 253355 }, { "epoch": 0.6, "grad_norm": 7.90625, "learning_rate": 0.0001592557581407546, "loss": 2.2249, "step": 253360 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.00015925426927992505, "loss": 2.1211, "step": 253365 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015925278039885318, "loss": 2.1135, "step": 253370 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015925129149753947, "loss": 2.0939, "step": 253375 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.00015924980257598446, "loss": 1.9488, "step": 253380 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.00015924831363418865, "loss": 2.0562, "step": 253385 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015924682467215252, "loss": 2.0832, "step": 253390 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.0001592453356898766, "loss": 2.2159, "step": 253395 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.0001592438466873614, "loss": 2.1327, "step": 253400 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015924235766460744, "loss": 2.0861, "step": 253405 }, { "epoch": 0.6, "grad_norm": 1.8125, "learning_rate": 0.00015924086862161523, "loss": 2.2233, "step": 253410 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015923937955838522, "loss": 2.1779, "step": 253415 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.00015923789047491799, "loss": 2.1172, "step": 253420 }, { "epoch": 0.6, "grad_norm": 2.921875, "learning_rate": 0.00015923640137121403, "loss": 2.0287, "step": 253425 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015923491224727383, "loss": 1.9244, "step": 253430 }, { "epoch": 0.6, "grad_norm": 2.515625, "learning_rate": 0.0001592334231030979, "loss": 2.1265, "step": 253435 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015923193393868677, "loss": 2.0503, "step": 253440 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015923044475404092, "loss": 2.2646, "step": 253445 }, { "epoch": 0.6, "grad_norm": 1.7109375, "learning_rate": 0.0001592289555491609, "loss": 1.9391, "step": 253450 }, { "epoch": 0.6, "grad_norm": 3.046875, "learning_rate": 0.00015922746632404718, "loss": 2.2208, "step": 253455 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.00015922597707870032, "loss": 2.1469, "step": 253460 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015922448781312073, "loss": 1.9687, "step": 253465 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015922299852730903, "loss": 1.9482, "step": 253470 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015922150922126566, "loss": 2.0407, "step": 253475 }, { "epoch": 0.6, "grad_norm": 1.890625, "learning_rate": 0.00015922001989499113, "loss": 2.0471, "step": 253480 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.000159218530548486, "loss": 2.1765, "step": 253485 }, { "epoch": 0.6, "grad_norm": 1.578125, "learning_rate": 0.00015921704118175076, "loss": 2.014, "step": 253490 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.00015921555179478585, "loss": 2.0323, "step": 253495 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015921406238759186, "loss": 2.0929, "step": 253500 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015921257296016927, "loss": 2.0595, "step": 253505 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001592110835125186, "loss": 2.0858, "step": 253510 }, { "epoch": 0.6, "grad_norm": 1.84375, "learning_rate": 0.00015920959404464036, "loss": 2.1385, "step": 253515 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015920810455653502, "loss": 2.2183, "step": 253520 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.00015920661504820313, "loss": 2.1529, "step": 253525 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001592051255196452, "loss": 2.1769, "step": 253530 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.0001592036359708617, "loss": 2.1464, "step": 253535 }, { "epoch": 0.6, "grad_norm": 2.515625, "learning_rate": 0.0001592021464018532, "loss": 1.9733, "step": 253540 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015920065681262013, "loss": 2.2605, "step": 253545 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015919916720316305, "loss": 1.9672, "step": 253550 }, { "epoch": 0.6, "grad_norm": 1.6484375, "learning_rate": 0.0001591976775734825, "loss": 1.9887, "step": 253555 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015919618792357892, "loss": 2.1515, "step": 253560 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015919469825345287, "loss": 2.0415, "step": 253565 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001591932085631048, "loss": 2.1757, "step": 253570 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015919171885253526, "loss": 2.1522, "step": 253575 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.0001591902291217448, "loss": 2.0821, "step": 253580 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015918873937073388, "loss": 2.0076, "step": 253585 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.00015918724959950296, "loss": 2.0031, "step": 253590 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015918575980805263, "loss": 2.0401, "step": 253595 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.0001591842699963834, "loss": 2.1917, "step": 253600 }, { "epoch": 0.6, "grad_norm": 2.578125, "learning_rate": 0.0001591827801644957, "loss": 1.9466, "step": 253605 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015918129031239011, "loss": 2.013, "step": 253610 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015917980044006714, "loss": 1.9291, "step": 253615 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015917831054752723, "loss": 1.989, "step": 253620 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015917682063477096, "loss": 1.9949, "step": 253625 }, { "epoch": 0.6, "grad_norm": 2.515625, "learning_rate": 0.00015917533070179882, "loss": 2.0702, "step": 253630 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015917384074861133, "loss": 2.037, "step": 253635 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015917235077520894, "loss": 2.1237, "step": 253640 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015917086078159223, "loss": 2.1471, "step": 253645 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015916937076776167, "loss": 2.0678, "step": 253650 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.0001591678807337178, "loss": 2.0062, "step": 253655 }, { "epoch": 0.6, "grad_norm": 1.796875, "learning_rate": 0.00015916639067946109, "loss": 2.1068, "step": 253660 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015916490060499208, "loss": 2.0126, "step": 253665 }, { "epoch": 0.6, "grad_norm": 1.9296875, "learning_rate": 0.00015916341051031124, "loss": 2.0463, "step": 253670 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015916192039541914, "loss": 1.9809, "step": 253675 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015916043026031624, "loss": 1.9738, "step": 253680 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015915894010500305, "loss": 2.0297, "step": 253685 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015915744992948011, "loss": 2.186, "step": 253690 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015915595973374792, "loss": 2.1608, "step": 253695 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015915446951780696, "loss": 2.1519, "step": 253700 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.0001591529792816578, "loss": 2.1065, "step": 253705 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015915148902530087, "loss": 2.0811, "step": 253710 }, { "epoch": 0.6, "grad_norm": 1.9296875, "learning_rate": 0.00015914999874873674, "loss": 2.1342, "step": 253715 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015914850845196588, "loss": 2.1032, "step": 253720 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015914701813498882, "loss": 2.1589, "step": 253725 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001591455277978061, "loss": 2.0263, "step": 253730 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015914403744041815, "loss": 2.0173, "step": 253735 }, { "epoch": 0.6, "grad_norm": 1.7734375, "learning_rate": 0.00015914254706282552, "loss": 2.0822, "step": 253740 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015914105666502875, "loss": 2.0067, "step": 253745 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.0001591395662470283, "loss": 2.0305, "step": 253750 }, { "epoch": 0.6, "grad_norm": 2.9375, "learning_rate": 0.00015913807580882473, "loss": 1.957, "step": 253755 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.0001591365853504185, "loss": 1.7239, "step": 253760 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.00015913509487181014, "loss": 2.2693, "step": 253765 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015913360437300018, "loss": 2.142, "step": 253770 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.0001591321138539891, "loss": 2.0337, "step": 253775 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015913062331477743, "loss": 1.9832, "step": 253780 }, { "epoch": 0.6, "grad_norm": 1.8828125, "learning_rate": 0.00015912913275536563, "loss": 2.1081, "step": 253785 }, { "epoch": 0.6, "grad_norm": 1.859375, "learning_rate": 0.00015912764217575423, "loss": 2.1047, "step": 253790 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.00015912615157594382, "loss": 2.0926, "step": 253795 }, { "epoch": 0.6, "grad_norm": 1.671875, "learning_rate": 0.00015912466095593483, "loss": 2.1745, "step": 253800 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015912317031572775, "loss": 2.1871, "step": 253805 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015912167965532317, "loss": 2.1794, "step": 253810 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.00015912018897472149, "loss": 2.1562, "step": 253815 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001591186982739233, "loss": 2.2159, "step": 253820 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015911720755292912, "loss": 2.1252, "step": 253825 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.00015911571681173942, "loss": 2.2314, "step": 253830 }, { "epoch": 0.6, "grad_norm": 2.625, "learning_rate": 0.00015911422605035473, "loss": 1.9596, "step": 253835 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015911273526877552, "loss": 1.9001, "step": 253840 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015911124446700233, "loss": 2.1808, "step": 253845 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.0001591097536450357, "loss": 2.1504, "step": 253850 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015910826280287607, "loss": 1.9192, "step": 253855 }, { "epoch": 0.6, "grad_norm": 2.765625, "learning_rate": 0.000159106771940524, "loss": 2.0305, "step": 253860 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015910528105798, "loss": 2.2244, "step": 253865 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015910379015524452, "loss": 2.1539, "step": 253870 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015910229923231815, "loss": 2.1814, "step": 253875 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015910080828920135, "loss": 2.1222, "step": 253880 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.00015909931732589465, "loss": 2.1012, "step": 253885 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015909782634239856, "loss": 2.0277, "step": 253890 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015909633533871357, "loss": 2.1784, "step": 253895 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015909484431484016, "loss": 2.2216, "step": 253900 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001590933532707789, "loss": 1.9155, "step": 253905 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001590918622065303, "loss": 1.9758, "step": 253910 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015909037112209488, "loss": 2.1816, "step": 253915 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015908888001747306, "loss": 2.0078, "step": 253920 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015908738889266544, "loss": 2.0522, "step": 253925 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015908589774767248, "loss": 1.9652, "step": 253930 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.0001590844065824947, "loss": 1.9235, "step": 253935 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.0001590829153971326, "loss": 2.2779, "step": 253940 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015908142419158677, "loss": 1.9812, "step": 253945 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001590799329658576, "loss": 2.1611, "step": 253950 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015907844171994567, "loss": 2.0816, "step": 253955 }, { "epoch": 0.6, "grad_norm": 2.765625, "learning_rate": 0.00015907695045385147, "loss": 2.143, "step": 253960 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015907545916757552, "loss": 2.2584, "step": 253965 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015907396786111832, "loss": 2.0841, "step": 253970 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015907247653448037, "loss": 2.1516, "step": 253975 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.0001590709851876622, "loss": 2.0684, "step": 253980 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015906949382066428, "loss": 2.1331, "step": 253985 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001590680024334872, "loss": 1.8799, "step": 253990 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.0001590665110261314, "loss": 2.1603, "step": 253995 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001590650195985974, "loss": 1.9982, "step": 254000 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.0001590635281508857, "loss": 1.8934, "step": 254005 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015906203668299685, "loss": 2.1134, "step": 254010 }, { "epoch": 0.6, "grad_norm": 1.59375, "learning_rate": 0.00015906054519493137, "loss": 1.8827, "step": 254015 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001590590536866897, "loss": 2.0409, "step": 254020 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.0001590575621582724, "loss": 2.1226, "step": 254025 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015905607060967992, "loss": 2.1232, "step": 254030 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015905457904091285, "loss": 2.1547, "step": 254035 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015905308745197167, "loss": 1.9748, "step": 254040 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015905159584285689, "loss": 1.8612, "step": 254045 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015905010421356897, "loss": 2.1143, "step": 254050 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.0001590486125641085, "loss": 2.1884, "step": 254055 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015904712089447599, "loss": 2.1162, "step": 254060 }, { "epoch": 0.6, "grad_norm": 1.8984375, "learning_rate": 0.00015904562920467186, "loss": 2.0745, "step": 254065 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015904413749469668, "loss": 2.1372, "step": 254070 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015904264576455094, "loss": 2.0424, "step": 254075 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015904115401423521, "loss": 2.1914, "step": 254080 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.0001590396622437499, "loss": 2.1725, "step": 254085 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015903817045309559, "loss": 1.957, "step": 254090 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.00015903667864227276, "loss": 1.9665, "step": 254095 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015903518681128196, "loss": 1.902, "step": 254100 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015903369496012365, "loss": 1.9686, "step": 254105 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015903220308879833, "loss": 2.147, "step": 254110 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015903071119730656, "loss": 2.1302, "step": 254115 }, { "epoch": 0.6, "grad_norm": 1.859375, "learning_rate": 0.00015902921928564884, "loss": 2.0379, "step": 254120 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.0001590277273538257, "loss": 2.1177, "step": 254125 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015902623540183755, "loss": 1.9667, "step": 254130 }, { "epoch": 0.6, "grad_norm": 2.671875, "learning_rate": 0.00015902474342968501, "loss": 2.2029, "step": 254135 }, { "epoch": 0.6, "grad_norm": 1.765625, "learning_rate": 0.00015902325143736854, "loss": 2.011, "step": 254140 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015902175942488866, "loss": 2.3041, "step": 254145 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.00015902026739224586, "loss": 2.169, "step": 254150 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015901877533944068, "loss": 2.0858, "step": 254155 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015901728326647363, "loss": 2.1485, "step": 254160 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015901579117334516, "loss": 2.0465, "step": 254165 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015901429906005587, "loss": 2.2947, "step": 254170 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001590128069266062, "loss": 1.8982, "step": 254175 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015901131477299668, "loss": 2.043, "step": 254180 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015900982259922785, "loss": 2.0654, "step": 254185 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015900833040530019, "loss": 1.8409, "step": 254190 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001590068381912142, "loss": 1.9545, "step": 254195 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.0001590053459569704, "loss": 1.9744, "step": 254200 }, { "epoch": 0.6, "grad_norm": 2.734375, "learning_rate": 0.0001590038537025693, "loss": 2.1115, "step": 254205 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015900236142801143, "loss": 2.0583, "step": 254210 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015900086913329728, "loss": 1.9782, "step": 254215 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015899937681842737, "loss": 2.0191, "step": 254220 }, { "epoch": 0.6, "grad_norm": 3.765625, "learning_rate": 0.0001589978844834022, "loss": 2.1652, "step": 254225 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.0001589963921282223, "loss": 2.1229, "step": 254230 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015899489975288814, "loss": 1.9703, "step": 254235 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015899340735740024, "loss": 2.1778, "step": 254240 }, { "epoch": 0.6, "grad_norm": 1.8671875, "learning_rate": 0.00015899191494175914, "loss": 1.9862, "step": 254245 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015899042250596532, "loss": 2.0869, "step": 254250 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001589889300500193, "loss": 2.0757, "step": 254255 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015898743757392162, "loss": 2.0843, "step": 254260 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015898594507767272, "loss": 2.0423, "step": 254265 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.0001589844525612732, "loss": 1.9589, "step": 254270 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015898296002472349, "loss": 2.0346, "step": 254275 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.0001589814674680241, "loss": 2.0729, "step": 254280 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.00015897997489117564, "loss": 1.9831, "step": 254285 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015897848229417852, "loss": 2.025, "step": 254290 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015897698967703327, "loss": 2.0209, "step": 254295 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.00015897549703974045, "loss": 2.1052, "step": 254300 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015897400438230047, "loss": 2.1232, "step": 254305 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015897251170471396, "loss": 2.0479, "step": 254310 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015897101900698134, "loss": 1.9098, "step": 254315 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015896952628910316, "loss": 2.1803, "step": 254320 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015896803355107988, "loss": 2.1068, "step": 254325 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.0001589665407929121, "loss": 2.0398, "step": 254330 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015896504801460025, "loss": 1.9933, "step": 254335 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.0001589635552161449, "loss": 1.9856, "step": 254340 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.0001589620623975465, "loss": 2.1521, "step": 254345 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015896056955880562, "loss": 2.228, "step": 254350 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015895907669992272, "loss": 2.1257, "step": 254355 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015895758382089833, "loss": 1.996, "step": 254360 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015895609092173295, "loss": 2.2922, "step": 254365 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015895459800242714, "loss": 2.1508, "step": 254370 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015895310506298133, "loss": 2.0249, "step": 254375 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015895161210339608, "loss": 2.1153, "step": 254380 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.0001589501191236719, "loss": 2.017, "step": 254385 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.0001589486261238093, "loss": 2.0203, "step": 254390 }, { "epoch": 0.6, "grad_norm": 2.796875, "learning_rate": 0.00015894713310380873, "loss": 2.1348, "step": 254395 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.0001589456400636708, "loss": 1.9783, "step": 254400 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015894414700339598, "loss": 2.1854, "step": 254405 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.00015894265392298471, "loss": 2.1988, "step": 254410 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.0001589411608224376, "loss": 2.3014, "step": 254415 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015893966770175514, "loss": 2.033, "step": 254420 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015893817456093777, "loss": 2.1027, "step": 254425 }, { "epoch": 0.6, "grad_norm": 1.8046875, "learning_rate": 0.0001589366813999861, "loss": 2.1138, "step": 254430 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015893518821890056, "loss": 2.2355, "step": 254435 }, { "epoch": 0.6, "grad_norm": 2.671875, "learning_rate": 0.00015893369501768172, "loss": 1.9911, "step": 254440 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015893220179633003, "loss": 1.9759, "step": 254445 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015893070855484603, "loss": 2.2439, "step": 254450 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015892921529323025, "loss": 1.9289, "step": 254455 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015892772201148318, "loss": 2.1549, "step": 254460 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001589262287096053, "loss": 2.2775, "step": 254465 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001589247353875972, "loss": 2.1748, "step": 254470 }, { "epoch": 0.6, "grad_norm": 1.828125, "learning_rate": 0.00015892324204545933, "loss": 2.1834, "step": 254475 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001589217486831922, "loss": 2.094, "step": 254480 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015892025530079632, "loss": 2.0036, "step": 254485 }, { "epoch": 0.6, "grad_norm": 2.59375, "learning_rate": 0.00015891876189827223, "loss": 2.1334, "step": 254490 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.0001589172684756204, "loss": 2.0622, "step": 254495 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015891577503284138, "loss": 2.2691, "step": 254500 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015891428156993568, "loss": 1.9581, "step": 254505 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015891278808690377, "loss": 2.1923, "step": 254510 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015891129458374617, "loss": 2.0947, "step": 254515 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015890980106046345, "loss": 2.2488, "step": 254520 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015890830751705607, "loss": 2.1117, "step": 254525 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015890681395352448, "loss": 2.1605, "step": 254530 }, { "epoch": 0.6, "grad_norm": 1.8125, "learning_rate": 0.00015890532036986932, "loss": 2.1719, "step": 254535 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015890382676609098, "loss": 2.0895, "step": 254540 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015890233314219007, "loss": 2.1037, "step": 254545 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.00015890083949816704, "loss": 2.1992, "step": 254550 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015889934583402241, "loss": 1.9198, "step": 254555 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001588978521497567, "loss": 2.2719, "step": 254560 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001588963584453704, "loss": 2.2121, "step": 254565 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.00015889486472086406, "loss": 2.1303, "step": 254570 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015889337097623813, "loss": 2.1546, "step": 254575 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001588918772114932, "loss": 2.2089, "step": 254580 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001588903834266297, "loss": 2.1937, "step": 254585 }, { "epoch": 0.6, "grad_norm": 3.1875, "learning_rate": 0.00015888888962164823, "loss": 2.0862, "step": 254590 }, { "epoch": 0.6, "grad_norm": 1.8046875, "learning_rate": 0.00015888739579654918, "loss": 1.9055, "step": 254595 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015888590195133317, "loss": 2.0589, "step": 254600 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015888440808600068, "loss": 2.0647, "step": 254605 }, { "epoch": 0.6, "grad_norm": 1.765625, "learning_rate": 0.00015888291420055218, "loss": 1.9721, "step": 254610 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.0001588814202949882, "loss": 1.9945, "step": 254615 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001588799263693093, "loss": 2.0488, "step": 254620 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015887843242351593, "loss": 2.1296, "step": 254625 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.0001588769384576086, "loss": 2.1659, "step": 254630 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015887544447158787, "loss": 2.1839, "step": 254635 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015887395046545417, "loss": 2.1228, "step": 254640 }, { "epoch": 0.6, "grad_norm": 2.671875, "learning_rate": 0.0001588724564392081, "loss": 2.3018, "step": 254645 }, { "epoch": 0.6, "grad_norm": 2.890625, "learning_rate": 0.00015887096239285012, "loss": 2.0152, "step": 254650 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015886946832638075, "loss": 2.146, "step": 254655 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015886797423980052, "loss": 1.9704, "step": 254660 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.0001588664801331099, "loss": 1.9451, "step": 254665 }, { "epoch": 0.6, "grad_norm": 1.7734375, "learning_rate": 0.00015886498600630947, "loss": 2.0937, "step": 254670 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015886349185939965, "loss": 2.1563, "step": 254675 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.000158861997692381, "loss": 2.1042, "step": 254680 }, { "epoch": 0.6, "grad_norm": 1.921875, "learning_rate": 0.00015886050350525402, "loss": 1.9214, "step": 254685 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015885900929801921, "loss": 2.1416, "step": 254690 }, { "epoch": 0.6, "grad_norm": 1.84375, "learning_rate": 0.00015885751507067712, "loss": 1.9957, "step": 254695 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015885602082322823, "loss": 2.2365, "step": 254700 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015885452655567305, "loss": 1.9033, "step": 254705 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015885303226801208, "loss": 1.9949, "step": 254710 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.0001588515379602459, "loss": 2.132, "step": 254715 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015885004363237493, "loss": 2.1109, "step": 254720 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.0001588485492843997, "loss": 1.9387, "step": 254725 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001588470549163208, "loss": 2.0435, "step": 254730 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015884556052813865, "loss": 2.1598, "step": 254735 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015884406611985376, "loss": 2.111, "step": 254740 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015884257169146668, "loss": 2.048, "step": 254745 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015884107724297794, "loss": 2.0423, "step": 254750 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.000158839582774388, "loss": 2.0257, "step": 254755 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015883808828569737, "loss": 2.2127, "step": 254760 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.0001588365937769066, "loss": 1.9915, "step": 254765 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001588350992480162, "loss": 2.0294, "step": 254770 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015883360469902668, "loss": 2.0446, "step": 254775 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015883211012993848, "loss": 2.1225, "step": 254780 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015883061554075218, "loss": 1.938, "step": 254785 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.0001588291209314683, "loss": 2.1307, "step": 254790 }, { "epoch": 0.6, "grad_norm": 2.625, "learning_rate": 0.00015882762630208732, "loss": 2.0799, "step": 254795 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015882613165260973, "loss": 1.971, "step": 254800 }, { "epoch": 0.6, "grad_norm": 2.609375, "learning_rate": 0.00015882463698303607, "loss": 2.1593, "step": 254805 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015882314229336686, "loss": 2.1831, "step": 254810 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015882164758360262, "loss": 1.982, "step": 254815 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.0001588201528537438, "loss": 1.9748, "step": 254820 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015881865810379097, "loss": 2.0508, "step": 254825 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015881716333374462, "loss": 2.1912, "step": 254830 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015881566854360522, "loss": 2.0965, "step": 254835 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015881417373337338, "loss": 1.8598, "step": 254840 }, { "epoch": 0.6, "grad_norm": 2.515625, "learning_rate": 0.0001588126789030495, "loss": 2.0222, "step": 254845 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001588111840526342, "loss": 2.0336, "step": 254850 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001588096891821279, "loss": 1.9496, "step": 254855 }, { "epoch": 0.6, "grad_norm": 2.671875, "learning_rate": 0.0001588081942915311, "loss": 2.1252, "step": 254860 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001588066993808444, "loss": 1.9914, "step": 254865 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.0001588052044500683, "loss": 1.9076, "step": 254870 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015880370949920323, "loss": 2.0497, "step": 254875 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015880221452824978, "loss": 2.1142, "step": 254880 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015880071953720836, "loss": 2.3404, "step": 254885 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.0001587992245260796, "loss": 2.0731, "step": 254890 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015879772949486395, "loss": 2.2535, "step": 254895 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.00015879623444356192, "loss": 2.0703, "step": 254900 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015879473937217405, "loss": 2.0891, "step": 254905 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015879324428070085, "loss": 2.0263, "step": 254910 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015879174916914277, "loss": 2.1182, "step": 254915 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015879025403750036, "loss": 1.8902, "step": 254920 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015878875888577414, "loss": 2.061, "step": 254925 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015878726371396464, "loss": 2.1624, "step": 254930 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.0001587857685220723, "loss": 1.9508, "step": 254935 }, { "epoch": 0.6, "grad_norm": 1.828125, "learning_rate": 0.0001587842733100977, "loss": 1.979, "step": 254940 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.0001587827780780413, "loss": 2.0839, "step": 254945 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.0001587812828259037, "loss": 2.2664, "step": 254950 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001587797875536853, "loss": 2.0203, "step": 254955 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015877829226138669, "loss": 2.0474, "step": 254960 }, { "epoch": 0.6, "grad_norm": 2.609375, "learning_rate": 0.00015877679694900834, "loss": 2.0258, "step": 254965 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015877530161655072, "loss": 2.156, "step": 254970 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015877380626401445, "loss": 1.9905, "step": 254975 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.00015877231089139998, "loss": 2.124, "step": 254980 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015877081549870776, "loss": 2.1647, "step": 254985 }, { "epoch": 0.6, "grad_norm": 1.78125, "learning_rate": 0.00015876932008593842, "loss": 2.0341, "step": 254990 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001587678246530924, "loss": 2.0192, "step": 254995 }, { "epoch": 0.6, "grad_norm": 1.8515625, "learning_rate": 0.00015876632920017023, "loss": 2.0594, "step": 255000 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001587648337271724, "loss": 2.0192, "step": 255005 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015876333823409947, "loss": 2.032, "step": 255010 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015876184272095188, "loss": 1.9432, "step": 255015 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.0001587603471877302, "loss": 1.9081, "step": 255020 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015875885163443493, "loss": 1.9347, "step": 255025 }, { "epoch": 0.6, "grad_norm": 3.8125, "learning_rate": 0.00015875735606106657, "loss": 1.9837, "step": 255030 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.0001587558604676256, "loss": 2.1726, "step": 255035 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001587543648541126, "loss": 2.1533, "step": 255040 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.000158752869220528, "loss": 1.9274, "step": 255045 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015875137356687237, "loss": 2.0766, "step": 255050 }, { "epoch": 0.6, "grad_norm": 1.953125, "learning_rate": 0.00015874987789314625, "loss": 2.0787, "step": 255055 }, { "epoch": 0.6, "grad_norm": 1.9296875, "learning_rate": 0.00015874838219935005, "loss": 2.0006, "step": 255060 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015874688648548436, "loss": 2.1357, "step": 255065 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015874539075154967, "loss": 2.1198, "step": 255070 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.00015874389499754646, "loss": 1.966, "step": 255075 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015874239922347532, "loss": 2.1759, "step": 255080 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.0001587409034293367, "loss": 2.0736, "step": 255085 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015873940761513107, "loss": 2.3104, "step": 255090 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015873791178085906, "loss": 2.0345, "step": 255095 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001587364159265211, "loss": 2.1513, "step": 255100 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015873492005211766, "loss": 2.2374, "step": 255105 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.0001587334241576494, "loss": 1.999, "step": 255110 }, { "epoch": 0.6, "grad_norm": 2.6875, "learning_rate": 0.00015873192824311666, "loss": 2.1764, "step": 255115 }, { "epoch": 0.6, "grad_norm": 2.609375, "learning_rate": 0.00015873043230852006, "loss": 2.2139, "step": 255120 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015872893635386008, "loss": 2.0536, "step": 255125 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015872744037913718, "loss": 2.0078, "step": 255130 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015872594438435197, "loss": 2.1641, "step": 255135 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015872444836950492, "loss": 2.0671, "step": 255140 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015872295233459652, "loss": 1.891, "step": 255145 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015872145627962732, "loss": 2.1572, "step": 255150 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015871996020459773, "loss": 2.004, "step": 255155 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.0001587184641095084, "loss": 1.9633, "step": 255160 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.0001587169679943598, "loss": 2.1813, "step": 255165 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.00015871547185915237, "loss": 2.0368, "step": 255170 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015871397570388666, "loss": 1.9956, "step": 255175 }, { "epoch": 0.6, "grad_norm": 1.8515625, "learning_rate": 0.00015871247952856321, "loss": 1.8572, "step": 255180 }, { "epoch": 0.6, "grad_norm": 2.515625, "learning_rate": 0.00015871098333318252, "loss": 2.2709, "step": 255185 }, { "epoch": 0.6, "grad_norm": 1.734375, "learning_rate": 0.00015870948711774508, "loss": 2.0935, "step": 255190 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015870799088225142, "loss": 1.954, "step": 255195 }, { "epoch": 0.6, "grad_norm": 1.859375, "learning_rate": 0.00015870649462670206, "loss": 2.008, "step": 255200 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001587049983510975, "loss": 2.0905, "step": 255205 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001587035020554382, "loss": 1.9125, "step": 255210 }, { "epoch": 0.6, "grad_norm": 1.71875, "learning_rate": 0.0001587020057397248, "loss": 2.2672, "step": 255215 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015870050940395764, "loss": 2.053, "step": 255220 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015869901304813738, "loss": 2.0982, "step": 255225 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015869751667226445, "loss": 1.946, "step": 255230 }, { "epoch": 0.6, "grad_norm": 2.828125, "learning_rate": 0.0001586960202763394, "loss": 2.0171, "step": 255235 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001586945238603627, "loss": 2.0251, "step": 255240 }, { "epoch": 0.6, "grad_norm": 2.84375, "learning_rate": 0.00015869302742433492, "loss": 1.9498, "step": 255245 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.0001586915309682565, "loss": 2.1705, "step": 255250 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.000158690034492128, "loss": 2.1988, "step": 255255 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001586885379959499, "loss": 2.0463, "step": 255260 }, { "epoch": 0.6, "grad_norm": 1.8671875, "learning_rate": 0.0001586870414797228, "loss": 2.0786, "step": 255265 }, { "epoch": 0.6, "grad_norm": 1.8359375, "learning_rate": 0.0001586855449434471, "loss": 2.0109, "step": 255270 }, { "epoch": 0.6, "grad_norm": 1.828125, "learning_rate": 0.00015868404838712333, "loss": 1.9729, "step": 255275 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015868255181075206, "loss": 2.0442, "step": 255280 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015868105521433376, "loss": 2.2218, "step": 255285 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015867955859786897, "loss": 2.0335, "step": 255290 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015867806196135812, "loss": 2.1174, "step": 255295 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001586765653048018, "loss": 2.1782, "step": 255300 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015867506862820053, "loss": 2.0657, "step": 255305 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.00015867357193155476, "loss": 2.1518, "step": 255310 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015867207521486507, "loss": 2.1528, "step": 255315 }, { "epoch": 0.6, "grad_norm": 1.796875, "learning_rate": 0.0001586705784781319, "loss": 2.0446, "step": 255320 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015866908172135582, "loss": 2.0899, "step": 255325 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015866758494453728, "loss": 2.0936, "step": 255330 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.00015866608814767686, "loss": 2.0876, "step": 255335 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015866459133077507, "loss": 1.9865, "step": 255340 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015866309449383232, "loss": 2.1239, "step": 255345 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.00015866159763684923, "loss": 1.975, "step": 255350 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.0001586601007598263, "loss": 1.9507, "step": 255355 }, { "epoch": 0.6, "grad_norm": 1.9296875, "learning_rate": 0.000158658603862764, "loss": 2.1759, "step": 255360 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015865710694566284, "loss": 2.1681, "step": 255365 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015865561000852333, "loss": 2.008, "step": 255370 }, { "epoch": 0.6, "grad_norm": 1.7578125, "learning_rate": 0.00015865411305134605, "loss": 1.6932, "step": 255375 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.00015865261607413142, "loss": 2.1466, "step": 255380 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015865111907688003, "loss": 2.0812, "step": 255385 }, { "epoch": 0.6, "grad_norm": 1.8203125, "learning_rate": 0.00015864962205959233, "loss": 2.1129, "step": 255390 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015864812502226885, "loss": 1.8881, "step": 255395 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001586466279649101, "loss": 2.1183, "step": 255400 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015864513088751663, "loss": 2.0661, "step": 255405 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.0001586436337900889, "loss": 2.1293, "step": 255410 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015864213667262747, "loss": 2.0863, "step": 255415 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.0001586406395351328, "loss": 2.273, "step": 255420 }, { "epoch": 0.6, "grad_norm": 1.6640625, "learning_rate": 0.0001586391423776054, "loss": 1.9126, "step": 255425 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015863764520004583, "loss": 1.9706, "step": 255430 }, { "epoch": 0.6, "grad_norm": 2.6875, "learning_rate": 0.00015863614800245456, "loss": 2.0902, "step": 255435 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015863465078483214, "loss": 2.0641, "step": 255440 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015863315354717908, "loss": 2.0486, "step": 255445 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015863165628949585, "loss": 2.1399, "step": 255450 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015863015901178295, "loss": 1.9993, "step": 255455 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015862866171404094, "loss": 2.1671, "step": 255460 }, { "epoch": 0.6, "grad_norm": 1.671875, "learning_rate": 0.00015862716439627036, "loss": 2.1858, "step": 255465 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015862566705847163, "loss": 2.0274, "step": 255470 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015862416970064533, "loss": 1.9735, "step": 255475 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015862267232279192, "loss": 2.0803, "step": 255480 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015862117492491197, "loss": 2.131, "step": 255485 }, { "epoch": 0.6, "grad_norm": 1.65625, "learning_rate": 0.00015861967750700596, "loss": 2.1243, "step": 255490 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015861818006907442, "loss": 2.089, "step": 255495 }, { "epoch": 0.6, "grad_norm": 2.703125, "learning_rate": 0.00015861668261111785, "loss": 2.0015, "step": 255500 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015861518513313668, "loss": 1.9826, "step": 255505 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015861368763513158, "loss": 2.0588, "step": 255510 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.00015861219011710297, "loss": 2.1443, "step": 255515 }, { "epoch": 0.6, "grad_norm": 2.71875, "learning_rate": 0.00015861069257905137, "loss": 2.2595, "step": 255520 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.0001586091950209773, "loss": 2.2112, "step": 255525 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015860769744288123, "loss": 2.1076, "step": 255530 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015860619984476375, "loss": 1.9314, "step": 255535 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.0001586047022266253, "loss": 1.9143, "step": 255540 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015860320458846645, "loss": 1.9712, "step": 255545 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.00015860170693028763, "loss": 1.9029, "step": 255550 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015860020925208946, "loss": 2.2476, "step": 255555 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.00015859871155387236, "loss": 1.9362, "step": 255560 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.0001585972138356369, "loss": 2.0565, "step": 255565 }, { "epoch": 0.6, "grad_norm": 1.8671875, "learning_rate": 0.00015859571609738358, "loss": 1.9948, "step": 255570 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015859421833911284, "loss": 2.1619, "step": 255575 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015859272056082532, "loss": 1.9717, "step": 255580 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015859122276252143, "loss": 2.0148, "step": 255585 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015858972494420172, "loss": 2.0931, "step": 255590 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.0001585882271058667, "loss": 2.0109, "step": 255595 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.0001585867292475169, "loss": 1.9726, "step": 255600 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015858523136915277, "loss": 2.1728, "step": 255605 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015858373347077488, "loss": 1.8003, "step": 255610 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015858223555238374, "loss": 2.1411, "step": 255615 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001585807376139798, "loss": 1.9698, "step": 255620 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001585792396555637, "loss": 2.161, "step": 255625 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.0001585777416771358, "loss": 2.0452, "step": 255630 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.0001585762436786967, "loss": 2.0826, "step": 255635 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015857474566024688, "loss": 1.9902, "step": 255640 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.0001585732476217869, "loss": 2.0319, "step": 255645 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015857174956331721, "loss": 2.0555, "step": 255650 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015857025148483835, "loss": 2.151, "step": 255655 }, { "epoch": 0.6, "grad_norm": 2.046875, "learning_rate": 0.00015856875338635083, "loss": 2.1477, "step": 255660 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015856725526785517, "loss": 2.1518, "step": 255665 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.00015856575712935186, "loss": 2.1455, "step": 255670 }, { "epoch": 0.6, "grad_norm": 1.75, "learning_rate": 0.00015856425897084142, "loss": 1.9489, "step": 255675 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.0001585627607923244, "loss": 2.1832, "step": 255680 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015856126259380124, "loss": 2.1389, "step": 255685 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.0001585597643752725, "loss": 2.067, "step": 255690 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015855826613673873, "loss": 2.0972, "step": 255695 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015855676787820033, "loss": 1.9175, "step": 255700 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001585552695996579, "loss": 2.0243, "step": 255705 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015855377130111195, "loss": 2.1555, "step": 255710 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015855227298256294, "loss": 2.2768, "step": 255715 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.0001585507746440114, "loss": 2.0534, "step": 255720 }, { "epoch": 0.6, "grad_norm": 2.34375, "learning_rate": 0.0001585492762854579, "loss": 1.9731, "step": 255725 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015854777790690286, "loss": 2.2586, "step": 255730 }, { "epoch": 0.6, "grad_norm": 1.84375, "learning_rate": 0.00015854627950834686, "loss": 2.0046, "step": 255735 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015854478108979041, "loss": 2.1181, "step": 255740 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015854328265123394, "loss": 2.0687, "step": 255745 }, { "epoch": 0.6, "grad_norm": 2.671875, "learning_rate": 0.00015854178419267808, "loss": 2.0585, "step": 255750 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015854028571412327, "loss": 2.2108, "step": 255755 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015853878721557005, "loss": 2.1213, "step": 255760 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.0001585372886970189, "loss": 2.2286, "step": 255765 }, { "epoch": 0.6, "grad_norm": 2.75, "learning_rate": 0.00015853579015847032, "loss": 2.0618, "step": 255770 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015853429159992487, "loss": 1.9109, "step": 255775 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.0001585327930213831, "loss": 2.1142, "step": 255780 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015853129442284543, "loss": 2.1082, "step": 255785 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015852979580431237, "loss": 2.0769, "step": 255790 }, { "epoch": 0.6, "grad_norm": 1.453125, "learning_rate": 0.00015852829716578453, "loss": 2.0132, "step": 255795 }, { "epoch": 0.6, "grad_norm": 1.8203125, "learning_rate": 0.0001585267985072623, "loss": 2.0493, "step": 255800 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.00015852529982874632, "loss": 2.2128, "step": 255805 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.000158523801130237, "loss": 2.056, "step": 255810 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015852230241173492, "loss": 1.9536, "step": 255815 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015852080367324053, "loss": 2.0501, "step": 255820 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015851930491475434, "loss": 1.9019, "step": 255825 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015851780613627693, "loss": 2.1688, "step": 255830 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015851630733780878, "loss": 2.1115, "step": 255835 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.0001585148085193504, "loss": 2.1844, "step": 255840 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.0001585133096809023, "loss": 2.2261, "step": 255845 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015851181082246498, "loss": 2.0797, "step": 255850 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015851031194403898, "loss": 2.2052, "step": 255855 }, { "epoch": 0.6, "grad_norm": 1.890625, "learning_rate": 0.0001585088130456248, "loss": 2.2179, "step": 255860 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001585073141272229, "loss": 2.1754, "step": 255865 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001585058151888339, "loss": 2.1274, "step": 255870 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015850431623045823, "loss": 1.9827, "step": 255875 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.0001585028172520964, "loss": 2.1743, "step": 255880 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015850131825374898, "loss": 2.1712, "step": 255885 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015849981923541644, "loss": 1.946, "step": 255890 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001584983201970993, "loss": 2.1084, "step": 255895 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.0001584968211387981, "loss": 1.8885, "step": 255900 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015849532206051327, "loss": 2.1904, "step": 255905 }, { "epoch": 0.6, "grad_norm": 2.65625, "learning_rate": 0.0001584938229622454, "loss": 1.8624, "step": 255910 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015849232384399498, "loss": 2.1419, "step": 255915 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015849082470576257, "loss": 2.1196, "step": 255920 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015848932554754857, "loss": 2.164, "step": 255925 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015848782636935355, "loss": 2.147, "step": 255930 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.00015848632717117806, "loss": 2.0994, "step": 255935 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.00015848482795302257, "loss": 2.1292, "step": 255940 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.0001584833287148876, "loss": 2.1095, "step": 255945 }, { "epoch": 0.6, "grad_norm": 1.75, "learning_rate": 0.00015848182945677367, "loss": 2.0079, "step": 255950 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015848033017868129, "loss": 2.033, "step": 255955 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.000158478830880611, "loss": 2.2177, "step": 255960 }, { "epoch": 0.6, "grad_norm": 1.890625, "learning_rate": 0.0001584773315625632, "loss": 2.1911, "step": 255965 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015847583222453854, "loss": 2.1774, "step": 255970 }, { "epoch": 0.6, "grad_norm": 1.9453125, "learning_rate": 0.00015847433286653745, "loss": 2.035, "step": 255975 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015847283348856047, "loss": 2.0753, "step": 255980 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015847133409060813, "loss": 2.0372, "step": 255985 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.0001584698346726809, "loss": 2.0978, "step": 255990 }, { "epoch": 0.6, "grad_norm": 2.65625, "learning_rate": 0.00015846833523477932, "loss": 2.1094, "step": 255995 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001584668357769039, "loss": 2.2921, "step": 256000 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015846533629905517, "loss": 2.1474, "step": 256005 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015846383680123357, "loss": 2.1624, "step": 256010 }, { "epoch": 0.6, "grad_norm": 2.546875, "learning_rate": 0.00015846233728343973, "loss": 2.1935, "step": 256015 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015846083774567404, "loss": 2.3534, "step": 256020 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.0001584593381879371, "loss": 2.1537, "step": 256025 }, { "epoch": 0.6, "grad_norm": 1.9140625, "learning_rate": 0.00015845783861022939, "loss": 2.0127, "step": 256030 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001584563390125514, "loss": 2.0432, "step": 256035 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.00015845483939490367, "loss": 1.9862, "step": 256040 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001584533397572867, "loss": 2.0178, "step": 256045 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015845184009970103, "loss": 2.1259, "step": 256050 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015845034042214715, "loss": 2.2191, "step": 256055 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.00015844884072462555, "loss": 2.088, "step": 256060 }, { "epoch": 0.6, "grad_norm": 1.859375, "learning_rate": 0.00015844734100713676, "loss": 1.9555, "step": 256065 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015844584126968134, "loss": 2.137, "step": 256070 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015844434151225976, "loss": 2.1613, "step": 256075 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.0001584428417348725, "loss": 2.0678, "step": 256080 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.0001584413419375201, "loss": 2.3281, "step": 256085 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.0001584398421202031, "loss": 2.2709, "step": 256090 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015843834228292198, "loss": 2.1292, "step": 256095 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015843684242567728, "loss": 2.1677, "step": 256100 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015843534254846947, "loss": 2.2742, "step": 256105 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001584338426512991, "loss": 2.0415, "step": 256110 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.0001584323427341667, "loss": 2.0084, "step": 256115 }, { "epoch": 0.6, "grad_norm": 2.421875, "learning_rate": 0.0001584308427970727, "loss": 2.0029, "step": 256120 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015842934284001768, "loss": 2.0384, "step": 256125 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015842784286300215, "loss": 2.08, "step": 256130 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.0001584263428660266, "loss": 2.1925, "step": 256135 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015842484284909155, "loss": 2.2111, "step": 256140 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.0001584233428121975, "loss": 1.9986, "step": 256145 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.000158421842755345, "loss": 2.1368, "step": 256150 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015842034267853455, "loss": 2.1652, "step": 256155 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015841884258176663, "loss": 2.252, "step": 256160 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015841734246504178, "loss": 2.1473, "step": 256165 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001584158423283605, "loss": 2.1794, "step": 256170 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.0001584143421717233, "loss": 1.8776, "step": 256175 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015841284199513074, "loss": 2.0502, "step": 256180 }, { "epoch": 0.6, "grad_norm": 1.9765625, "learning_rate": 0.00015841134179858326, "loss": 2.2136, "step": 256185 }, { "epoch": 0.6, "grad_norm": 1.9140625, "learning_rate": 0.00015840984158208143, "loss": 2.0451, "step": 256190 }, { "epoch": 0.6, "grad_norm": 2.484375, "learning_rate": 0.0001584083413456257, "loss": 1.9379, "step": 256195 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015840684108921663, "loss": 2.1534, "step": 256200 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015840534081285474, "loss": 2.0607, "step": 256205 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015840384051654053, "loss": 2.0017, "step": 256210 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.0001584023402002745, "loss": 2.2571, "step": 256215 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.0001584008398640572, "loss": 2.0414, "step": 256220 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015839933950788907, "loss": 2.0799, "step": 256225 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.0001583978391317707, "loss": 1.9743, "step": 256230 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015839633873570254, "loss": 2.0604, "step": 256235 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.00015839483831968517, "loss": 1.9878, "step": 256240 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015839333788371902, "loss": 2.2106, "step": 256245 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015839183742780467, "loss": 1.8846, "step": 256250 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015839033695194262, "loss": 2.0291, "step": 256255 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015838883645613336, "loss": 2.1031, "step": 256260 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001583873359403774, "loss": 1.9465, "step": 256265 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.0001583858354046753, "loss": 2.158, "step": 256270 }, { "epoch": 0.6, "grad_norm": 1.75, "learning_rate": 0.00015838433484902751, "loss": 2.174, "step": 256275 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015838283427343456, "loss": 2.0442, "step": 256280 }, { "epoch": 0.6, "grad_norm": 2.609375, "learning_rate": 0.000158381333677897, "loss": 2.1007, "step": 256285 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015837983306241532, "loss": 1.8808, "step": 256290 }, { "epoch": 0.6, "grad_norm": 1.796875, "learning_rate": 0.00015837833242699002, "loss": 2.058, "step": 256295 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.00015837683177162163, "loss": 2.0821, "step": 256300 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015837533109631067, "loss": 2.3237, "step": 256305 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015837383040105763, "loss": 1.9916, "step": 256310 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015837232968586302, "loss": 2.1862, "step": 256315 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015837082895072736, "loss": 2.1043, "step": 256320 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015836932819565119, "loss": 2.0691, "step": 256325 }, { "epoch": 0.6, "grad_norm": 1.9609375, "learning_rate": 0.00015836782742063496, "loss": 2.1477, "step": 256330 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015836632662567923, "loss": 2.1759, "step": 256335 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.0001583648258107845, "loss": 2.1182, "step": 256340 }, { "epoch": 0.6, "grad_norm": 1.8828125, "learning_rate": 0.00015836332497595136, "loss": 2.1111, "step": 256345 }, { "epoch": 0.6, "grad_norm": 1.9140625, "learning_rate": 0.00015836182412118016, "loss": 1.8705, "step": 256350 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015836032324647155, "loss": 2.1159, "step": 256355 }, { "epoch": 0.6, "grad_norm": 1.921875, "learning_rate": 0.00015835882235182598, "loss": 2.1783, "step": 256360 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.00015835732143724398, "loss": 2.1127, "step": 256365 }, { "epoch": 0.6, "grad_norm": 2.640625, "learning_rate": 0.00015835582050272606, "loss": 1.9129, "step": 256370 }, { "epoch": 0.6, "grad_norm": 2.609375, "learning_rate": 0.00015835431954827273, "loss": 2.2006, "step": 256375 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015835281857388452, "loss": 2.1503, "step": 256380 }, { "epoch": 0.6, "grad_norm": 1.890625, "learning_rate": 0.0001583513175795619, "loss": 2.0974, "step": 256385 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001583498165653054, "loss": 2.0929, "step": 256390 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001583483155311156, "loss": 2.0819, "step": 256395 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015834681447699292, "loss": 1.954, "step": 256400 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015834531340293794, "loss": 1.939, "step": 256405 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.0001583438123089511, "loss": 2.2945, "step": 256410 }, { "epoch": 0.6, "grad_norm": 2.859375, "learning_rate": 0.000158342311195033, "loss": 2.0157, "step": 256415 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001583408100611841, "loss": 2.1832, "step": 256420 }, { "epoch": 0.6, "grad_norm": 1.8984375, "learning_rate": 0.0001583393089074049, "loss": 2.1333, "step": 256425 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015833780773369594, "loss": 2.0353, "step": 256430 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015833630654005774, "loss": 2.0555, "step": 256435 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.0001583348053264908, "loss": 2.0885, "step": 256440 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001583333040929956, "loss": 2.0324, "step": 256445 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.0001583318028395727, "loss": 2.0691, "step": 256450 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.0001583303015662226, "loss": 2.0615, "step": 256455 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015832880027294584, "loss": 2.1695, "step": 256460 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.0001583272989597429, "loss": 2.1655, "step": 256465 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.0001583257976266143, "loss": 2.0988, "step": 256470 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.00015832429627356048, "loss": 2.1314, "step": 256475 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.0001583227949005821, "loss": 2.2716, "step": 256480 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015832129350767956, "loss": 2.0632, "step": 256485 }, { "epoch": 0.6, "grad_norm": 2.625, "learning_rate": 0.0001583197920948534, "loss": 2.1263, "step": 256490 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015831829066210418, "loss": 1.9778, "step": 256495 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015831678920943232, "loss": 2.1158, "step": 256500 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.00015831528773683844, "loss": 2.012, "step": 256505 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015831378624432297, "loss": 2.0277, "step": 256510 }, { "epoch": 0.6, "grad_norm": 1.890625, "learning_rate": 0.00015831228473188645, "loss": 2.0779, "step": 256515 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015831078319952942, "loss": 2.0223, "step": 256520 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.00015830928164725235, "loss": 2.1779, "step": 256525 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015830778007505577, "loss": 2.1374, "step": 256530 }, { "epoch": 0.6, "grad_norm": 1.78125, "learning_rate": 0.0001583062784829402, "loss": 1.8884, "step": 256535 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015830477687090615, "loss": 2.0456, "step": 256540 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015830327523895415, "loss": 2.2666, "step": 256545 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015830177358708468, "loss": 2.0693, "step": 256550 }, { "epoch": 0.6, "grad_norm": 1.8203125, "learning_rate": 0.00015830027191529825, "loss": 2.1751, "step": 256555 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.0001582987702235954, "loss": 2.1937, "step": 256560 }, { "epoch": 0.6, "grad_norm": 2.734375, "learning_rate": 0.00015829726851197663, "loss": 1.9588, "step": 256565 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015829576678044247, "loss": 2.2475, "step": 256570 }, { "epoch": 0.6, "grad_norm": 1.9140625, "learning_rate": 0.0001582942650289934, "loss": 2.2357, "step": 256575 }, { "epoch": 0.6, "grad_norm": 1.8984375, "learning_rate": 0.00015829276325762996, "loss": 2.048, "step": 256580 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015829126146635266, "loss": 2.0459, "step": 256585 }, { "epoch": 0.6, "grad_norm": 2.3125, "learning_rate": 0.000158289759655162, "loss": 1.9784, "step": 256590 }, { "epoch": 0.6, "grad_norm": 2.296875, "learning_rate": 0.0001582882578240585, "loss": 2.1848, "step": 256595 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015828675597304268, "loss": 1.9392, "step": 256600 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015828525410211506, "loss": 1.8932, "step": 256605 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015828375221127613, "loss": 2.0721, "step": 256610 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015828225030052639, "loss": 2.07, "step": 256615 }, { "epoch": 0.6, "grad_norm": 2.6875, "learning_rate": 0.00015828074836986638, "loss": 2.1496, "step": 256620 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015827924641929664, "loss": 2.2337, "step": 256625 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015827774444881763, "loss": 2.0577, "step": 256630 }, { "epoch": 0.6, "grad_norm": 1.7578125, "learning_rate": 0.0001582762424584299, "loss": 2.0564, "step": 256635 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015827474044813396, "loss": 2.1277, "step": 256640 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015827323841793026, "loss": 2.1017, "step": 256645 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015827173636781943, "loss": 1.9987, "step": 256650 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015827023429780188, "loss": 2.002, "step": 256655 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.00015826873220787813, "loss": 2.2385, "step": 256660 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.0001582672300980488, "loss": 2.0005, "step": 256665 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015826572796831428, "loss": 2.1346, "step": 256670 }, { "epoch": 0.6, "grad_norm": 2.515625, "learning_rate": 0.00015826422581867515, "loss": 2.1457, "step": 256675 }, { "epoch": 0.6, "grad_norm": 1.84375, "learning_rate": 0.00015826272364913185, "loss": 2.033, "step": 256680 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015826122145968502, "loss": 2.3289, "step": 256685 }, { "epoch": 0.6, "grad_norm": 3.171875, "learning_rate": 0.00015825971925033508, "loss": 1.9328, "step": 256690 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015825821702108254, "loss": 1.8879, "step": 256695 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015825671477192796, "loss": 2.2427, "step": 256700 }, { "epoch": 0.6, "grad_norm": 2.453125, "learning_rate": 0.00015825521250287182, "loss": 2.156, "step": 256705 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015825371021391463, "loss": 2.1083, "step": 256710 }, { "epoch": 0.6, "grad_norm": 2.390625, "learning_rate": 0.00015825220790505691, "loss": 2.2251, "step": 256715 }, { "epoch": 0.6, "grad_norm": 1.921875, "learning_rate": 0.0001582507055762992, "loss": 2.1039, "step": 256720 }, { "epoch": 0.6, "grad_norm": 1.9921875, "learning_rate": 0.000158249203227642, "loss": 1.8801, "step": 256725 }, { "epoch": 0.6, "grad_norm": 1.78125, "learning_rate": 0.00015824770085908583, "loss": 2.3187, "step": 256730 }, { "epoch": 0.6, "grad_norm": 2.140625, "learning_rate": 0.00015824619847063117, "loss": 2.0328, "step": 256735 }, { "epoch": 0.6, "grad_norm": 2.5, "learning_rate": 0.00015824469606227855, "loss": 2.1334, "step": 256740 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015824319363402846, "loss": 2.1018, "step": 256745 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015824169118588147, "loss": 2.1653, "step": 256750 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015824018871783804, "loss": 2.0577, "step": 256755 }, { "epoch": 0.6, "grad_norm": 1.8359375, "learning_rate": 0.00015823868622989872, "loss": 2.0249, "step": 256760 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015823718372206402, "loss": 2.0198, "step": 256765 }, { "epoch": 0.6, "grad_norm": 2.53125, "learning_rate": 0.00015823568119433443, "loss": 1.9854, "step": 256770 }, { "epoch": 0.6, "grad_norm": 1.859375, "learning_rate": 0.00015823417864671048, "loss": 2.0159, "step": 256775 }, { "epoch": 0.6, "grad_norm": 2.03125, "learning_rate": 0.00015823267607919267, "loss": 1.9968, "step": 256780 }, { "epoch": 0.6, "grad_norm": 2.265625, "learning_rate": 0.00015823117349178154, "loss": 2.215, "step": 256785 }, { "epoch": 0.6, "grad_norm": 2.015625, "learning_rate": 0.00015822967088447755, "loss": 1.9204, "step": 256790 }, { "epoch": 0.6, "grad_norm": 2.5, "learning_rate": 0.00015822816825728127, "loss": 1.856, "step": 256795 }, { "epoch": 0.6, "grad_norm": 2.203125, "learning_rate": 0.00015822666561019318, "loss": 2.1096, "step": 256800 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015822516294321384, "loss": 2.1158, "step": 256805 }, { "epoch": 0.6, "grad_norm": 2.125, "learning_rate": 0.0001582236602563437, "loss": 2.0091, "step": 256810 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.0001582221575495833, "loss": 2.1051, "step": 256815 }, { "epoch": 0.6, "grad_norm": 1.921875, "learning_rate": 0.00015822065482293317, "loss": 2.0815, "step": 256820 }, { "epoch": 0.6, "grad_norm": 1.90625, "learning_rate": 0.00015821915207639377, "loss": 2.0524, "step": 256825 }, { "epoch": 0.6, "grad_norm": 1.765625, "learning_rate": 0.0001582176493099657, "loss": 2.1566, "step": 256830 }, { "epoch": 0.6, "grad_norm": 2.46875, "learning_rate": 0.0001582161465236494, "loss": 2.0663, "step": 256835 }, { "epoch": 0.6, "grad_norm": 1.9375, "learning_rate": 0.00015821464371744542, "loss": 2.1038, "step": 256840 }, { "epoch": 0.6, "grad_norm": 2.5625, "learning_rate": 0.00015821314089135426, "loss": 2.0975, "step": 256845 }, { "epoch": 0.6, "grad_norm": 2.375, "learning_rate": 0.00015821163804537641, "loss": 2.0649, "step": 256850 }, { "epoch": 0.6, "grad_norm": 1.8671875, "learning_rate": 0.00015821013517951243, "loss": 2.0042, "step": 256855 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015820863229376283, "loss": 2.0324, "step": 256860 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015820712938812809, "loss": 2.1143, "step": 256865 }, { "epoch": 0.6, "grad_norm": 2.109375, "learning_rate": 0.00015820562646260872, "loss": 2.0539, "step": 256870 }, { "epoch": 0.6, "grad_norm": 2.765625, "learning_rate": 0.00015820412351720529, "loss": 2.0722, "step": 256875 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015820262055191826, "loss": 2.1271, "step": 256880 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015820111756674815, "loss": 1.99, "step": 256885 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.0001581996145616955, "loss": 2.1525, "step": 256890 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015819811153676075, "loss": 2.008, "step": 256895 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015819660849194454, "loss": 2.0106, "step": 256900 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015819510542724728, "loss": 1.8544, "step": 256905 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.0001581936023426695, "loss": 2.1565, "step": 256910 }, { "epoch": 0.6, "grad_norm": 1.90625, "learning_rate": 0.00015819209923821176, "loss": 2.0482, "step": 256915 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015819059611387455, "loss": 2.2377, "step": 256920 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015818909296965835, "loss": 2.1978, "step": 256925 }, { "epoch": 0.6, "grad_norm": 1.8046875, "learning_rate": 0.00015818758980556372, "loss": 2.0835, "step": 256930 }, { "epoch": 0.6, "grad_norm": 1.6875, "learning_rate": 0.00015818608662159116, "loss": 2.003, "step": 256935 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015818458341774116, "loss": 2.1139, "step": 256940 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015818308019401424, "loss": 2.0343, "step": 256945 }, { "epoch": 0.6, "grad_norm": 1.875, "learning_rate": 0.00015818157695041097, "loss": 2.0337, "step": 256950 }, { "epoch": 0.6, "grad_norm": 1.9140625, "learning_rate": 0.00015818007368693178, "loss": 2.0671, "step": 256955 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015817857040357724, "loss": 2.2022, "step": 256960 }, { "epoch": 0.6, "grad_norm": 2.15625, "learning_rate": 0.00015817706710034783, "loss": 2.1158, "step": 256965 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015817556377724408, "loss": 2.0099, "step": 256970 }, { "epoch": 0.6, "grad_norm": 2.09375, "learning_rate": 0.00015817406043426647, "loss": 2.1291, "step": 256975 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.0001581725570714156, "loss": 2.041, "step": 256980 }, { "epoch": 0.6, "grad_norm": 2.28125, "learning_rate": 0.00015817105368869192, "loss": 2.1317, "step": 256985 }, { "epoch": 0.6, "grad_norm": 2.4375, "learning_rate": 0.00015816955028609596, "loss": 2.065, "step": 256990 }, { "epoch": 0.6, "grad_norm": 2.234375, "learning_rate": 0.00015816804686362822, "loss": 2.1188, "step": 256995 }, { "epoch": 0.6, "grad_norm": 1.8125, "learning_rate": 0.0001581665434212892, "loss": 2.1298, "step": 257000 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.00015816503995907946, "loss": 2.0266, "step": 257005 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015816353647699947, "loss": 1.9546, "step": 257010 }, { "epoch": 0.6, "grad_norm": 2.171875, "learning_rate": 0.00015816203297504974, "loss": 2.148, "step": 257015 }, { "epoch": 0.6, "grad_norm": 2.40625, "learning_rate": 0.00015816052945323087, "loss": 2.2743, "step": 257020 }, { "epoch": 0.6, "grad_norm": 2.25, "learning_rate": 0.00015815902591154326, "loss": 1.9878, "step": 257025 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015815752234998745, "loss": 2.1296, "step": 257030 }, { "epoch": 0.6, "grad_norm": 2.359375, "learning_rate": 0.000158156018768564, "loss": 2.0945, "step": 257035 }, { "epoch": 0.6, "grad_norm": 1.96875, "learning_rate": 0.00015815451516727343, "loss": 2.1623, "step": 257040 }, { "epoch": 0.6, "grad_norm": 2.078125, "learning_rate": 0.00015815301154611618, "loss": 1.8366, "step": 257045 }, { "epoch": 0.6, "grad_norm": 2.0625, "learning_rate": 0.00015815150790509283, "loss": 2.0995, "step": 257050 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.00015815000424420386, "loss": 2.0358, "step": 257055 }, { "epoch": 0.6, "grad_norm": 2.21875, "learning_rate": 0.0001581485005634498, "loss": 2.1544, "step": 257060 }, { "epoch": 0.6, "grad_norm": 2.328125, "learning_rate": 0.00015814699686283117, "loss": 2.0888, "step": 257065 }, { "epoch": 0.6, "grad_norm": 2.0, "learning_rate": 0.00015814549314234846, "loss": 2.033, "step": 257070 }, { "epoch": 0.6, "grad_norm": 1.984375, "learning_rate": 0.0001581439894020022, "loss": 2.2934, "step": 257075 }, { "epoch": 0.6, "grad_norm": 2.1875, "learning_rate": 0.00015814248564179287, "loss": 1.8342, "step": 257080 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015814098186172105, "loss": 2.1329, "step": 257085 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015813947806178716, "loss": 2.3082, "step": 257090 }, { "epoch": 0.61, "grad_norm": 1.8046875, "learning_rate": 0.00015813797424199183, "loss": 2.2243, "step": 257095 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.0001581364704023355, "loss": 1.9885, "step": 257100 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.0001581349665428187, "loss": 2.13, "step": 257105 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001581334626634419, "loss": 2.0839, "step": 257110 }, { "epoch": 0.61, "grad_norm": 1.8359375, "learning_rate": 0.0001581319587642057, "loss": 2.2453, "step": 257115 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015813045484511058, "loss": 2.2295, "step": 257120 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.000158128950906157, "loss": 2.0697, "step": 257125 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015812744694734555, "loss": 2.0084, "step": 257130 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015812594296867668, "loss": 2.2438, "step": 257135 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015812443897015094, "loss": 1.9745, "step": 257140 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015812293495176885, "loss": 1.9276, "step": 257145 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001581214309135309, "loss": 2.1594, "step": 257150 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015811992685543764, "loss": 2.1101, "step": 257155 }, { "epoch": 0.61, "grad_norm": 1.8515625, "learning_rate": 0.00015811842277748954, "loss": 2.0898, "step": 257160 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015811691867968712, "loss": 2.1141, "step": 257165 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.00015811541456203093, "loss": 1.9908, "step": 257170 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.00015811391042452145, "loss": 2.2908, "step": 257175 }, { "epoch": 0.61, "grad_norm": 1.65625, "learning_rate": 0.0001581124062671592, "loss": 2.0022, "step": 257180 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.0001581109020899447, "loss": 2.2076, "step": 257185 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015810939789287845, "loss": 2.0854, "step": 257190 }, { "epoch": 0.61, "grad_norm": 2.65625, "learning_rate": 0.000158107893675961, "loss": 2.1253, "step": 257195 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015810638943919284, "loss": 2.1631, "step": 257200 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015810488518257446, "loss": 2.1047, "step": 257205 }, { "epoch": 0.61, "grad_norm": 1.71875, "learning_rate": 0.00015810338090610642, "loss": 2.031, "step": 257210 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015810187660978922, "loss": 2.0501, "step": 257215 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.0001581003722936233, "loss": 2.1208, "step": 257220 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001580988679576093, "loss": 2.1616, "step": 257225 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015809736360174767, "loss": 1.9059, "step": 257230 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015809585922603892, "loss": 2.0028, "step": 257235 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015809435483048358, "loss": 2.1541, "step": 257240 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015809285041508213, "loss": 1.9809, "step": 257245 }, { "epoch": 0.61, "grad_norm": 1.8046875, "learning_rate": 0.00015809134597983513, "loss": 2.2273, "step": 257250 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015808984152474307, "loss": 2.2535, "step": 257255 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015808833704980644, "loss": 2.1615, "step": 257260 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.0001580868325550258, "loss": 2.1534, "step": 257265 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015808532804040165, "loss": 1.8284, "step": 257270 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.0001580838235059345, "loss": 2.1518, "step": 257275 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015808231895162483, "loss": 2.1378, "step": 257280 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015808081437747323, "loss": 2.1341, "step": 257285 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.00015807930978348016, "loss": 2.0997, "step": 257290 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015807780516964614, "loss": 1.9478, "step": 257295 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015807630053597167, "loss": 2.1685, "step": 257300 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.0001580747958824573, "loss": 2.0089, "step": 257305 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015807329120910352, "loss": 2.1607, "step": 257310 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015807178651591088, "loss": 1.9142, "step": 257315 }, { "epoch": 0.61, "grad_norm": 2.65625, "learning_rate": 0.00015807028180287981, "loss": 2.0893, "step": 257320 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.0001580687770700109, "loss": 1.8619, "step": 257325 }, { "epoch": 0.61, "grad_norm": 1.875, "learning_rate": 0.00015806727231730468, "loss": 2.0749, "step": 257330 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.0001580657675447616, "loss": 2.0618, "step": 257335 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015806426275238219, "loss": 2.0522, "step": 257340 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015806275794016697, "loss": 2.0951, "step": 257345 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015806125310811647, "loss": 2.1291, "step": 257350 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.0001580597482562312, "loss": 1.976, "step": 257355 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015805824338451167, "loss": 2.1875, "step": 257360 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015805673849295836, "loss": 2.0852, "step": 257365 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.00015805523358157188, "loss": 2.0043, "step": 257370 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015805372865035263, "loss": 2.3188, "step": 257375 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015805222369930116, "loss": 2.0225, "step": 257380 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.000158050718728418, "loss": 2.0213, "step": 257385 }, { "epoch": 0.61, "grad_norm": 2.59375, "learning_rate": 0.0001580492137377037, "loss": 1.9776, "step": 257390 }, { "epoch": 0.61, "grad_norm": 1.7265625, "learning_rate": 0.00015804770872715872, "loss": 2.1666, "step": 257395 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001580462036967836, "loss": 2.0996, "step": 257400 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001580446986465788, "loss": 2.0187, "step": 257405 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015804319357654492, "loss": 2.2312, "step": 257410 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015804168848668245, "loss": 2.2553, "step": 257415 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015804018337699184, "loss": 2.0428, "step": 257420 }, { "epoch": 0.61, "grad_norm": 2.640625, "learning_rate": 0.00015803867824747367, "loss": 2.3554, "step": 257425 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015803717309812842, "loss": 1.9247, "step": 257430 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015803566792895666, "loss": 2.1258, "step": 257435 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.0001580341627399588, "loss": 2.0129, "step": 257440 }, { "epoch": 0.61, "grad_norm": 2.59375, "learning_rate": 0.00015803265753113548, "loss": 2.2053, "step": 257445 }, { "epoch": 0.61, "grad_norm": 3.0, "learning_rate": 0.0001580311523024871, "loss": 2.0658, "step": 257450 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015802964705401425, "loss": 1.9619, "step": 257455 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015802814178571744, "loss": 1.9815, "step": 257460 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015802663649759714, "loss": 2.1131, "step": 257465 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.0001580251311896539, "loss": 2.031, "step": 257470 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015802362586188822, "loss": 2.167, "step": 257475 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.00015802212051430062, "loss": 1.9607, "step": 257480 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015802061514689158, "loss": 2.2268, "step": 257485 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015801910975966167, "loss": 2.0438, "step": 257490 }, { "epoch": 0.61, "grad_norm": 1.8828125, "learning_rate": 0.0001580176043526114, "loss": 2.1178, "step": 257495 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015801609892574124, "loss": 2.1259, "step": 257500 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015801459347905173, "loss": 2.0354, "step": 257505 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015801308801254339, "loss": 2.0213, "step": 257510 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015801158252621672, "loss": 2.1773, "step": 257515 }, { "epoch": 0.61, "grad_norm": 2.640625, "learning_rate": 0.00015801007702007225, "loss": 2.1763, "step": 257520 }, { "epoch": 0.61, "grad_norm": 2.90625, "learning_rate": 0.00015800857149411045, "loss": 2.01, "step": 257525 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.0001580070659483319, "loss": 2.0351, "step": 257530 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.0001580055603827371, "loss": 2.107, "step": 257535 }, { "epoch": 0.61, "grad_norm": 1.5625, "learning_rate": 0.00015800405479732653, "loss": 2.0008, "step": 257540 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015800254919210073, "loss": 2.0772, "step": 257545 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.0001580010435670602, "loss": 2.0611, "step": 257550 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015799953792220548, "loss": 2.0847, "step": 257555 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015799803225753704, "loss": 2.1061, "step": 257560 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.0001579965265730554, "loss": 2.1582, "step": 257565 }, { "epoch": 0.61, "grad_norm": 1.7578125, "learning_rate": 0.00015799502086876114, "loss": 2.0505, "step": 257570 }, { "epoch": 0.61, "grad_norm": 1.890625, "learning_rate": 0.00015799351514465473, "loss": 2.0563, "step": 257575 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015799200940073667, "loss": 2.1176, "step": 257580 }, { "epoch": 0.61, "grad_norm": 2.640625, "learning_rate": 0.0001579905036370075, "loss": 2.104, "step": 257585 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.0001579889978534677, "loss": 1.9757, "step": 257590 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015798749205011783, "loss": 2.1226, "step": 257595 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015798598622695836, "loss": 2.0606, "step": 257600 }, { "epoch": 0.61, "grad_norm": 1.7890625, "learning_rate": 0.00015798448038398984, "loss": 1.9299, "step": 257605 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015798297452121273, "loss": 2.0033, "step": 257610 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015798146863862762, "loss": 2.0, "step": 257615 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015797996273623498, "loss": 2.02, "step": 257620 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015797845681403533, "loss": 1.9788, "step": 257625 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015797695087202922, "loss": 2.0655, "step": 257630 }, { "epoch": 0.61, "grad_norm": 1.8203125, "learning_rate": 0.0001579754449102171, "loss": 1.9769, "step": 257635 }, { "epoch": 0.61, "grad_norm": 1.734375, "learning_rate": 0.0001579739389285995, "loss": 1.918, "step": 257640 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015797243292717698, "loss": 2.1126, "step": 257645 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015797092690595003, "loss": 2.1126, "step": 257650 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015796942086491916, "loss": 1.9934, "step": 257655 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015796791480408485, "loss": 2.0759, "step": 257660 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.0001579664087234477, "loss": 2.2315, "step": 257665 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015796490262300815, "loss": 2.1766, "step": 257670 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015796339650276671, "loss": 2.1588, "step": 257675 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015796189036272398, "loss": 2.1396, "step": 257680 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015796038420288038, "loss": 2.1282, "step": 257685 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015795887802323648, "loss": 2.0753, "step": 257690 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015795737182379272, "loss": 2.187, "step": 257695 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015795586560454972, "loss": 1.9925, "step": 257700 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015795435936550795, "loss": 1.967, "step": 257705 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.0001579528531066679, "loss": 2.0444, "step": 257710 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015795134682803011, "loss": 2.1113, "step": 257715 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015794984052959512, "loss": 1.8714, "step": 257720 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015794833421136338, "loss": 2.11, "step": 257725 }, { "epoch": 0.61, "grad_norm": 2.75, "learning_rate": 0.00015794682787333543, "loss": 2.025, "step": 257730 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.00015794532151551183, "loss": 1.9164, "step": 257735 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.000157943815137893, "loss": 2.1165, "step": 257740 }, { "epoch": 0.61, "grad_norm": 2.765625, "learning_rate": 0.00015794230874047958, "loss": 1.9858, "step": 257745 }, { "epoch": 0.61, "grad_norm": 2.78125, "learning_rate": 0.00015794080232327197, "loss": 2.1046, "step": 257750 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015793929588627073, "loss": 2.1622, "step": 257755 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015793778942947638, "loss": 2.1068, "step": 257760 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.00015793628295288946, "loss": 2.1077, "step": 257765 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015793477645651042, "loss": 2.2246, "step": 257770 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015793326994033982, "loss": 2.0997, "step": 257775 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015793176340437817, "loss": 2.1077, "step": 257780 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015793025684862597, "loss": 2.0471, "step": 257785 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015792875027308376, "loss": 2.0418, "step": 257790 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015792724367775206, "loss": 2.1743, "step": 257795 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.0001579257370626313, "loss": 2.1045, "step": 257800 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015792423042772208, "loss": 2.0647, "step": 257805 }, { "epoch": 0.61, "grad_norm": 1.9453125, "learning_rate": 0.00015792272377302492, "loss": 2.1634, "step": 257810 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015792121709854028, "loss": 2.1343, "step": 257815 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.0001579197104042687, "loss": 1.8192, "step": 257820 }, { "epoch": 0.61, "grad_norm": 1.4140625, "learning_rate": 0.0001579182036902107, "loss": 1.9161, "step": 257825 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.00015791669695636683, "loss": 2.1447, "step": 257830 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.0001579151902027375, "loss": 1.9782, "step": 257835 }, { "epoch": 0.61, "grad_norm": 2.5625, "learning_rate": 0.00015791368342932334, "loss": 2.0159, "step": 257840 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001579121766361248, "loss": 1.9223, "step": 257845 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.0001579106698231424, "loss": 2.2456, "step": 257850 }, { "epoch": 0.61, "grad_norm": 1.9375, "learning_rate": 0.0001579091629903767, "loss": 2.2108, "step": 257855 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015790765613782814, "loss": 2.0329, "step": 257860 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001579061492654973, "loss": 2.1342, "step": 257865 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015790464237338468, "loss": 2.0906, "step": 257870 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.00015790313546149075, "loss": 2.1254, "step": 257875 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015790162852981606, "loss": 2.0052, "step": 257880 }, { "epoch": 0.61, "grad_norm": 1.875, "learning_rate": 0.00015790012157836114, "loss": 1.9921, "step": 257885 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015789861460712646, "loss": 2.1991, "step": 257890 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.0001578971076161126, "loss": 2.1532, "step": 257895 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015789560060532003, "loss": 2.1209, "step": 257900 }, { "epoch": 0.61, "grad_norm": 1.703125, "learning_rate": 0.00015789409357474927, "loss": 2.0714, "step": 257905 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015789258652440085, "loss": 1.9869, "step": 257910 }, { "epoch": 0.61, "grad_norm": 1.625, "learning_rate": 0.00015789107945427523, "loss": 2.0465, "step": 257915 }, { "epoch": 0.61, "grad_norm": 1.75, "learning_rate": 0.000157889572364373, "loss": 2.0806, "step": 257920 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015788806525469465, "loss": 2.0726, "step": 257925 }, { "epoch": 0.61, "grad_norm": 1.890625, "learning_rate": 0.00015788655812524067, "loss": 2.0744, "step": 257930 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001578850509760116, "loss": 1.9704, "step": 257935 }, { "epoch": 0.61, "grad_norm": 2.671875, "learning_rate": 0.0001578835438070079, "loss": 2.0193, "step": 257940 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015788203661823023, "loss": 2.0561, "step": 257945 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.00015788052940967894, "loss": 2.0155, "step": 257950 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.0001578790221813546, "loss": 2.1598, "step": 257955 }, { "epoch": 0.61, "grad_norm": 2.625, "learning_rate": 0.00015787751493325777, "loss": 2.0924, "step": 257960 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015787600766538896, "loss": 2.1404, "step": 257965 }, { "epoch": 0.61, "grad_norm": 2.625, "learning_rate": 0.00015787450037774858, "loss": 2.1717, "step": 257970 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015787299307033726, "loss": 1.9973, "step": 257975 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015787148574315546, "loss": 1.9063, "step": 257980 }, { "epoch": 0.61, "grad_norm": 2.5625, "learning_rate": 0.00015786997839620375, "loss": 2.154, "step": 257985 }, { "epoch": 0.61, "grad_norm": 1.8515625, "learning_rate": 0.00015786847102948256, "loss": 1.9311, "step": 257990 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015786696364299246, "loss": 2.0598, "step": 257995 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.00015786545623673397, "loss": 2.2695, "step": 258000 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.0001578639488107076, "loss": 2.1492, "step": 258005 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015786244136491387, "loss": 2.0999, "step": 258010 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015786093389935323, "loss": 2.0971, "step": 258015 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015785942641402623, "loss": 2.0055, "step": 258020 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.0001578579189089335, "loss": 2.3035, "step": 258025 }, { "epoch": 0.61, "grad_norm": 1.6484375, "learning_rate": 0.00015785641138407538, "loss": 2.0354, "step": 258030 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015785490383945247, "loss": 2.2869, "step": 258035 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.0001578533962750653, "loss": 2.2942, "step": 258040 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015785188869091435, "loss": 2.0022, "step": 258045 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.0001578503810870001, "loss": 2.1172, "step": 258050 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015784887346332316, "loss": 2.0316, "step": 258055 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015784736581988398, "loss": 2.3004, "step": 258060 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.0001578458581566831, "loss": 2.1542, "step": 258065 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015784435047372104, "loss": 1.9967, "step": 258070 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015784284277099825, "loss": 2.0013, "step": 258075 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015784133504851534, "loss": 2.0502, "step": 258080 }, { "epoch": 0.61, "grad_norm": 1.890625, "learning_rate": 0.00015783982730627275, "loss": 2.008, "step": 258085 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015783831954427105, "loss": 2.0562, "step": 258090 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015783681176251074, "loss": 1.8641, "step": 258095 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015783530396099228, "loss": 2.0587, "step": 258100 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.0001578337961397163, "loss": 2.0849, "step": 258105 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015783228829868321, "loss": 2.1134, "step": 258110 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015783078043789356, "loss": 2.1332, "step": 258115 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015782927255734788, "loss": 2.0904, "step": 258120 }, { "epoch": 0.61, "grad_norm": 1.7890625, "learning_rate": 0.00015782776465704664, "loss": 2.1847, "step": 258125 }, { "epoch": 0.61, "grad_norm": 1.890625, "learning_rate": 0.0001578262567369904, "loss": 2.1111, "step": 258130 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015782474879717966, "loss": 2.2565, "step": 258135 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015782324083761498, "loss": 2.0564, "step": 258140 }, { "epoch": 0.61, "grad_norm": 1.75, "learning_rate": 0.00015782173285829677, "loss": 1.9674, "step": 258145 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015782022485922562, "loss": 2.1116, "step": 258150 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.00015781871684040206, "loss": 2.0373, "step": 258155 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015781720880182656, "loss": 2.1718, "step": 258160 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015781570074349966, "loss": 2.0354, "step": 258165 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015781419266542188, "loss": 2.1068, "step": 258170 }, { "epoch": 0.61, "grad_norm": 2.625, "learning_rate": 0.0001578126845675937, "loss": 2.1033, "step": 258175 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015781117645001567, "loss": 2.0303, "step": 258180 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001578096683126883, "loss": 2.1012, "step": 258185 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015780816015561208, "loss": 2.0199, "step": 258190 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015780665197878755, "loss": 2.1202, "step": 258195 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015780514378221523, "loss": 1.8594, "step": 258200 }, { "epoch": 0.61, "grad_norm": 2.59375, "learning_rate": 0.0001578036355658956, "loss": 1.9663, "step": 258205 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015780212732982923, "loss": 2.2284, "step": 258210 }, { "epoch": 0.61, "grad_norm": 1.875, "learning_rate": 0.0001578006190740166, "loss": 1.9998, "step": 258215 }, { "epoch": 0.61, "grad_norm": 1.8984375, "learning_rate": 0.00015779911079845822, "loss": 2.2498, "step": 258220 }, { "epoch": 0.61, "grad_norm": 2.65625, "learning_rate": 0.00015779760250315462, "loss": 1.9099, "step": 258225 }, { "epoch": 0.61, "grad_norm": 1.90625, "learning_rate": 0.0001577960941881063, "loss": 2.0471, "step": 258230 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.0001577945858533138, "loss": 1.9787, "step": 258235 }, { "epoch": 0.61, "grad_norm": 1.7890625, "learning_rate": 0.00015779307749877764, "loss": 2.0555, "step": 258240 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.0001577915691244983, "loss": 2.1375, "step": 258245 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015779006073047628, "loss": 2.0823, "step": 258250 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015778855231671216, "loss": 2.0556, "step": 258255 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.0001577870438832064, "loss": 1.8974, "step": 258260 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015778553542995956, "loss": 2.0616, "step": 258265 }, { "epoch": 0.61, "grad_norm": 4.8125, "learning_rate": 0.00015778402695697214, "loss": 2.2491, "step": 258270 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.0001577825184642446, "loss": 1.871, "step": 258275 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.0001577810099517776, "loss": 2.0702, "step": 258280 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015777950141957145, "loss": 2.1612, "step": 258285 }, { "epoch": 0.61, "grad_norm": 1.9453125, "learning_rate": 0.00015777799286762682, "loss": 2.1065, "step": 258290 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.0001577764842959442, "loss": 1.9332, "step": 258295 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015777497570452408, "loss": 2.1577, "step": 258300 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015777346709336696, "loss": 2.1848, "step": 258305 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015777195846247336, "loss": 1.9375, "step": 258310 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015777044981184383, "loss": 2.0413, "step": 258315 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015776894114147888, "loss": 2.2582, "step": 258320 }, { "epoch": 0.61, "grad_norm": 2.65625, "learning_rate": 0.00015776743245137902, "loss": 2.0694, "step": 258325 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015776592374154474, "loss": 2.0134, "step": 258330 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015776441501197657, "loss": 2.0086, "step": 258335 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015776290626267502, "loss": 2.2448, "step": 258340 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015776139749364062, "loss": 2.086, "step": 258345 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015775988870487387, "loss": 2.1022, "step": 258350 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015775837989637532, "loss": 2.0582, "step": 258355 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015775687106814546, "loss": 2.1687, "step": 258360 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001577553622201848, "loss": 2.0661, "step": 258365 }, { "epoch": 0.61, "grad_norm": 1.8046875, "learning_rate": 0.00015775385335249384, "loss": 2.111, "step": 258370 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015775234446507313, "loss": 2.0506, "step": 258375 }, { "epoch": 0.61, "grad_norm": 2.71875, "learning_rate": 0.00015775083555792318, "loss": 2.0347, "step": 258380 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.0001577493266310445, "loss": 1.9719, "step": 258385 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.0001577478176844376, "loss": 2.1863, "step": 258390 }, { "epoch": 0.61, "grad_norm": 1.8359375, "learning_rate": 0.00015774630871810295, "loss": 1.9812, "step": 258395 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015774479973204117, "loss": 2.161, "step": 258400 }, { "epoch": 0.61, "grad_norm": 1.8984375, "learning_rate": 0.00015774329072625268, "loss": 2.033, "step": 258405 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.00015774178170073808, "loss": 2.2954, "step": 258410 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015774027265549783, "loss": 2.2651, "step": 258415 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015773876359053242, "loss": 1.8821, "step": 258420 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015773725450584242, "loss": 1.8881, "step": 258425 }, { "epoch": 0.61, "grad_norm": 1.75, "learning_rate": 0.00015773574540142835, "loss": 2.0103, "step": 258430 }, { "epoch": 0.61, "grad_norm": 2.484375, "learning_rate": 0.00015773423627729068, "loss": 2.0571, "step": 258435 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015773272713342994, "loss": 2.3214, "step": 258440 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.0001577312179698467, "loss": 2.1433, "step": 258445 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015772970878654137, "loss": 2.0115, "step": 258450 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015772819958351456, "loss": 2.0925, "step": 258455 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.00015772669036076672, "loss": 1.9481, "step": 258460 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015772518111829844, "loss": 2.1374, "step": 258465 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015772367185611016, "loss": 2.1498, "step": 258470 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015772216257420242, "loss": 2.0472, "step": 258475 }, { "epoch": 0.61, "grad_norm": 2.6875, "learning_rate": 0.00015772065327257577, "loss": 2.1233, "step": 258480 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015771914395123067, "loss": 2.1368, "step": 258485 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015771763461016767, "loss": 2.1244, "step": 258490 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001577161252493873, "loss": 2.0275, "step": 258495 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015771461586889005, "loss": 2.1122, "step": 258500 }, { "epoch": 0.61, "grad_norm": 2.59375, "learning_rate": 0.0001577131064686764, "loss": 2.3152, "step": 258505 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.00015771159704874697, "loss": 2.0933, "step": 258510 }, { "epoch": 0.61, "grad_norm": 2.71875, "learning_rate": 0.00015771008760910216, "loss": 2.137, "step": 258515 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015770857814974257, "loss": 2.0155, "step": 258520 }, { "epoch": 0.61, "grad_norm": 3.296875, "learning_rate": 0.00015770706867066867, "loss": 2.0627, "step": 258525 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015770555917188098, "loss": 1.9383, "step": 258530 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.00015770404965338, "loss": 2.0955, "step": 258535 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015770254011516634, "loss": 1.8807, "step": 258540 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015770103055724039, "loss": 2.1476, "step": 258545 }, { "epoch": 0.61, "grad_norm": 1.8828125, "learning_rate": 0.00015769952097960277, "loss": 2.119, "step": 258550 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.0001576980113822539, "loss": 2.2225, "step": 258555 }, { "epoch": 0.61, "grad_norm": 1.8828125, "learning_rate": 0.00015769650176519434, "loss": 2.0408, "step": 258560 }, { "epoch": 0.61, "grad_norm": 2.9375, "learning_rate": 0.00015769499212842463, "loss": 1.7825, "step": 258565 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.0001576934824719453, "loss": 2.0267, "step": 258570 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.00015769197279575677, "loss": 2.1989, "step": 258575 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015769046309985964, "loss": 2.1873, "step": 258580 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.0001576889533842544, "loss": 1.8907, "step": 258585 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015768744364894158, "loss": 2.0833, "step": 258590 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015768593389392166, "loss": 2.0854, "step": 258595 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.0001576844241191952, "loss": 2.1624, "step": 258600 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015768291432476268, "loss": 2.1073, "step": 258605 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015768140451062463, "loss": 2.1695, "step": 258610 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015767989467678159, "loss": 2.014, "step": 258615 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.00015767838482323403, "loss": 2.0014, "step": 258620 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.0001576768749499825, "loss": 2.0247, "step": 258625 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015767536505702746, "loss": 2.1652, "step": 258630 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.0001576738551443695, "loss": 1.9825, "step": 258635 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.0001576723452120091, "loss": 2.0831, "step": 258640 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015767083525994682, "loss": 1.9748, "step": 258645 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001576693252881831, "loss": 2.0027, "step": 258650 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.0001576678152967185, "loss": 2.0462, "step": 258655 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015766630528555353, "loss": 2.0887, "step": 258660 }, { "epoch": 0.61, "grad_norm": 1.7578125, "learning_rate": 0.0001576647952546887, "loss": 1.9841, "step": 258665 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015766328520412453, "loss": 2.1131, "step": 258670 }, { "epoch": 0.61, "grad_norm": 2.578125, "learning_rate": 0.00015766177513386152, "loss": 1.9782, "step": 258675 }, { "epoch": 0.61, "grad_norm": 1.859375, "learning_rate": 0.00015766026504390024, "loss": 2.0679, "step": 258680 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.00015765875493424112, "loss": 1.9906, "step": 258685 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015765724480488476, "loss": 2.0422, "step": 258690 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015765573465583165, "loss": 2.1836, "step": 258695 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015765422448708226, "loss": 2.0054, "step": 258700 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015765271429863718, "loss": 2.0918, "step": 258705 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015765120409049686, "loss": 2.0843, "step": 258710 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015764969386266185, "loss": 1.9461, "step": 258715 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.00015764818361513263, "loss": 2.1174, "step": 258720 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.0001576466733479098, "loss": 2.2601, "step": 258725 }, { "epoch": 0.61, "grad_norm": 1.8359375, "learning_rate": 0.00015764516306099378, "loss": 2.0981, "step": 258730 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015764365275438514, "loss": 2.1217, "step": 258735 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015764214242808437, "loss": 2.1462, "step": 258740 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.000157640632082092, "loss": 1.9766, "step": 258745 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015763912171640854, "loss": 2.0771, "step": 258750 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015763761133103454, "loss": 1.9638, "step": 258755 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.0001576361009259705, "loss": 2.0369, "step": 258760 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015763459050121688, "loss": 2.1531, "step": 258765 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015763308005677424, "loss": 2.1424, "step": 258770 }, { "epoch": 0.61, "grad_norm": 1.578125, "learning_rate": 0.00015763156959264313, "loss": 2.0091, "step": 258775 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.000157630059108824, "loss": 2.0517, "step": 258780 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.0001576285486053174, "loss": 2.2477, "step": 258785 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015762703808212388, "loss": 2.207, "step": 258790 }, { "epoch": 0.61, "grad_norm": 2.921875, "learning_rate": 0.00015762552753924387, "loss": 2.2305, "step": 258795 }, { "epoch": 0.61, "grad_norm": 1.8984375, "learning_rate": 0.00015762401697667794, "loss": 1.8575, "step": 258800 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.0001576225063944266, "loss": 2.038, "step": 258805 }, { "epoch": 0.61, "grad_norm": 1.9375, "learning_rate": 0.0001576209957924904, "loss": 2.1847, "step": 258810 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.0001576194851708698, "loss": 2.1158, "step": 258815 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015761797452956534, "loss": 2.0528, "step": 258820 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015761646386857755, "loss": 2.002, "step": 258825 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.0001576149531879069, "loss": 2.0897, "step": 258830 }, { "epoch": 0.61, "grad_norm": 2.484375, "learning_rate": 0.00015761344248755395, "loss": 2.07, "step": 258835 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015761193176751923, "loss": 2.1159, "step": 258840 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.0001576104210278032, "loss": 2.3427, "step": 258845 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.0001576089102684064, "loss": 2.1149, "step": 258850 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.00015760739948932938, "loss": 2.0239, "step": 258855 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001576058886905726, "loss": 2.0677, "step": 258860 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.0001576043778721366, "loss": 2.0403, "step": 258865 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015760286703402193, "loss": 2.2333, "step": 258870 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015760135617622905, "loss": 2.1571, "step": 258875 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.0001575998452987585, "loss": 2.1442, "step": 258880 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015759833440161082, "loss": 2.1519, "step": 258885 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.0001575968234847865, "loss": 2.2556, "step": 258890 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015759531254828606, "loss": 1.9351, "step": 258895 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015759380159211002, "loss": 1.9992, "step": 258900 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015759229061625886, "loss": 1.9665, "step": 258905 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015759077962073317, "loss": 2.1466, "step": 258910 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001575892686055334, "loss": 2.0342, "step": 258915 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015758775757066011, "loss": 2.0721, "step": 258920 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001575862465161138, "loss": 1.9886, "step": 258925 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015758473544189494, "loss": 2.0143, "step": 258930 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.0001575832243480041, "loss": 2.0529, "step": 258935 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015758171323444184, "loss": 2.2241, "step": 258940 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015758020210120862, "loss": 2.0889, "step": 258945 }, { "epoch": 0.61, "grad_norm": 1.8203125, "learning_rate": 0.0001575786909483049, "loss": 2.0023, "step": 258950 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.0001575771797757313, "loss": 2.2689, "step": 258955 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015757566858348824, "loss": 2.1945, "step": 258960 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.00015757415737157633, "loss": 2.0765, "step": 258965 }, { "epoch": 0.61, "grad_norm": 1.921875, "learning_rate": 0.00015757264613999606, "loss": 2.0934, "step": 258970 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015757113488874792, "loss": 1.9487, "step": 258975 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.0001575696236178324, "loss": 2.1589, "step": 258980 }, { "epoch": 0.61, "grad_norm": 1.828125, "learning_rate": 0.00015756811232725006, "loss": 2.0323, "step": 258985 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015756660101700146, "loss": 2.1935, "step": 258990 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015756508968708702, "loss": 2.1538, "step": 258995 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001575635783375073, "loss": 2.0926, "step": 259000 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015756206696826284, "loss": 2.116, "step": 259005 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015756055557935412, "loss": 2.1991, "step": 259010 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.00015755904417078166, "loss": 2.027, "step": 259015 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.000157557532742546, "loss": 2.0083, "step": 259020 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015755602129464763, "loss": 2.1387, "step": 259025 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.0001575545098270871, "loss": 2.0708, "step": 259030 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015755299833986492, "loss": 1.8278, "step": 259035 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.00015755148683298153, "loss": 2.13, "step": 259040 }, { "epoch": 0.61, "grad_norm": 2.8125, "learning_rate": 0.00015754997530643755, "loss": 2.024, "step": 259045 }, { "epoch": 0.61, "grad_norm": 2.65625, "learning_rate": 0.00015754846376023348, "loss": 2.1883, "step": 259050 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015754695219436976, "loss": 2.142, "step": 259055 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015754544060884703, "loss": 2.131, "step": 259060 }, { "epoch": 0.61, "grad_norm": 3.09375, "learning_rate": 0.00015754392900366564, "loss": 2.2377, "step": 259065 }, { "epoch": 0.61, "grad_norm": 2.640625, "learning_rate": 0.00015754241737882623, "loss": 2.1313, "step": 259070 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015754090573432935, "loss": 2.0958, "step": 259075 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.0001575393940701754, "loss": 2.0629, "step": 259080 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015753788238636495, "loss": 1.9316, "step": 259085 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.0001575363706828985, "loss": 1.9058, "step": 259090 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.0001575348589597766, "loss": 2.2585, "step": 259095 }, { "epoch": 0.61, "grad_norm": 1.8203125, "learning_rate": 0.00015753334721699974, "loss": 2.0274, "step": 259100 }, { "epoch": 0.61, "grad_norm": 1.796875, "learning_rate": 0.00015753183545456846, "loss": 1.9055, "step": 259105 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015753032367248326, "loss": 1.9591, "step": 259110 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015752881187074466, "loss": 2.2939, "step": 259115 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015752730004935317, "loss": 2.2178, "step": 259120 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.0001575257882083093, "loss": 1.9989, "step": 259125 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.0001575242763476136, "loss": 2.0581, "step": 259130 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015752276446726654, "loss": 1.9999, "step": 259135 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015752125256726868, "loss": 1.9311, "step": 259140 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.0001575197406476205, "loss": 2.0651, "step": 259145 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015751822870832256, "loss": 2.1342, "step": 259150 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015751671674937533, "loss": 2.1042, "step": 259155 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015751520477077935, "loss": 2.0333, "step": 259160 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015751369277253513, "loss": 1.8954, "step": 259165 }, { "epoch": 0.61, "grad_norm": 1.8359375, "learning_rate": 0.0001575121807546432, "loss": 2.2095, "step": 259170 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015751066871710404, "loss": 1.9478, "step": 259175 }, { "epoch": 0.61, "grad_norm": 2.578125, "learning_rate": 0.00015750915665991823, "loss": 2.0355, "step": 259180 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015750764458308622, "loss": 1.8848, "step": 259185 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015750613248660856, "loss": 2.0527, "step": 259190 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.0001575046203704858, "loss": 2.0583, "step": 259195 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015750310823471833, "loss": 2.0477, "step": 259200 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015750159607930684, "loss": 2.0339, "step": 259205 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015750008390425174, "loss": 2.1815, "step": 259210 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.0001574985717095536, "loss": 2.0424, "step": 259215 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015749705949521286, "loss": 1.865, "step": 259220 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015749554726123005, "loss": 2.1749, "step": 259225 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015749403500760578, "loss": 2.0945, "step": 259230 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015749252273434045, "loss": 2.2829, "step": 259235 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015749101044143466, "loss": 2.0296, "step": 259240 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015748949812888894, "loss": 2.0674, "step": 259245 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015748798579670373, "loss": 2.1796, "step": 259250 }, { "epoch": 0.61, "grad_norm": 1.828125, "learning_rate": 0.00015748647344487955, "loss": 1.9804, "step": 259255 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.000157484961073417, "loss": 1.9742, "step": 259260 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015748344868231652, "loss": 2.2029, "step": 259265 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015748193627157864, "loss": 2.1683, "step": 259270 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.0001574804238412039, "loss": 2.0776, "step": 259275 }, { "epoch": 0.61, "grad_norm": 2.5625, "learning_rate": 0.0001574789113911928, "loss": 1.8306, "step": 259280 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015747739892154586, "loss": 1.9821, "step": 259285 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.0001574758864322636, "loss": 2.1434, "step": 259290 }, { "epoch": 0.61, "grad_norm": 2.546875, "learning_rate": 0.00015747437392334655, "loss": 2.1969, "step": 259295 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.0001574728613947952, "loss": 2.1517, "step": 259300 }, { "epoch": 0.61, "grad_norm": 1.921875, "learning_rate": 0.00015747134884661006, "loss": 2.1906, "step": 259305 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.0001574698362787917, "loss": 2.0653, "step": 259310 }, { "epoch": 0.61, "grad_norm": 1.75, "learning_rate": 0.0001574683236913406, "loss": 2.1237, "step": 259315 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015746681108425725, "loss": 2.1737, "step": 259320 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001574652984575422, "loss": 2.1609, "step": 259325 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015746378581119597, "loss": 2.1339, "step": 259330 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.0001574622731452191, "loss": 2.1355, "step": 259335 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015746076045961203, "loss": 2.2688, "step": 259340 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015745924775437535, "loss": 2.0772, "step": 259345 }, { "epoch": 0.61, "grad_norm": 2.578125, "learning_rate": 0.00015745773502950954, "loss": 2.0109, "step": 259350 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015745622228501512, "loss": 1.9627, "step": 259355 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.0001574547095208926, "loss": 1.9484, "step": 259360 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015745319673714253, "loss": 2.0518, "step": 259365 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015745168393376538, "loss": 2.0794, "step": 259370 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015745017111076171, "loss": 1.9942, "step": 259375 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015744865826813203, "loss": 1.9388, "step": 259380 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015744714540587683, "loss": 2.0687, "step": 259385 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015744563252399668, "loss": 1.9182, "step": 259390 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015744411962249203, "loss": 2.1207, "step": 259395 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015744260670136343, "loss": 2.1584, "step": 259400 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001574410937606114, "loss": 2.0584, "step": 259405 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015743958080023643, "loss": 2.0339, "step": 259410 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015743806782023904, "loss": 2.1332, "step": 259415 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015743655482061981, "loss": 2.215, "step": 259420 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.0001574350418013792, "loss": 2.1854, "step": 259425 }, { "epoch": 0.61, "grad_norm": 2.640625, "learning_rate": 0.00015743352876251775, "loss": 2.1677, "step": 259430 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015743201570403596, "loss": 1.9795, "step": 259435 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001574305026259343, "loss": 1.9967, "step": 259440 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015742898952821342, "loss": 2.2259, "step": 259445 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001574274764108737, "loss": 2.0594, "step": 259450 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015742596327391573, "loss": 1.9585, "step": 259455 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015742445011734, "loss": 2.2038, "step": 259460 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015742293694114706, "loss": 2.1545, "step": 259465 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.00015742142374533736, "loss": 2.0009, "step": 259470 }, { "epoch": 0.61, "grad_norm": 2.59375, "learning_rate": 0.00015741991052991148, "loss": 2.2519, "step": 259475 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015741839729486995, "loss": 1.8598, "step": 259480 }, { "epoch": 0.61, "grad_norm": 3.03125, "learning_rate": 0.00015741688404021322, "loss": 2.1488, "step": 259485 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015741537076594187, "loss": 2.0282, "step": 259490 }, { "epoch": 0.61, "grad_norm": 2.640625, "learning_rate": 0.00015741385747205635, "loss": 1.8156, "step": 259495 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015741234415855723, "loss": 1.9921, "step": 259500 }, { "epoch": 0.61, "grad_norm": 2.90625, "learning_rate": 0.00015741083082544503, "loss": 2.2845, "step": 259505 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015740931747272021, "loss": 2.0583, "step": 259510 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015740780410038337, "loss": 2.0314, "step": 259515 }, { "epoch": 0.61, "grad_norm": 2.546875, "learning_rate": 0.00015740629070843496, "loss": 2.221, "step": 259520 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.0001574047772968755, "loss": 2.0086, "step": 259525 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015740326386570557, "loss": 2.1162, "step": 259530 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.0001574017504149256, "loss": 2.1266, "step": 259535 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.0001574002369445362, "loss": 2.1653, "step": 259540 }, { "epoch": 0.61, "grad_norm": 1.7578125, "learning_rate": 0.0001573987234545378, "loss": 2.1566, "step": 259545 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015739720994493097, "loss": 1.9833, "step": 259550 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001573956964157162, "loss": 2.0042, "step": 259555 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015739418286689406, "loss": 2.1745, "step": 259560 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.000157392669298465, "loss": 2.0108, "step": 259565 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015739115571042955, "loss": 1.978, "step": 259570 }, { "epoch": 0.61, "grad_norm": 2.78125, "learning_rate": 0.00015738964210278822, "loss": 2.1679, "step": 259575 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.0001573881284755416, "loss": 2.1749, "step": 259580 }, { "epoch": 0.61, "grad_norm": 1.8828125, "learning_rate": 0.00015738661482869013, "loss": 2.0844, "step": 259585 }, { "epoch": 0.61, "grad_norm": 2.484375, "learning_rate": 0.00015738510116223436, "loss": 1.9769, "step": 259590 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015738358747617478, "loss": 2.079, "step": 259595 }, { "epoch": 0.61, "grad_norm": 1.9453125, "learning_rate": 0.00015738207377051193, "loss": 2.1282, "step": 259600 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015738056004524636, "loss": 2.0481, "step": 259605 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.0001573790463003785, "loss": 2.1321, "step": 259610 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015737753253590895, "loss": 1.8048, "step": 259615 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.0001573760187518382, "loss": 2.1381, "step": 259620 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015737450494816672, "loss": 2.0068, "step": 259625 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015737299112489507, "loss": 2.0622, "step": 259630 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.0001573714772820238, "loss": 2.2241, "step": 259635 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.0001573699634195534, "loss": 2.0034, "step": 259640 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015736844953748435, "loss": 2.0617, "step": 259645 }, { "epoch": 0.61, "grad_norm": 1.8203125, "learning_rate": 0.0001573669356358172, "loss": 2.1334, "step": 259650 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015736542171455246, "loss": 2.1003, "step": 259655 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015736390777369068, "loss": 2.215, "step": 259660 }, { "epoch": 0.61, "grad_norm": 1.7578125, "learning_rate": 0.00015736239381323235, "loss": 2.0295, "step": 259665 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015736087983317796, "loss": 1.97, "step": 259670 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015735936583352806, "loss": 2.0447, "step": 259675 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015735785181428315, "loss": 1.9143, "step": 259680 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015735633777544375, "loss": 2.1591, "step": 259685 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001573548237170104, "loss": 2.1718, "step": 259690 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015735330963898364, "loss": 2.1351, "step": 259695 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015735179554136392, "loss": 2.1777, "step": 259700 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015735028142415178, "loss": 2.051, "step": 259705 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015734876728734772, "loss": 2.0611, "step": 259710 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001573472531309523, "loss": 2.0517, "step": 259715 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.00015734573895496604, "loss": 2.0121, "step": 259720 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015734422475938945, "loss": 2.138, "step": 259725 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015734271054422297, "loss": 2.1141, "step": 259730 }, { "epoch": 0.61, "grad_norm": 2.546875, "learning_rate": 0.00015734119630946721, "loss": 2.1079, "step": 259735 }, { "epoch": 0.61, "grad_norm": 1.5703125, "learning_rate": 0.00015733968205512269, "loss": 2.0785, "step": 259740 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015733816778118985, "loss": 2.0038, "step": 259745 }, { "epoch": 0.61, "grad_norm": 1.859375, "learning_rate": 0.00015733665348766925, "loss": 2.0107, "step": 259750 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015733513917456145, "loss": 2.2688, "step": 259755 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015733362484186692, "loss": 2.0696, "step": 259760 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015733211048958614, "loss": 2.0844, "step": 259765 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.0001573305961177197, "loss": 2.0469, "step": 259770 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.0001573290817262681, "loss": 1.9226, "step": 259775 }, { "epoch": 0.61, "grad_norm": 2.75, "learning_rate": 0.0001573275673152318, "loss": 1.9658, "step": 259780 }, { "epoch": 0.61, "grad_norm": 1.8359375, "learning_rate": 0.00015732605288461144, "loss": 2.0737, "step": 259785 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015732453843440738, "loss": 2.285, "step": 259790 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015732302396462026, "loss": 1.9575, "step": 259795 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015732150947525057, "loss": 2.0512, "step": 259800 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015731999496629878, "loss": 2.3359, "step": 259805 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015731848043776546, "loss": 2.0119, "step": 259810 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.00015731696588965107, "loss": 2.2238, "step": 259815 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015731545132195623, "loss": 2.1928, "step": 259820 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015731393673468135, "loss": 2.0474, "step": 259825 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.000157312422127827, "loss": 1.9137, "step": 259830 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.0001573109075013937, "loss": 2.1458, "step": 259835 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015730939285538194, "loss": 1.9317, "step": 259840 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015730787818979224, "loss": 1.9374, "step": 259845 }, { "epoch": 0.61, "grad_norm": 1.78125, "learning_rate": 0.00015730636350462515, "loss": 2.1162, "step": 259850 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015730484879988117, "loss": 2.1743, "step": 259855 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.0001573033340755608, "loss": 2.2437, "step": 259860 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.00015730181933166457, "loss": 2.1213, "step": 259865 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.000157300304568193, "loss": 2.0976, "step": 259870 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.0001572987897851466, "loss": 2.314, "step": 259875 }, { "epoch": 0.61, "grad_norm": 1.84375, "learning_rate": 0.0001572972749825259, "loss": 2.3204, "step": 259880 }, { "epoch": 0.61, "grad_norm": 1.8359375, "learning_rate": 0.00015729576016033144, "loss": 2.1244, "step": 259885 }, { "epoch": 0.61, "grad_norm": 2.5625, "learning_rate": 0.00015729424531856368, "loss": 2.1679, "step": 259890 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015729273045722315, "loss": 1.9699, "step": 259895 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.00015729121557631043, "loss": 2.1508, "step": 259900 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015728970067582594, "loss": 2.2471, "step": 259905 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.0001572881857557703, "loss": 1.8595, "step": 259910 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015728667081614398, "loss": 2.0562, "step": 259915 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015728515585694745, "loss": 2.1046, "step": 259920 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015728364087818129, "loss": 2.0301, "step": 259925 }, { "epoch": 0.61, "grad_norm": 1.7734375, "learning_rate": 0.00015728212587984598, "loss": 2.0363, "step": 259930 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.0001572806108619421, "loss": 1.8667, "step": 259935 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.0001572790958244701, "loss": 2.0962, "step": 259940 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015727758076743053, "loss": 1.8739, "step": 259945 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015727606569082388, "loss": 2.1614, "step": 259950 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.0001572745505946507, "loss": 2.0199, "step": 259955 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.0001572730354789115, "loss": 2.1077, "step": 259960 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015727152034360678, "loss": 2.1047, "step": 259965 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001572700051887371, "loss": 2.1383, "step": 259970 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.0001572684900143029, "loss": 1.9994, "step": 259975 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015726697482030478, "loss": 2.1005, "step": 259980 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015726545960674323, "loss": 2.1081, "step": 259985 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015726394437361874, "loss": 1.9627, "step": 259990 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015726242912093182, "loss": 2.0975, "step": 259995 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015726091384868306, "loss": 2.238, "step": 260000 }, { "epoch": 0.61, "grad_norm": 2.6875, "learning_rate": 0.00015725939855687292, "loss": 2.0287, "step": 260005 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001572578832455019, "loss": 1.9935, "step": 260010 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015725636791457062, "loss": 2.1068, "step": 260015 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015725485256407948, "loss": 1.9877, "step": 260020 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015725333719402907, "loss": 2.1134, "step": 260025 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.00015725182180441983, "loss": 2.0758, "step": 260030 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015725030639525238, "loss": 2.2073, "step": 260035 }, { "epoch": 0.61, "grad_norm": 2.484375, "learning_rate": 0.0001572487909665272, "loss": 2.0296, "step": 260040 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.00015724727551824473, "loss": 2.1231, "step": 260045 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.0001572457600504056, "loss": 2.0256, "step": 260050 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015724424456301027, "loss": 2.093, "step": 260055 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015724272905605924, "loss": 2.1813, "step": 260060 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015724121352955308, "loss": 2.0381, "step": 260065 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015723969798349228, "loss": 2.0009, "step": 260070 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015723818241787738, "loss": 2.0945, "step": 260075 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.00015723666683270887, "loss": 1.9847, "step": 260080 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015723515122798725, "loss": 2.2085, "step": 260085 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015723363560371308, "loss": 2.1531, "step": 260090 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015723211995988687, "loss": 1.9834, "step": 260095 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015723060429650914, "loss": 2.1039, "step": 260100 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.0001572290886135804, "loss": 2.1752, "step": 260105 }, { "epoch": 0.61, "grad_norm": 1.8515625, "learning_rate": 0.00015722757291110112, "loss": 2.0422, "step": 260110 }, { "epoch": 0.61, "grad_norm": 1.875, "learning_rate": 0.0001572260571890719, "loss": 1.913, "step": 260115 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.0001572245414474932, "loss": 2.058, "step": 260120 }, { "epoch": 0.61, "grad_norm": 1.859375, "learning_rate": 0.0001572230256863656, "loss": 2.0071, "step": 260125 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015722150990568954, "loss": 2.1108, "step": 260130 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.0001572199941054656, "loss": 2.0084, "step": 260135 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015721847828569424, "loss": 1.9903, "step": 260140 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.000157216962446376, "loss": 2.0111, "step": 260145 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015721544658751143, "loss": 2.0779, "step": 260150 }, { "epoch": 0.61, "grad_norm": 1.921875, "learning_rate": 0.00015721393070910103, "loss": 2.1675, "step": 260155 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015721241481114534, "loss": 2.1956, "step": 260160 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.0001572108988936448, "loss": 2.0829, "step": 260165 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.0001572093829566, "loss": 1.971, "step": 260170 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015720786700001147, "loss": 2.0432, "step": 260175 }, { "epoch": 0.61, "grad_norm": 1.8828125, "learning_rate": 0.00015720635102387965, "loss": 1.9724, "step": 260180 }, { "epoch": 0.61, "grad_norm": 1.796875, "learning_rate": 0.00015720483502820515, "loss": 2.0262, "step": 260185 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015720331901298838, "loss": 2.0258, "step": 260190 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015720180297822996, "loss": 2.0746, "step": 260195 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015720028692393036, "loss": 1.9427, "step": 260200 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015719877085009008, "loss": 2.2385, "step": 260205 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.0001571972547567097, "loss": 2.2167, "step": 260210 }, { "epoch": 0.61, "grad_norm": 1.9296875, "learning_rate": 0.00015719573864378966, "loss": 2.1203, "step": 260215 }, { "epoch": 0.61, "grad_norm": 1.8203125, "learning_rate": 0.00015719422251133053, "loss": 2.0663, "step": 260220 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015719270635933285, "loss": 2.11, "step": 260225 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.0001571911901877971, "loss": 2.0029, "step": 260230 }, { "epoch": 0.61, "grad_norm": 1.859375, "learning_rate": 0.00015718967399672376, "loss": 2.2264, "step": 260235 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001571881577861134, "loss": 2.0027, "step": 260240 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.00015718664155596655, "loss": 2.2405, "step": 260245 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.00015718512530628372, "loss": 1.9805, "step": 260250 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015718360903706538, "loss": 2.1693, "step": 260255 }, { "epoch": 0.61, "grad_norm": 3.03125, "learning_rate": 0.0001571820927483121, "loss": 2.2364, "step": 260260 }, { "epoch": 0.61, "grad_norm": 1.7734375, "learning_rate": 0.00015718057644002439, "loss": 1.923, "step": 260265 }, { "epoch": 0.61, "grad_norm": 2.6875, "learning_rate": 0.00015717906011220273, "loss": 2.0757, "step": 260270 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015717754376484767, "loss": 2.2703, "step": 260275 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015717602739795974, "loss": 2.144, "step": 260280 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015717451101153945, "loss": 2.0448, "step": 260285 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015717299460558728, "loss": 2.1002, "step": 260290 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.0001571714781801038, "loss": 2.2522, "step": 260295 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.0001571699617350895, "loss": 2.0979, "step": 260300 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015716844527054492, "loss": 2.0576, "step": 260305 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015716692878647054, "loss": 1.9743, "step": 260310 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015716541228286688, "loss": 2.0935, "step": 260315 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015716389575973452, "loss": 2.012, "step": 260320 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015716237921707392, "loss": 2.0728, "step": 260325 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015716086265488561, "loss": 2.0155, "step": 260330 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.00015715934607317013, "loss": 2.0737, "step": 260335 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015715782947192794, "loss": 2.0859, "step": 260340 }, { "epoch": 0.61, "grad_norm": 2.828125, "learning_rate": 0.00015715631285115965, "loss": 2.1825, "step": 260345 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.00015715479621086572, "loss": 1.9347, "step": 260350 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015715327955104664, "loss": 1.853, "step": 260355 }, { "epoch": 0.61, "grad_norm": 1.9375, "learning_rate": 0.00015715176287170297, "loss": 2.0694, "step": 260360 }, { "epoch": 0.61, "grad_norm": 1.6328125, "learning_rate": 0.00015715024617283526, "loss": 2.1339, "step": 260365 }, { "epoch": 0.61, "grad_norm": 1.65625, "learning_rate": 0.00015714872945444394, "loss": 2.0618, "step": 260370 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015714721271652962, "loss": 1.9685, "step": 260375 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015714569595909274, "loss": 2.1848, "step": 260380 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015714417918213388, "loss": 1.9958, "step": 260385 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.00015714266238565352, "loss": 2.1531, "step": 260390 }, { "epoch": 0.61, "grad_norm": 1.8125, "learning_rate": 0.00015714114556965222, "loss": 2.089, "step": 260395 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015713962873413042, "loss": 2.0147, "step": 260400 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.0001571381118790887, "loss": 2.0335, "step": 260405 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.0001571365950045276, "loss": 2.2016, "step": 260410 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.00015713507811044756, "loss": 2.1096, "step": 260415 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015713356119684914, "loss": 2.079, "step": 260420 }, { "epoch": 0.61, "grad_norm": 2.96875, "learning_rate": 0.0001571320442637329, "loss": 1.9923, "step": 260425 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015713052731109928, "loss": 2.237, "step": 260430 }, { "epoch": 0.61, "grad_norm": 1.765625, "learning_rate": 0.00015712901033894885, "loss": 1.9676, "step": 260435 }, { "epoch": 0.61, "grad_norm": 1.953125, "learning_rate": 0.00015712749334728212, "loss": 1.8743, "step": 260440 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.0001571259763360996, "loss": 2.0557, "step": 260445 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.0001571244593054018, "loss": 2.2501, "step": 260450 }, { "epoch": 0.61, "grad_norm": 2.484375, "learning_rate": 0.00015712294225518926, "loss": 2.1071, "step": 260455 }, { "epoch": 0.61, "grad_norm": 2.359375, "learning_rate": 0.00015712142518546247, "loss": 2.0563, "step": 260460 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.000157119908096222, "loss": 2.2183, "step": 260465 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.0001571183909874683, "loss": 1.9573, "step": 260470 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015711687385920194, "loss": 2.2055, "step": 260475 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.0001571153567114234, "loss": 1.9601, "step": 260480 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.0001571138395441332, "loss": 1.9011, "step": 260485 }, { "epoch": 0.61, "grad_norm": 1.78125, "learning_rate": 0.00015711232235733193, "loss": 1.9674, "step": 260490 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015711080515102004, "loss": 2.0872, "step": 260495 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.0001571092879251981, "loss": 2.0944, "step": 260500 }, { "epoch": 0.61, "grad_norm": 2.65625, "learning_rate": 0.0001571077706798665, "loss": 2.2469, "step": 260505 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015710625341502593, "loss": 2.1582, "step": 260510 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015710473613067677, "loss": 1.8998, "step": 260515 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001571032188268196, "loss": 2.0832, "step": 260520 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015710170150345497, "loss": 2.0107, "step": 260525 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015710018416058335, "loss": 2.1958, "step": 260530 }, { "epoch": 0.61, "grad_norm": 1.984375, "learning_rate": 0.0001570986667982053, "loss": 2.1823, "step": 260535 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015709714941632122, "loss": 2.175, "step": 260540 }, { "epoch": 0.61, "grad_norm": 2.53125, "learning_rate": 0.00015709563201493182, "loss": 2.1282, "step": 260545 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.00015709411459403748, "loss": 2.0146, "step": 260550 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.00015709259715363875, "loss": 1.9134, "step": 260555 }, { "epoch": 0.61, "grad_norm": 2.953125, "learning_rate": 0.00015709107969373617, "loss": 2.1692, "step": 260560 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.0001570895622143302, "loss": 1.9857, "step": 260565 }, { "epoch": 0.61, "grad_norm": 2.453125, "learning_rate": 0.0001570880447154214, "loss": 2.0148, "step": 260570 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015708652719701032, "loss": 1.9626, "step": 260575 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015708500965909745, "loss": 2.1339, "step": 260580 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015708349210168332, "loss": 2.025, "step": 260585 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015708197452476841, "loss": 1.9732, "step": 260590 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015708045692835326, "loss": 2.1032, "step": 260595 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015707893931243838, "loss": 2.1118, "step": 260600 }, { "epoch": 0.61, "grad_norm": 1.8671875, "learning_rate": 0.00015707742167702434, "loss": 2.2663, "step": 260605 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015707590402211157, "loss": 2.0754, "step": 260610 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015707438634770065, "loss": 2.1687, "step": 260615 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015707286865379206, "loss": 2.0245, "step": 260620 }, { "epoch": 0.61, "grad_norm": 2.75, "learning_rate": 0.0001570713509403864, "loss": 2.0373, "step": 260625 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015706983320748412, "loss": 1.8071, "step": 260630 }, { "epoch": 0.61, "grad_norm": 1.8984375, "learning_rate": 0.00015706831545508573, "loss": 1.988, "step": 260635 }, { "epoch": 0.61, "grad_norm": 1.7734375, "learning_rate": 0.0001570667976831918, "loss": 2.0387, "step": 260640 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.00015706527989180278, "loss": 2.0949, "step": 260645 }, { "epoch": 0.61, "grad_norm": 2.5625, "learning_rate": 0.00015706376208091921, "loss": 2.0239, "step": 260650 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015706224425054166, "loss": 2.077, "step": 260655 }, { "epoch": 0.61, "grad_norm": 1.6796875, "learning_rate": 0.00015706072640067062, "loss": 2.0587, "step": 260660 }, { "epoch": 0.61, "grad_norm": 1.7421875, "learning_rate": 0.00015705920853130656, "loss": 1.9752, "step": 260665 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015705769064245007, "loss": 2.006, "step": 260670 }, { "epoch": 0.61, "grad_norm": 1.734375, "learning_rate": 0.00015705617273410163, "loss": 2.2758, "step": 260675 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.0001570546548062618, "loss": 1.8232, "step": 260680 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015705313685893102, "loss": 1.9555, "step": 260685 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.00015705161889210985, "loss": 2.1569, "step": 260690 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015705010090579885, "loss": 2.0756, "step": 260695 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015704858289999844, "loss": 2.008, "step": 260700 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015704706487470926, "loss": 2.1922, "step": 260705 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015704554682993176, "loss": 2.045, "step": 260710 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.00015704402876566645, "loss": 2.0604, "step": 260715 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015704251068191386, "loss": 2.0764, "step": 260720 }, { "epoch": 0.61, "grad_norm": 2.671875, "learning_rate": 0.00015704099257867453, "loss": 2.0595, "step": 260725 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015703947445594896, "loss": 1.995, "step": 260730 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015703795631373765, "loss": 2.0802, "step": 260735 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015703643815204117, "loss": 2.0124, "step": 260740 }, { "epoch": 0.61, "grad_norm": 2.6875, "learning_rate": 0.00015703491997086, "loss": 1.9294, "step": 260745 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015703340177019467, "loss": 2.096, "step": 260750 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015703188355004565, "loss": 2.1893, "step": 260755 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015703036531041357, "loss": 2.1209, "step": 260760 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015702884705129884, "loss": 1.8559, "step": 260765 }, { "epoch": 0.61, "grad_norm": 2.671875, "learning_rate": 0.00015702732877270203, "loss": 1.7734, "step": 260770 }, { "epoch": 0.61, "grad_norm": 2.0, "learning_rate": 0.00015702581047462365, "loss": 1.9918, "step": 260775 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015702429215706424, "loss": 2.2052, "step": 260780 }, { "epoch": 0.61, "grad_norm": 3.359375, "learning_rate": 0.0001570227738200243, "loss": 2.0951, "step": 260785 }, { "epoch": 0.61, "grad_norm": 1.921875, "learning_rate": 0.0001570212554635043, "loss": 2.2135, "step": 260790 }, { "epoch": 0.61, "grad_norm": 1.875, "learning_rate": 0.00015701973708750482, "loss": 2.0751, "step": 260795 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.0001570182186920264, "loss": 2.0525, "step": 260800 }, { "epoch": 0.61, "grad_norm": 2.546875, "learning_rate": 0.0001570167002770695, "loss": 2.2845, "step": 260805 }, { "epoch": 0.61, "grad_norm": 1.9921875, "learning_rate": 0.00015701518184263466, "loss": 2.0449, "step": 260810 }, { "epoch": 0.61, "grad_norm": 1.8125, "learning_rate": 0.00015701366338872238, "loss": 1.9974, "step": 260815 }, { "epoch": 0.61, "grad_norm": 1.6875, "learning_rate": 0.00015701214491533325, "loss": 2.0989, "step": 260820 }, { "epoch": 0.61, "grad_norm": 2.546875, "learning_rate": 0.0001570106264224677, "loss": 1.9022, "step": 260825 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.0001570091079101263, "loss": 2.1828, "step": 260830 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015700758937830953, "loss": 2.0107, "step": 260835 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015700607082701798, "loss": 2.0165, "step": 260840 }, { "epoch": 0.61, "grad_norm": 2.1875, "learning_rate": 0.00015700455225625207, "loss": 1.9834, "step": 260845 }, { "epoch": 0.61, "grad_norm": 2.515625, "learning_rate": 0.0001570030336660124, "loss": 2.0578, "step": 260850 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001570015150562995, "loss": 2.1629, "step": 260855 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015699999642711373, "loss": 2.1889, "step": 260860 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015699847777845584, "loss": 1.9207, "step": 260865 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.0001569969591103262, "loss": 2.0961, "step": 260870 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015699544042272536, "loss": 1.8406, "step": 260875 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015699392171565383, "loss": 2.0867, "step": 260880 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015699240298911216, "loss": 2.1545, "step": 260885 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015699088424310083, "loss": 2.1786, "step": 260890 }, { "epoch": 0.61, "grad_norm": 1.9765625, "learning_rate": 0.00015698936547762045, "loss": 2.0816, "step": 260895 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015698784669267138, "loss": 2.0495, "step": 260900 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.0001569863278882543, "loss": 2.1754, "step": 260905 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015698480906436963, "loss": 1.9026, "step": 260910 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.0001569832902210179, "loss": 2.1509, "step": 260915 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015698177135819964, "loss": 2.0733, "step": 260920 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015698025247591542, "loss": 2.109, "step": 260925 }, { "epoch": 0.61, "grad_norm": 3.0, "learning_rate": 0.00015697873357416566, "loss": 1.9573, "step": 260930 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015697721465295097, "loss": 1.993, "step": 260935 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.0001569756957122718, "loss": 2.1943, "step": 260940 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.0001569741767521287, "loss": 1.9053, "step": 260945 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.0001569726577725222, "loss": 2.2602, "step": 260950 }, { "epoch": 0.61, "grad_norm": 1.875, "learning_rate": 0.00015697113877345282, "loss": 2.0673, "step": 260955 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.00015696961975492104, "loss": 2.0312, "step": 260960 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.00015696810071692743, "loss": 2.1169, "step": 260965 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.00015696658165947243, "loss": 2.1634, "step": 260970 }, { "epoch": 0.61, "grad_norm": 2.203125, "learning_rate": 0.0001569650625825567, "loss": 2.0364, "step": 260975 }, { "epoch": 0.61, "grad_norm": 1.9140625, "learning_rate": 0.0001569635434861806, "loss": 1.9395, "step": 260980 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015696202437034476, "loss": 1.9801, "step": 260985 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015696050523504967, "loss": 2.0036, "step": 260990 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 0.00015695898608029577, "loss": 2.1055, "step": 260995 }, { "epoch": 0.61, "grad_norm": 2.328125, "learning_rate": 0.0001569574669060837, "loss": 2.0412, "step": 261000 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015695594771241393, "loss": 2.0912, "step": 261005 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015695442849928694, "loss": 2.0946, "step": 261010 }, { "epoch": 0.61, "grad_norm": 2.375, "learning_rate": 0.00015695290926670335, "loss": 1.9201, "step": 261015 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015695139001466353, "loss": 2.197, "step": 261020 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.0001569498707431681, "loss": 2.0621, "step": 261025 }, { "epoch": 0.61, "grad_norm": 2.5, "learning_rate": 0.0001569483514522176, "loss": 2.047, "step": 261030 }, { "epoch": 0.61, "grad_norm": 2.4375, "learning_rate": 0.00015694683214181253, "loss": 1.9658, "step": 261035 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015694531281195334, "loss": 2.1841, "step": 261040 }, { "epoch": 0.61, "grad_norm": 1.890625, "learning_rate": 0.0001569437934626406, "loss": 2.0129, "step": 261045 }, { "epoch": 0.61, "grad_norm": 2.296875, "learning_rate": 0.00015694227409387482, "loss": 2.1401, "step": 261050 }, { "epoch": 0.61, "grad_norm": 2.59375, "learning_rate": 0.00015694075470565656, "loss": 2.0521, "step": 261055 }, { "epoch": 0.61, "grad_norm": 1.859375, "learning_rate": 0.0001569392352979863, "loss": 1.9213, "step": 261060 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015693771587086457, "loss": 2.0445, "step": 261065 }, { "epoch": 0.61, "grad_norm": 1.9609375, "learning_rate": 0.00015693619642429186, "loss": 2.1727, "step": 261070 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001569346769582687, "loss": 1.9824, "step": 261075 }, { "epoch": 0.61, "grad_norm": 2.25, "learning_rate": 0.00015693315747279565, "loss": 1.9692, "step": 261080 }, { "epoch": 0.61, "grad_norm": 2.890625, "learning_rate": 0.0001569316379678732, "loss": 2.0745, "step": 261085 }, { "epoch": 0.61, "grad_norm": 2.265625, "learning_rate": 0.00015693011844350186, "loss": 2.1197, "step": 261090 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.00015692859889968218, "loss": 2.1342, "step": 261095 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015692707933641465, "loss": 2.0432, "step": 261100 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015692555975369975, "loss": 2.026, "step": 261105 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.0001569240401515381, "loss": 2.3132, "step": 261110 }, { "epoch": 0.61, "grad_norm": 2.046875, "learning_rate": 0.00015692252052993015, "loss": 1.9524, "step": 261115 }, { "epoch": 0.61, "grad_norm": 2.015625, "learning_rate": 0.00015692100088887645, "loss": 2.0472, "step": 261120 }, { "epoch": 0.61, "grad_norm": 1.796875, "learning_rate": 0.00015691948122837748, "loss": 2.0273, "step": 261125 }, { "epoch": 0.61, "grad_norm": 1.859375, "learning_rate": 0.0001569179615484338, "loss": 2.0196, "step": 261130 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001569164418490459, "loss": 1.9551, "step": 261135 }, { "epoch": 0.61, "grad_norm": 1.640625, "learning_rate": 0.0001569149221302143, "loss": 1.8719, "step": 261140 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015691340239193955, "loss": 2.1025, "step": 261145 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.00015691188263422214, "loss": 1.9106, "step": 261150 }, { "epoch": 0.61, "grad_norm": 1.96875, "learning_rate": 0.0001569103628570626, "loss": 2.0865, "step": 261155 }, { "epoch": 0.61, "grad_norm": 2.625, "learning_rate": 0.00015690884306046146, "loss": 1.9914, "step": 261160 }, { "epoch": 0.61, "grad_norm": 2.0625, "learning_rate": 0.00015690732324441924, "loss": 2.0919, "step": 261165 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015690580340893643, "loss": 2.0318, "step": 261170 }, { "epoch": 0.61, "grad_norm": 1.6796875, "learning_rate": 0.00015690428355401355, "loss": 1.8012, "step": 261175 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.00015690276367965114, "loss": 2.056, "step": 261180 }, { "epoch": 0.61, "grad_norm": 1.7890625, "learning_rate": 0.0001569012437858497, "loss": 1.8003, "step": 261185 }, { "epoch": 0.61, "grad_norm": 1.796875, "learning_rate": 0.0001568997238726098, "loss": 1.9647, "step": 261190 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.0001568982039399319, "loss": 2.0809, "step": 261195 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015689668398781654, "loss": 2.088, "step": 261200 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015689516401626427, "loss": 2.0217, "step": 261205 }, { "epoch": 0.61, "grad_norm": 2.078125, "learning_rate": 0.00015689364402527554, "loss": 1.8842, "step": 261210 }, { "epoch": 0.61, "grad_norm": 2.09375, "learning_rate": 0.0001568921240148509, "loss": 2.0353, "step": 261215 }, { "epoch": 0.61, "grad_norm": 2.234375, "learning_rate": 0.00015689060398499092, "loss": 2.0593, "step": 261220 }, { "epoch": 0.61, "grad_norm": 2.40625, "learning_rate": 0.00015688908393569608, "loss": 2.089, "step": 261225 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.0001568875638669669, "loss": 1.9886, "step": 261230 }, { "epoch": 0.61, "grad_norm": 2.15625, "learning_rate": 0.00015688604377880387, "loss": 1.9353, "step": 261235 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.00015688452367120754, "loss": 2.0387, "step": 261240 }, { "epoch": 0.61, "grad_norm": 2.171875, "learning_rate": 0.00015688300354417843, "loss": 2.1723, "step": 261245 }, { "epoch": 0.61, "grad_norm": 2.5625, "learning_rate": 0.00015688148339771706, "loss": 2.0268, "step": 261250 }, { "epoch": 0.61, "grad_norm": 2.28125, "learning_rate": 0.00015687996323182393, "loss": 2.004, "step": 261255 }, { "epoch": 0.61, "grad_norm": 2.109375, "learning_rate": 0.00015687844304649958, "loss": 1.9027, "step": 261260 }, { "epoch": 0.61, "grad_norm": 2.3125, "learning_rate": 0.0001568769228417445, "loss": 1.8646, "step": 261265 }, { "epoch": 0.61, "grad_norm": 1.90625, "learning_rate": 0.00015687540261755925, "loss": 1.9517, "step": 261270 }, { "epoch": 0.61, "grad_norm": 2.609375, "learning_rate": 0.00015687388237394436, "loss": 2.2397, "step": 261275 }, { "epoch": 0.61, "grad_norm": 2.125, "learning_rate": 0.00015687236211090028, "loss": 1.8839, "step": 261280 }, { "epoch": 0.61, "grad_norm": 2.421875, "learning_rate": 0.00015687084182842759, "loss": 2.0445, "step": 261285 }, { "epoch": 0.61, "grad_norm": 2.34375, "learning_rate": 0.00015686932152652675, "loss": 2.0548, "step": 261290 }, { "epoch": 0.61, "grad_norm": 3.0, "learning_rate": 0.00015686780120519837, "loss": 1.9655, "step": 261295 }, { "epoch": 0.61, "grad_norm": 2.390625, "learning_rate": 0.0001568662808644429, "loss": 2.2051, "step": 261300 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015686476050426088, "loss": 1.9344, "step": 261305 }, { "epoch": 0.61, "grad_norm": 2.21875, "learning_rate": 0.00015686324012465283, "loss": 1.9717, "step": 261310 }, { "epoch": 0.61, "grad_norm": 1.921875, "learning_rate": 0.00015686171972561925, "loss": 2.0243, "step": 261315 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015686019930716069, "loss": 2.0565, "step": 261320 }, { "epoch": 0.61, "grad_norm": 2.46875, "learning_rate": 0.00015685867886927764, "loss": 1.9238, "step": 261325 }, { "epoch": 0.61, "grad_norm": 2.03125, "learning_rate": 0.00015685715841197063, "loss": 1.9821, "step": 261330 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.0001568556379352402, "loss": 2.1154, "step": 261335 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015685411743908686, "loss": 2.0464, "step": 261340 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.0001568525969235111, "loss": 2.1393, "step": 261345 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.0001568510763885135, "loss": 2.0376, "step": 261350 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015684955583409452, "loss": 2.177, "step": 261355 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.0001568480352602547, "loss": 2.1179, "step": 261360 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015684651466699452, "loss": 2.2344, "step": 261365 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.0001568449940543146, "loss": 2.0786, "step": 261370 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015684347342221537, "loss": 2.0835, "step": 261375 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015684195277069736, "loss": 2.2159, "step": 261380 }, { "epoch": 0.62, "grad_norm": 2.625, "learning_rate": 0.00015684043209976116, "loss": 2.0441, "step": 261385 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001568389114094072, "loss": 2.1513, "step": 261390 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015683739069963603, "loss": 2.2152, "step": 261395 }, { "epoch": 0.62, "grad_norm": 1.8984375, "learning_rate": 0.0001568358699704482, "loss": 2.0972, "step": 261400 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.00015683434922184421, "loss": 1.9079, "step": 261405 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015683282845382456, "loss": 2.086, "step": 261410 }, { "epoch": 0.62, "grad_norm": 2.75, "learning_rate": 0.00015683130766638978, "loss": 2.2592, "step": 261415 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.0001568297868595404, "loss": 2.0255, "step": 261420 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015682826603327695, "loss": 2.1184, "step": 261425 }, { "epoch": 0.62, "grad_norm": 1.84375, "learning_rate": 0.0001568267451875999, "loss": 2.0827, "step": 261430 }, { "epoch": 0.62, "grad_norm": 2.5, "learning_rate": 0.00015682522432250984, "loss": 2.3114, "step": 261435 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015682370343800725, "loss": 2.2212, "step": 261440 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015682218253409262, "loss": 2.0748, "step": 261445 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001568206616107665, "loss": 2.0165, "step": 261450 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015681914066802946, "loss": 2.0244, "step": 261455 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015681761970588195, "loss": 2.0682, "step": 261460 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.0001568160987243245, "loss": 2.0374, "step": 261465 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015681457772335767, "loss": 2.0682, "step": 261470 }, { "epoch": 0.62, "grad_norm": 2.640625, "learning_rate": 0.0001568130567029819, "loss": 2.2666, "step": 261475 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015681153566319777, "loss": 2.0143, "step": 261480 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015681001460400581, "loss": 2.1061, "step": 261485 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015680849352540653, "loss": 2.1335, "step": 261490 }, { "epoch": 0.62, "grad_norm": 1.921875, "learning_rate": 0.0001568069724274004, "loss": 2.0194, "step": 261495 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.000156805451309988, "loss": 2.2749, "step": 261500 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001568039301731698, "loss": 2.1034, "step": 261505 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015680240901694642, "loss": 2.1085, "step": 261510 }, { "epoch": 0.62, "grad_norm": 1.8203125, "learning_rate": 0.00015680088784131825, "loss": 2.1734, "step": 261515 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015679936664628587, "loss": 2.1874, "step": 261520 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015679784543184982, "loss": 2.0225, "step": 261525 }, { "epoch": 0.62, "grad_norm": 2.71875, "learning_rate": 0.00015679632419801057, "loss": 2.2112, "step": 261530 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015679480294476867, "loss": 2.1078, "step": 261535 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015679328167212463, "loss": 2.0656, "step": 261540 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015679176038007898, "loss": 2.0379, "step": 261545 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015679023906863226, "loss": 2.2091, "step": 261550 }, { "epoch": 0.62, "grad_norm": 2.703125, "learning_rate": 0.0001567887177377849, "loss": 2.0398, "step": 261555 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015678719638753755, "loss": 1.8276, "step": 261560 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015678567501789065, "loss": 1.9303, "step": 261565 }, { "epoch": 0.62, "grad_norm": 1.8671875, "learning_rate": 0.00015678415362884473, "loss": 2.1204, "step": 261570 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015678263222040033, "loss": 1.936, "step": 261575 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001567811107925579, "loss": 2.1672, "step": 261580 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015677958934531806, "loss": 2.1268, "step": 261585 }, { "epoch": 0.62, "grad_norm": 1.8359375, "learning_rate": 0.00015677806787868127, "loss": 2.0855, "step": 261590 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015677654639264805, "loss": 2.0748, "step": 261595 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015677502488721894, "loss": 1.9503, "step": 261600 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015677350336239443, "loss": 1.9675, "step": 261605 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015677198181817512, "loss": 2.0109, "step": 261610 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015677046025456144, "loss": 2.1333, "step": 261615 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015676893867155392, "loss": 1.9923, "step": 261620 }, { "epoch": 0.62, "grad_norm": 2.6875, "learning_rate": 0.00015676741706915312, "loss": 2.0955, "step": 261625 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015676589544735955, "loss": 2.004, "step": 261630 }, { "epoch": 0.62, "grad_norm": 2.59375, "learning_rate": 0.0001567643738061737, "loss": 2.1532, "step": 261635 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015676285214559613, "loss": 2.0752, "step": 261640 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.00015676133046562733, "loss": 2.1776, "step": 261645 }, { "epoch": 0.62, "grad_norm": 1.859375, "learning_rate": 0.00015675980876626785, "loss": 2.0506, "step": 261650 }, { "epoch": 0.62, "grad_norm": 1.8046875, "learning_rate": 0.00015675828704751817, "loss": 2.1382, "step": 261655 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001567567653093788, "loss": 2.1097, "step": 261660 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001567552435518503, "loss": 2.232, "step": 261665 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.0001567537217749332, "loss": 2.1511, "step": 261670 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015675219997862802, "loss": 2.0282, "step": 261675 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015675067816293525, "loss": 1.908, "step": 261680 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015674915632785542, "loss": 1.9922, "step": 261685 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.000156747634473389, "loss": 1.9259, "step": 261690 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.0001567461125995366, "loss": 2.2491, "step": 261695 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.0001567445907062987, "loss": 2.0582, "step": 261700 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015674306879367582, "loss": 2.2127, "step": 261705 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.00015674154686166845, "loss": 2.0246, "step": 261710 }, { "epoch": 0.62, "grad_norm": 1.7421875, "learning_rate": 0.00015674002491027718, "loss": 2.0739, "step": 261715 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015673850293950245, "loss": 2.1409, "step": 261720 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015673698094934485, "loss": 1.9664, "step": 261725 }, { "epoch": 0.62, "grad_norm": 2.515625, "learning_rate": 0.00015673545893980486, "loss": 2.1006, "step": 261730 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.000156733936910883, "loss": 2.0409, "step": 261735 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001567324148625798, "loss": 2.1069, "step": 261740 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015673089279489576, "loss": 2.0769, "step": 261745 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015672937070783144, "loss": 2.1925, "step": 261750 }, { "epoch": 0.62, "grad_norm": 2.5625, "learning_rate": 0.00015672784860138735, "loss": 2.0417, "step": 261755 }, { "epoch": 0.62, "grad_norm": 1.921875, "learning_rate": 0.00015672632647556397, "loss": 2.1555, "step": 261760 }, { "epoch": 0.62, "grad_norm": 1.71875, "learning_rate": 0.00015672480433036188, "loss": 1.938, "step": 261765 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.00015672328216578154, "loss": 1.9077, "step": 261770 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.0001567217599818235, "loss": 2.011, "step": 261775 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001567202377784883, "loss": 2.1721, "step": 261780 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015671871555577641, "loss": 2.1755, "step": 261785 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001567171933136884, "loss": 2.0018, "step": 261790 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015671567105222478, "loss": 2.0882, "step": 261795 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015671414877138602, "loss": 2.0487, "step": 261800 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001567126264711727, "loss": 2.2296, "step": 261805 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015671110415158533, "loss": 2.0452, "step": 261810 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015670958181262438, "loss": 2.1406, "step": 261815 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015670805945429045, "loss": 2.1292, "step": 261820 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015670653707658397, "loss": 2.1341, "step": 261825 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015670501467950556, "loss": 1.9824, "step": 261830 }, { "epoch": 0.62, "grad_norm": 2.546875, "learning_rate": 0.00015670349226305566, "loss": 2.2863, "step": 261835 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015670196982723485, "loss": 2.0002, "step": 261840 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.00015670044737204356, "loss": 2.2802, "step": 261845 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015669892489748242, "loss": 2.2398, "step": 261850 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015669740240355185, "loss": 2.1463, "step": 261855 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015669587989025247, "loss": 1.9052, "step": 261860 }, { "epoch": 0.62, "grad_norm": 1.921875, "learning_rate": 0.00015669435735758474, "loss": 2.0459, "step": 261865 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.0001566928348055492, "loss": 2.156, "step": 261870 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015669131223414632, "loss": 2.1327, "step": 261875 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.00015668978964337668, "loss": 1.9966, "step": 261880 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.0001566882670332408, "loss": 2.075, "step": 261885 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015668674440373913, "loss": 2.0735, "step": 261890 }, { "epoch": 0.62, "grad_norm": 1.828125, "learning_rate": 0.00015668522175487227, "loss": 1.9194, "step": 261895 }, { "epoch": 0.62, "grad_norm": 1.921875, "learning_rate": 0.0001566836990866407, "loss": 1.9809, "step": 261900 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015668217639904498, "loss": 2.2345, "step": 261905 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015668065369208557, "loss": 2.0647, "step": 261910 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015667913096576304, "loss": 2.1915, "step": 261915 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015667760822007788, "loss": 1.8515, "step": 261920 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015667608545503063, "loss": 1.9588, "step": 261925 }, { "epoch": 0.62, "grad_norm": 1.8125, "learning_rate": 0.00015667456267062178, "loss": 2.0598, "step": 261930 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001566730398668519, "loss": 2.3005, "step": 261935 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015667151704372144, "loss": 2.0383, "step": 261940 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.000156669994201231, "loss": 2.0073, "step": 261945 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015666847133938107, "loss": 2.1042, "step": 261950 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015666694845817216, "loss": 2.2161, "step": 261955 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015666542555760473, "loss": 1.9975, "step": 261960 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015666390263767942, "loss": 2.1988, "step": 261965 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015666237969839668, "loss": 2.0652, "step": 261970 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015666085673975708, "loss": 2.0624, "step": 261975 }, { "epoch": 0.62, "grad_norm": 2.734375, "learning_rate": 0.00015665933376176106, "loss": 2.1151, "step": 261980 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015665781076440918, "loss": 2.1829, "step": 261985 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015665628774770197, "loss": 2.2207, "step": 261990 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015665476471163995, "loss": 1.9882, "step": 261995 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015665324165622366, "loss": 2.2202, "step": 262000 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015665171858145357, "loss": 2.1265, "step": 262005 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015665019548733025, "loss": 2.0484, "step": 262010 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.00015664867237385414, "loss": 2.0088, "step": 262015 }, { "epoch": 0.62, "grad_norm": 1.7109375, "learning_rate": 0.00015664714924102586, "loss": 2.1027, "step": 262020 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015664562608884588, "loss": 2.149, "step": 262025 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015664410291731473, "loss": 2.1007, "step": 262030 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015664257972643292, "loss": 1.9672, "step": 262035 }, { "epoch": 0.62, "grad_norm": 1.921875, "learning_rate": 0.00015664105651620096, "loss": 2.1489, "step": 262040 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015663953328661943, "loss": 2.0877, "step": 262045 }, { "epoch": 0.62, "grad_norm": 1.8359375, "learning_rate": 0.00015663801003768877, "loss": 2.0176, "step": 262050 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015663648676940957, "loss": 2.1486, "step": 262055 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.00015663496348178232, "loss": 2.0543, "step": 262060 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015663344017480748, "loss": 2.0489, "step": 262065 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015663191684848568, "loss": 1.7001, "step": 262070 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015663039350281735, "loss": 2.2213, "step": 262075 }, { "epoch": 0.62, "grad_norm": 2.625, "learning_rate": 0.0001566288701378031, "loss": 2.2749, "step": 262080 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001566273467534434, "loss": 2.1288, "step": 262085 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015662582334973876, "loss": 2.2776, "step": 262090 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015662429992668967, "loss": 2.2276, "step": 262095 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015662277648429676, "loss": 2.014, "step": 262100 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015662125302256047, "loss": 1.8729, "step": 262105 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001566197295414813, "loss": 1.9627, "step": 262110 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001566182060410598, "loss": 2.255, "step": 262115 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.0001566166825212965, "loss": 2.2049, "step": 262120 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015661515898219194, "loss": 2.098, "step": 262125 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015661363542374658, "loss": 1.8555, "step": 262130 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015661211184596102, "loss": 1.902, "step": 262135 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015661058824883572, "loss": 2.0935, "step": 262140 }, { "epoch": 0.62, "grad_norm": 4.5625, "learning_rate": 0.0001566090646323712, "loss": 2.0608, "step": 262145 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015660754099656803, "loss": 2.075, "step": 262150 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015660601734142666, "loss": 2.0528, "step": 262155 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015660449366694765, "loss": 2.1061, "step": 262160 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015660296997313153, "loss": 2.1627, "step": 262165 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001566014462599788, "loss": 2.024, "step": 262170 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015659992252749002, "loss": 1.8643, "step": 262175 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015659839877566565, "loss": 1.9367, "step": 262180 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015659687500450627, "loss": 2.0495, "step": 262185 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015659535121401231, "loss": 2.121, "step": 262190 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001565938274041844, "loss": 2.1461, "step": 262195 }, { "epoch": 0.62, "grad_norm": 1.859375, "learning_rate": 0.00015659230357502302, "loss": 2.098, "step": 262200 }, { "epoch": 0.62, "grad_norm": 1.9296875, "learning_rate": 0.00015659077972652864, "loss": 1.9526, "step": 262205 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015658925585870188, "loss": 1.9843, "step": 262210 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015658773197154316, "loss": 2.0365, "step": 262215 }, { "epoch": 0.62, "grad_norm": 1.8125, "learning_rate": 0.00015658620806505304, "loss": 1.9699, "step": 262220 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015658468413923206, "loss": 1.9679, "step": 262225 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015658316019408075, "loss": 2.1333, "step": 262230 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.0001565816362295996, "loss": 1.979, "step": 262235 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001565801122457891, "loss": 2.0601, "step": 262240 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001565785882426498, "loss": 2.0807, "step": 262245 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.0001565770642201823, "loss": 2.0328, "step": 262250 }, { "epoch": 0.62, "grad_norm": 1.8671875, "learning_rate": 0.000156575540178387, "loss": 2.0739, "step": 262255 }, { "epoch": 0.62, "grad_norm": 1.8828125, "learning_rate": 0.00015657401611726446, "loss": 2.0393, "step": 262260 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015657249203681524, "loss": 2.0085, "step": 262265 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001565709679370398, "loss": 2.0409, "step": 262270 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001565694438179387, "loss": 2.0733, "step": 262275 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015656791967951246, "loss": 2.0947, "step": 262280 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.0001565663955217616, "loss": 2.2144, "step": 262285 }, { "epoch": 0.62, "grad_norm": 1.859375, "learning_rate": 0.00015656487134468663, "loss": 2.0453, "step": 262290 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015656334714828806, "loss": 1.99, "step": 262295 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015656182293256646, "loss": 2.2404, "step": 262300 }, { "epoch": 0.62, "grad_norm": 3.0, "learning_rate": 0.00015656029869752226, "loss": 2.2104, "step": 262305 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015655877444315606, "loss": 2.0336, "step": 262310 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015655725016946835, "loss": 1.9955, "step": 262315 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001565557258764597, "loss": 2.2461, "step": 262320 }, { "epoch": 0.62, "grad_norm": 1.828125, "learning_rate": 0.00015655420156413053, "loss": 1.9963, "step": 262325 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015655267723248145, "loss": 1.9368, "step": 262330 }, { "epoch": 0.62, "grad_norm": 1.78125, "learning_rate": 0.00015655115288151298, "loss": 2.066, "step": 262335 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015654962851122556, "loss": 2.0169, "step": 262340 }, { "epoch": 0.62, "grad_norm": 1.875, "learning_rate": 0.0001565481041216198, "loss": 2.1368, "step": 262345 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015654657971269614, "loss": 2.1964, "step": 262350 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015654505528445517, "loss": 2.1829, "step": 262355 }, { "epoch": 0.62, "grad_norm": 1.7578125, "learning_rate": 0.0001565435308368974, "loss": 2.1272, "step": 262360 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.0001565420063700233, "loss": 2.1199, "step": 262365 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015654048188383344, "loss": 2.0607, "step": 262370 }, { "epoch": 0.62, "grad_norm": 1.8984375, "learning_rate": 0.00015653895737832835, "loss": 2.0242, "step": 262375 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015653743285350846, "loss": 2.0487, "step": 262380 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.0001565359083093744, "loss": 2.2478, "step": 262385 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015653438374592664, "loss": 1.9035, "step": 262390 }, { "epoch": 0.62, "grad_norm": 1.90625, "learning_rate": 0.0001565328591631657, "loss": 2.1207, "step": 262395 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015653133456109214, "loss": 1.8758, "step": 262400 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015652980993970643, "loss": 2.0504, "step": 262405 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015652828529900912, "loss": 2.2763, "step": 262410 }, { "epoch": 0.62, "grad_norm": 1.5859375, "learning_rate": 0.00015652676063900074, "loss": 2.1485, "step": 262415 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015652523595968178, "loss": 2.1797, "step": 262420 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015652371126105276, "loss": 2.227, "step": 262425 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001565221865431142, "loss": 1.9153, "step": 262430 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.0001565206618058667, "loss": 2.1192, "step": 262435 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015651913704931066, "loss": 2.1216, "step": 262440 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015651761227344668, "loss": 2.2116, "step": 262445 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015651608747827524, "loss": 2.0848, "step": 262450 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001565145626637969, "loss": 2.0151, "step": 262455 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.00015651303783001215, "loss": 2.0483, "step": 262460 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.0001565115129769215, "loss": 2.0827, "step": 262465 }, { "epoch": 0.62, "grad_norm": 1.8828125, "learning_rate": 0.00015650998810452554, "loss": 2.1165, "step": 262470 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015650846321282473, "loss": 2.1453, "step": 262475 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015650693830181957, "loss": 1.9924, "step": 262480 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015650541337151064, "loss": 2.0318, "step": 262485 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.00015650388842189842, "loss": 2.1071, "step": 262490 }, { "epoch": 0.62, "grad_norm": 2.71875, "learning_rate": 0.0001565023634529835, "loss": 2.0484, "step": 262495 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001565008384647663, "loss": 2.0491, "step": 262500 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.0001564993134572474, "loss": 2.0172, "step": 262505 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015649778843042727, "loss": 2.2025, "step": 262510 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015649626338430653, "loss": 2.0886, "step": 262515 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015649473831888563, "loss": 2.1686, "step": 262520 }, { "epoch": 0.62, "grad_norm": 1.9921875, "learning_rate": 0.0001564932132341651, "loss": 1.9276, "step": 262525 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015649168813014545, "loss": 2.1036, "step": 262530 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.0001564901630068272, "loss": 2.1481, "step": 262535 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015648863786421092, "loss": 2.0852, "step": 262540 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001564871127022971, "loss": 2.1005, "step": 262545 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015648558752108624, "loss": 2.0783, "step": 262550 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015648406232057887, "loss": 2.1676, "step": 262555 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015648253710077555, "loss": 2.0944, "step": 262560 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015648101186167672, "loss": 2.0089, "step": 262565 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.000156479486603283, "loss": 2.0866, "step": 262570 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015647796132559484, "loss": 2.043, "step": 262575 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.0001564764360286128, "loss": 2.2419, "step": 262580 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015647491071233736, "loss": 2.2013, "step": 262585 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001564733853767691, "loss": 2.0211, "step": 262590 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015647186002190847, "loss": 2.1013, "step": 262595 }, { "epoch": 0.62, "grad_norm": 1.9296875, "learning_rate": 0.00015647033464775606, "loss": 2.2545, "step": 262600 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015646880925431237, "loss": 1.983, "step": 262605 }, { "epoch": 0.62, "grad_norm": 2.65625, "learning_rate": 0.0001564672838415779, "loss": 2.1284, "step": 262610 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015646575840955315, "loss": 2.1482, "step": 262615 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015646423295823868, "loss": 1.9932, "step": 262620 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015646270748763503, "loss": 1.9484, "step": 262625 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.0001564611819977427, "loss": 1.8914, "step": 262630 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015645965648856218, "loss": 2.0211, "step": 262635 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015645813096009406, "loss": 2.0685, "step": 262640 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015645660541233874, "loss": 1.953, "step": 262645 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015645507984529688, "loss": 2.1128, "step": 262650 }, { "epoch": 0.62, "grad_norm": 1.9296875, "learning_rate": 0.0001564535542589689, "loss": 2.2062, "step": 262655 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015645202865335542, "loss": 2.1011, "step": 262660 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015645050302845687, "loss": 2.1675, "step": 262665 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001564489773842738, "loss": 2.1769, "step": 262670 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015644745172080672, "loss": 2.042, "step": 262675 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015644592603805622, "loss": 2.0749, "step": 262680 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001564444003360227, "loss": 1.9643, "step": 262685 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015644287461470682, "loss": 2.0939, "step": 262690 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.000156441348874109, "loss": 2.1276, "step": 262695 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015643982311422977, "loss": 2.0794, "step": 262700 }, { "epoch": 0.62, "grad_norm": 2.75, "learning_rate": 0.00015643829733506966, "loss": 2.224, "step": 262705 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015643677153662927, "loss": 1.8973, "step": 262710 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015643524571890902, "loss": 2.0545, "step": 262715 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015643371988190947, "loss": 1.9702, "step": 262720 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015643219402563112, "loss": 1.9606, "step": 262725 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.0001564306681500745, "loss": 2.2177, "step": 262730 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.0001564291422552402, "loss": 2.1373, "step": 262735 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015642761634112863, "loss": 2.1327, "step": 262740 }, { "epoch": 0.62, "grad_norm": 2.578125, "learning_rate": 0.00015642609040774036, "loss": 2.1257, "step": 262745 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015642456445507597, "loss": 1.8373, "step": 262750 }, { "epoch": 0.62, "grad_norm": 1.9140625, "learning_rate": 0.00015642303848313586, "loss": 2.0896, "step": 262755 }, { "epoch": 0.62, "grad_norm": 1.9921875, "learning_rate": 0.00015642151249192066, "loss": 1.9643, "step": 262760 }, { "epoch": 0.62, "grad_norm": 1.875, "learning_rate": 0.0001564199864814308, "loss": 2.1154, "step": 262765 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001564184604516669, "loss": 2.049, "step": 262770 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.00015641693440262942, "loss": 2.0689, "step": 262775 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015641540833431886, "loss": 1.9986, "step": 262780 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001564138822467358, "loss": 2.1484, "step": 262785 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015641235613988075, "loss": 2.2054, "step": 262790 }, { "epoch": 0.62, "grad_norm": 1.859375, "learning_rate": 0.0001564108300137542, "loss": 1.8852, "step": 262795 }, { "epoch": 0.62, "grad_norm": 2.609375, "learning_rate": 0.00015640930386835668, "loss": 2.2288, "step": 262800 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.0001564077777036887, "loss": 2.0739, "step": 262805 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015640625151975085, "loss": 2.0607, "step": 262810 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015640472531654358, "loss": 2.1535, "step": 262815 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015640319909406743, "loss": 2.1379, "step": 262820 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001564016728523229, "loss": 2.0589, "step": 262825 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015640014659131058, "loss": 1.8876, "step": 262830 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.0001563986203110309, "loss": 2.1349, "step": 262835 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015639709401148446, "loss": 2.1312, "step": 262840 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015639556769267174, "loss": 2.1507, "step": 262845 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015639404135459327, "loss": 2.1611, "step": 262850 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015639251499724961, "loss": 1.8749, "step": 262855 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001563909886206412, "loss": 2.0533, "step": 262860 }, { "epoch": 0.62, "grad_norm": 2.796875, "learning_rate": 0.00015638946222476863, "loss": 1.9435, "step": 262865 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.0001563879358096324, "loss": 2.182, "step": 262870 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.000156386409375233, "loss": 2.0724, "step": 262875 }, { "epoch": 0.62, "grad_norm": 2.59375, "learning_rate": 0.00015638488292157098, "loss": 2.2223, "step": 262880 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015638335644864687, "loss": 1.9372, "step": 262885 }, { "epoch": 0.62, "grad_norm": 2.5, "learning_rate": 0.00015638182995646117, "loss": 2.2496, "step": 262890 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015638030344501444, "loss": 1.9714, "step": 262895 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015637877691430718, "loss": 1.9935, "step": 262900 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.0001563772503643399, "loss": 2.0974, "step": 262905 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.0001563757237951131, "loss": 2.1388, "step": 262910 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015637419720662736, "loss": 2.0228, "step": 262915 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001563726705988832, "loss": 1.9686, "step": 262920 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015637114397188106, "loss": 2.0529, "step": 262925 }, { "epoch": 0.62, "grad_norm": 1.96875, "learning_rate": 0.00015636961732562154, "loss": 2.17, "step": 262930 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015636809066010515, "loss": 2.0519, "step": 262935 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015636656397533235, "loss": 2.1789, "step": 262940 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015636503727130374, "loss": 2.0731, "step": 262945 }, { "epoch": 0.62, "grad_norm": 2.75, "learning_rate": 0.00015636351054801982, "loss": 2.0191, "step": 262950 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015636198380548108, "loss": 2.2861, "step": 262955 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015636045704368808, "loss": 2.0795, "step": 262960 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015635893026264133, "loss": 2.0828, "step": 262965 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015635740346234132, "loss": 2.1504, "step": 262970 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.0001563558766427886, "loss": 2.0506, "step": 262975 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015635434980398373, "loss": 2.0952, "step": 262980 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015635282294592718, "loss": 2.2958, "step": 262985 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015635129606861946, "loss": 2.1823, "step": 262990 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015634976917206113, "loss": 2.2475, "step": 262995 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015634824225625268, "loss": 2.0236, "step": 263000 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.0001563467153211947, "loss": 2.0107, "step": 263005 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001563451883668876, "loss": 2.0649, "step": 263010 }, { "epoch": 0.62, "grad_norm": 1.828125, "learning_rate": 0.000156343661393332, "loss": 1.8578, "step": 263015 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.00015634213440052837, "loss": 2.1964, "step": 263020 }, { "epoch": 0.62, "grad_norm": 1.75, "learning_rate": 0.00015634060738847723, "loss": 2.1773, "step": 263025 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.00015633908035717916, "loss": 2.0537, "step": 263030 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.0001563375533066346, "loss": 2.0025, "step": 263035 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.0001563360262368441, "loss": 2.2742, "step": 263040 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015633449914780823, "loss": 1.9731, "step": 263045 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.00015633297203952745, "loss": 1.9431, "step": 263050 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015633144491200232, "loss": 2.2144, "step": 263055 }, { "epoch": 0.62, "grad_norm": 2.515625, "learning_rate": 0.0001563299177652333, "loss": 1.9888, "step": 263060 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015632839059922103, "loss": 2.015, "step": 263065 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015632686341396593, "loss": 2.063, "step": 263070 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015632533620946851, "loss": 1.9244, "step": 263075 }, { "epoch": 0.62, "grad_norm": 1.921875, "learning_rate": 0.00015632380898572939, "loss": 2.2226, "step": 263080 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015632228174274903, "loss": 2.2802, "step": 263085 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015632075448052793, "loss": 2.2118, "step": 263090 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015631922719906667, "loss": 2.2267, "step": 263095 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015631769989836572, "loss": 2.0387, "step": 263100 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001563161725784256, "loss": 2.1892, "step": 263105 }, { "epoch": 0.62, "grad_norm": 2.65625, "learning_rate": 0.00015631464523924691, "loss": 2.2459, "step": 263110 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015631311788083006, "loss": 1.9627, "step": 263115 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015631159050317564, "loss": 2.0625, "step": 263120 }, { "epoch": 0.62, "grad_norm": 1.875, "learning_rate": 0.0001563100631062842, "loss": 2.0434, "step": 263125 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015630853569015618, "loss": 2.2879, "step": 263130 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015630700825479217, "loss": 2.0547, "step": 263135 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015630548080019266, "loss": 2.138, "step": 263140 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015630395332635817, "loss": 2.0112, "step": 263145 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001563024258332892, "loss": 2.063, "step": 263150 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015630089832098634, "loss": 2.0332, "step": 263155 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015629937078945005, "loss": 1.9857, "step": 263160 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.00015629784323868088, "loss": 2.0436, "step": 263165 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015629631566867937, "loss": 2.263, "step": 263170 }, { "epoch": 0.62, "grad_norm": 1.7421875, "learning_rate": 0.00015629478807944599, "loss": 2.1491, "step": 263175 }, { "epoch": 0.62, "grad_norm": 1.7890625, "learning_rate": 0.0001562932604709813, "loss": 2.0648, "step": 263180 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.0001562917328432858, "loss": 2.0112, "step": 263185 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015629020519636005, "loss": 2.0658, "step": 263190 }, { "epoch": 0.62, "grad_norm": 2.65625, "learning_rate": 0.0001562886775302045, "loss": 1.8803, "step": 263195 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015628714984481973, "loss": 2.1375, "step": 263200 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.0001562856221402063, "loss": 2.2529, "step": 263205 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.0001562840944163646, "loss": 1.9751, "step": 263210 }, { "epoch": 0.62, "grad_norm": 2.5, "learning_rate": 0.0001562825666732953, "loss": 2.1772, "step": 263215 }, { "epoch": 0.62, "grad_norm": 1.7265625, "learning_rate": 0.0001562810389109988, "loss": 2.1031, "step": 263220 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.0001562795111294757, "loss": 1.9741, "step": 263225 }, { "epoch": 0.62, "grad_norm": 1.8828125, "learning_rate": 0.00015627798332872654, "loss": 1.9449, "step": 263230 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015627645550875174, "loss": 2.1345, "step": 263235 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001562749276695519, "loss": 1.9947, "step": 263240 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015627339981112755, "loss": 2.2084, "step": 263245 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015627187193347918, "loss": 2.067, "step": 263250 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.0001562703440366073, "loss": 2.0293, "step": 263255 }, { "epoch": 0.62, "grad_norm": 1.8125, "learning_rate": 0.00015626881612051244, "loss": 2.002, "step": 263260 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015626728818519514, "loss": 2.2324, "step": 263265 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.0001562657602306559, "loss": 2.121, "step": 263270 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.0001562642322568953, "loss": 2.1819, "step": 263275 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015626270426391382, "loss": 2.2224, "step": 263280 }, { "epoch": 0.62, "grad_norm": 1.9296875, "learning_rate": 0.00015626117625171192, "loss": 2.0836, "step": 263285 }, { "epoch": 0.62, "grad_norm": 1.65625, "learning_rate": 0.00015625964822029023, "loss": 2.2042, "step": 263290 }, { "epoch": 0.62, "grad_norm": 1.8671875, "learning_rate": 0.0001562581201696492, "loss": 2.0675, "step": 263295 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015625659209978938, "loss": 2.0716, "step": 263300 }, { "epoch": 0.62, "grad_norm": 2.546875, "learning_rate": 0.0001562550640107113, "loss": 2.0739, "step": 263305 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015625353590241546, "loss": 2.0167, "step": 263310 }, { "epoch": 0.62, "grad_norm": 1.6171875, "learning_rate": 0.0001562520077749024, "loss": 1.9642, "step": 263315 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.0001562504796281726, "loss": 2.0496, "step": 263320 }, { "epoch": 0.62, "grad_norm": 1.8515625, "learning_rate": 0.00015624895146222666, "loss": 1.9792, "step": 263325 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015624742327706505, "loss": 2.2084, "step": 263330 }, { "epoch": 0.62, "grad_norm": 2.5625, "learning_rate": 0.0001562458950726883, "loss": 2.1089, "step": 263335 }, { "epoch": 0.62, "grad_norm": 2.515625, "learning_rate": 0.0001562443668490969, "loss": 2.122, "step": 263340 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015624283860629144, "loss": 2.1587, "step": 263345 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.0001562413103442724, "loss": 2.0549, "step": 263350 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001562397820630403, "loss": 1.9069, "step": 263355 }, { "epoch": 0.62, "grad_norm": 2.796875, "learning_rate": 0.00015623825376259566, "loss": 1.9024, "step": 263360 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015623672544293906, "loss": 1.9718, "step": 263365 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015623519710407092, "loss": 2.0116, "step": 263370 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015623366874599186, "loss": 1.8823, "step": 263375 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015623214036870233, "loss": 2.0208, "step": 263380 }, { "epoch": 0.62, "grad_norm": 1.703125, "learning_rate": 0.00015623061197220288, "loss": 2.1661, "step": 263385 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.00015622908355649405, "loss": 2.0429, "step": 263390 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.00015622755512157633, "loss": 2.1821, "step": 263395 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015622602666745026, "loss": 2.0241, "step": 263400 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.00015622449819411639, "loss": 2.0489, "step": 263405 }, { "epoch": 0.62, "grad_norm": 1.78125, "learning_rate": 0.00015622296970157516, "loss": 2.098, "step": 263410 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015622144118982717, "loss": 2.1157, "step": 263415 }, { "epoch": 0.62, "grad_norm": 2.75, "learning_rate": 0.0001562199126588729, "loss": 2.0182, "step": 263420 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.0001562183841087129, "loss": 2.0498, "step": 263425 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001562168555393477, "loss": 2.0626, "step": 263430 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015621532695077778, "loss": 1.9509, "step": 263435 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001562137983430037, "loss": 1.8464, "step": 263440 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015621226971602596, "loss": 2.0825, "step": 263445 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015621074106984505, "loss": 2.1936, "step": 263450 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001562092124044616, "loss": 2.1138, "step": 263455 }, { "epoch": 0.62, "grad_norm": 1.96875, "learning_rate": 0.00015620768371987605, "loss": 2.0245, "step": 263460 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001562061550160889, "loss": 2.0372, "step": 263465 }, { "epoch": 0.62, "grad_norm": 1.7578125, "learning_rate": 0.00015620462629310072, "loss": 2.0465, "step": 263470 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015620309755091203, "loss": 2.1529, "step": 263475 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015620156878952334, "loss": 2.3526, "step": 263480 }, { "epoch": 0.62, "grad_norm": 1.9140625, "learning_rate": 0.00015620004000893519, "loss": 2.0047, "step": 263485 }, { "epoch": 0.62, "grad_norm": 1.8359375, "learning_rate": 0.00015619851120914806, "loss": 2.0413, "step": 263490 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001561969823901625, "loss": 2.122, "step": 263495 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015619545355197907, "loss": 2.1244, "step": 263500 }, { "epoch": 0.62, "grad_norm": 1.890625, "learning_rate": 0.0001561939246945982, "loss": 2.1151, "step": 263505 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001561923958180205, "loss": 1.7343, "step": 263510 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015619086692224645, "loss": 2.117, "step": 263515 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015618933800727656, "loss": 2.0847, "step": 263520 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015618780907311145, "loss": 2.1577, "step": 263525 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015618628011975148, "loss": 2.0625, "step": 263530 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015618475114719728, "loss": 2.0924, "step": 263535 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015618322215544937, "loss": 2.0081, "step": 263540 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015618169314450824, "loss": 2.2305, "step": 263545 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015618016411437445, "loss": 2.2128, "step": 263550 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015617863506504844, "loss": 2.0711, "step": 263555 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015617710599653083, "loss": 2.117, "step": 263560 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015617557690882208, "loss": 2.1647, "step": 263565 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015617404780192274, "loss": 2.0611, "step": 263570 }, { "epoch": 0.62, "grad_norm": 1.9921875, "learning_rate": 0.00015617251867583334, "loss": 2.0154, "step": 263575 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001561709895305544, "loss": 2.0515, "step": 263580 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.0001561694603660864, "loss": 2.0225, "step": 263585 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.0001561679311824299, "loss": 1.9671, "step": 263590 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015616640197958542, "loss": 2.2592, "step": 263595 }, { "epoch": 0.62, "grad_norm": 1.890625, "learning_rate": 0.0001561648727575535, "loss": 2.0697, "step": 263600 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015616334351633464, "loss": 1.8839, "step": 263605 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015616181425592932, "loss": 2.166, "step": 263610 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015616028497633812, "loss": 2.2266, "step": 263615 }, { "epoch": 0.62, "grad_norm": 1.6953125, "learning_rate": 0.0001561587556775616, "loss": 1.941, "step": 263620 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015615722635960017, "loss": 2.1769, "step": 263625 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015615569702245443, "loss": 2.1212, "step": 263630 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.0001561541676661249, "loss": 2.0813, "step": 263635 }, { "epoch": 0.62, "grad_norm": 1.875, "learning_rate": 0.00015615263829061207, "loss": 2.1364, "step": 263640 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001561511088959165, "loss": 1.9874, "step": 263645 }, { "epoch": 0.62, "grad_norm": 1.8359375, "learning_rate": 0.00015614957948203867, "loss": 2.1089, "step": 263650 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015614805004897914, "loss": 2.2117, "step": 263655 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015614652059673843, "loss": 2.1694, "step": 263660 }, { "epoch": 0.62, "grad_norm": 2.6875, "learning_rate": 0.000156144991125317, "loss": 1.8949, "step": 263665 }, { "epoch": 0.62, "grad_norm": 3.28125, "learning_rate": 0.00015614346163471545, "loss": 2.125, "step": 263670 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.0001561419321249343, "loss": 2.0694, "step": 263675 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015614040259597403, "loss": 2.0338, "step": 263680 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015613887304783517, "loss": 2.1063, "step": 263685 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015613734348051826, "loss": 2.1188, "step": 263690 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015613581389402383, "loss": 2.0919, "step": 263695 }, { "epoch": 0.62, "grad_norm": 2.6875, "learning_rate": 0.00015613428428835237, "loss": 1.9866, "step": 263700 }, { "epoch": 0.62, "grad_norm": 1.96875, "learning_rate": 0.00015613275466350442, "loss": 2.0035, "step": 263705 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015613122501948053, "loss": 2.1053, "step": 263710 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015612969535628115, "loss": 2.0717, "step": 263715 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015612816567390686, "loss": 2.1514, "step": 263720 }, { "epoch": 0.62, "grad_norm": 1.875, "learning_rate": 0.00015612663597235822, "loss": 2.0697, "step": 263725 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015612510625163565, "loss": 1.9833, "step": 263730 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015612357651173975, "loss": 2.112, "step": 263735 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.000156122046752671, "loss": 2.1166, "step": 263740 }, { "epoch": 0.62, "grad_norm": 1.8046875, "learning_rate": 0.00015612051697442994, "loss": 2.2014, "step": 263745 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015611898717701714, "loss": 2.0887, "step": 263750 }, { "epoch": 0.62, "grad_norm": 2.828125, "learning_rate": 0.00015611745736043302, "loss": 2.0627, "step": 263755 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015611592752467818, "loss": 2.041, "step": 263760 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015611439766975314, "loss": 2.0651, "step": 263765 }, { "epoch": 0.62, "grad_norm": 1.703125, "learning_rate": 0.00015611286779565838, "loss": 2.0742, "step": 263770 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015611133790239444, "loss": 2.1035, "step": 263775 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015610980798996186, "loss": 1.9506, "step": 263780 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015610827805836116, "loss": 2.2247, "step": 263785 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015610674810759284, "loss": 1.9769, "step": 263790 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015610521813765746, "loss": 2.0442, "step": 263795 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.0001561036881485555, "loss": 2.0772, "step": 263800 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015610215814028752, "loss": 2.1813, "step": 263805 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.000156100628112854, "loss": 2.0279, "step": 263810 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.0001560990980662555, "loss": 1.763, "step": 263815 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015609756800049257, "loss": 2.0438, "step": 263820 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015609603791556564, "loss": 2.1451, "step": 263825 }, { "epoch": 0.62, "grad_norm": 1.8828125, "learning_rate": 0.0001560945078114753, "loss": 2.1995, "step": 263830 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015609297768822206, "loss": 2.0328, "step": 263835 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015609144754580645, "loss": 2.0797, "step": 263840 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015608991738422896, "loss": 1.9799, "step": 263845 }, { "epoch": 0.62, "grad_norm": 1.8828125, "learning_rate": 0.00015608838720349016, "loss": 2.2218, "step": 263850 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015608685700359054, "loss": 2.0819, "step": 263855 }, { "epoch": 0.62, "grad_norm": 1.609375, "learning_rate": 0.00015608532678453062, "loss": 2.0718, "step": 263860 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015608379654631098, "loss": 1.964, "step": 263865 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015608226628893208, "loss": 2.2388, "step": 263870 }, { "epoch": 0.62, "grad_norm": 1.6796875, "learning_rate": 0.00015608073601239446, "loss": 2.0505, "step": 263875 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.0001560792057166986, "loss": 1.9972, "step": 263880 }, { "epoch": 0.62, "grad_norm": 1.90625, "learning_rate": 0.0001560776754018451, "loss": 1.8091, "step": 263885 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015607614506783446, "loss": 2.0433, "step": 263890 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015607461471466719, "loss": 2.1671, "step": 263895 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001560730843423438, "loss": 2.006, "step": 263900 }, { "epoch": 0.62, "grad_norm": 2.640625, "learning_rate": 0.0001560715539508648, "loss": 2.0723, "step": 263905 }, { "epoch": 0.62, "grad_norm": 1.9921875, "learning_rate": 0.0001560700235402308, "loss": 1.9627, "step": 263910 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015606849311044222, "loss": 2.0252, "step": 263915 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015606696266149967, "loss": 1.9231, "step": 263920 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.0001560654321934036, "loss": 2.1488, "step": 263925 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015606390170615453, "loss": 2.0855, "step": 263930 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015606237119975303, "loss": 2.0571, "step": 263935 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015606084067419965, "loss": 2.1831, "step": 263940 }, { "epoch": 0.62, "grad_norm": 1.6953125, "learning_rate": 0.00015605931012949484, "loss": 1.8992, "step": 263945 }, { "epoch": 0.62, "grad_norm": 1.8828125, "learning_rate": 0.00015605777956563917, "loss": 2.1627, "step": 263950 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015605624898263313, "loss": 2.134, "step": 263955 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015605471838047724, "loss": 2.0035, "step": 263960 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015605318775917208, "loss": 2.0738, "step": 263965 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015605165711871812, "loss": 1.9898, "step": 263970 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001560501264591159, "loss": 2.05, "step": 263975 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015604859578036595, "loss": 1.9577, "step": 263980 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015604706508246877, "loss": 2.0585, "step": 263985 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015604553436542488, "loss": 2.1609, "step": 263990 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001560440036292348, "loss": 2.1794, "step": 263995 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.00015604247287389912, "loss": 1.8571, "step": 264000 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.0001560409420994183, "loss": 2.0956, "step": 264005 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001560394113057929, "loss": 2.0326, "step": 264010 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015603788049302336, "loss": 2.1147, "step": 264015 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015603634966111034, "loss": 1.9399, "step": 264020 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015603481881005423, "loss": 2.1011, "step": 264025 }, { "epoch": 0.62, "grad_norm": 1.7578125, "learning_rate": 0.00015603328793985566, "loss": 2.1054, "step": 264030 }, { "epoch": 0.62, "grad_norm": 2.515625, "learning_rate": 0.00015603175705051507, "loss": 2.2591, "step": 264035 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.000156030226142033, "loss": 1.8599, "step": 264040 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015602869521441002, "loss": 1.8729, "step": 264045 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001560271642676466, "loss": 2.1033, "step": 264050 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015602563330174328, "loss": 2.1751, "step": 264055 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015602410231670063, "loss": 2.1342, "step": 264060 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015602257131251907, "loss": 2.1388, "step": 264065 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015602104028919923, "loss": 2.2686, "step": 264070 }, { "epoch": 0.62, "grad_norm": 2.625, "learning_rate": 0.00015601950924674156, "loss": 1.9607, "step": 264075 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015601797818514662, "loss": 1.9078, "step": 264080 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001560164471044149, "loss": 2.0217, "step": 264085 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015601491600454698, "loss": 2.1459, "step": 264090 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015601338488554334, "loss": 1.9933, "step": 264095 }, { "epoch": 0.62, "grad_norm": 1.8359375, "learning_rate": 0.0001560118537474045, "loss": 2.1078, "step": 264100 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015601032259013103, "loss": 2.1787, "step": 264105 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.0001560087914137234, "loss": 2.1399, "step": 264110 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015600726021818212, "loss": 2.3149, "step": 264115 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015600572900350778, "loss": 2.1728, "step": 264120 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015600419776970082, "loss": 2.0969, "step": 264125 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015600266651676187, "loss": 2.0855, "step": 264130 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015600113524469138, "loss": 2.0837, "step": 264135 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015599960395348988, "loss": 1.9615, "step": 264140 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001559980726431579, "loss": 2.1898, "step": 264145 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015599654131369595, "loss": 2.1227, "step": 264150 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015599500996510453, "loss": 2.1725, "step": 264155 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015599347859738427, "loss": 2.0606, "step": 264160 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001559919472105356, "loss": 2.1491, "step": 264165 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015599041580455905, "loss": 1.9339, "step": 264170 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015598888437945516, "loss": 2.1492, "step": 264175 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.00015598735293522449, "loss": 2.1656, "step": 264180 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015598582147186748, "loss": 2.1862, "step": 264185 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015598428998938474, "loss": 2.0868, "step": 264190 }, { "epoch": 0.62, "grad_norm": 2.609375, "learning_rate": 0.0001559827584877767, "loss": 2.0725, "step": 264195 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015598122696704395, "loss": 2.136, "step": 264200 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.000155979695427187, "loss": 2.1973, "step": 264205 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.0001559781638682064, "loss": 1.9327, "step": 264210 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015597663229010263, "loss": 2.1047, "step": 264215 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001559751006928762, "loss": 1.9255, "step": 264220 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001559735690765277, "loss": 2.1157, "step": 264225 }, { "epoch": 0.62, "grad_norm": 1.90625, "learning_rate": 0.00015597203744105757, "loss": 2.0021, "step": 264230 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015597050578646638, "loss": 2.2917, "step": 264235 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015596897411275471, "loss": 1.9087, "step": 264240 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015596744241992298, "loss": 2.0493, "step": 264245 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015596591070797175, "loss": 2.0641, "step": 264250 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015596437897690154, "loss": 1.8188, "step": 264255 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.0001559628472267129, "loss": 2.1606, "step": 264260 }, { "epoch": 0.62, "grad_norm": 2.71875, "learning_rate": 0.00015596131545740635, "loss": 2.0713, "step": 264265 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015595978366898238, "loss": 2.1308, "step": 264270 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015595825186144157, "loss": 2.0576, "step": 264275 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015595672003478436, "loss": 1.8956, "step": 264280 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015595518818901132, "loss": 2.1327, "step": 264285 }, { "epoch": 0.62, "grad_norm": 1.90625, "learning_rate": 0.00015595365632412302, "loss": 2.0788, "step": 264290 }, { "epoch": 0.62, "grad_norm": 1.7109375, "learning_rate": 0.0001559521244401199, "loss": 1.8934, "step": 264295 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015595059253700253, "loss": 2.1101, "step": 264300 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.0001559490606147714, "loss": 2.1357, "step": 264305 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015594752867342711, "loss": 2.0702, "step": 264310 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015594599671297008, "loss": 2.0524, "step": 264315 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001559444647334009, "loss": 2.0868, "step": 264320 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015594293273472006, "loss": 2.062, "step": 264325 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015594140071692814, "loss": 2.1693, "step": 264330 }, { "epoch": 0.62, "grad_norm": 1.8046875, "learning_rate": 0.00015593986868002558, "loss": 2.0269, "step": 264335 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015593833662401298, "loss": 1.9559, "step": 264340 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001559368045488908, "loss": 2.1963, "step": 264345 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001559352724546596, "loss": 1.9614, "step": 264350 }, { "epoch": 0.62, "grad_norm": 1.890625, "learning_rate": 0.0001559337403413199, "loss": 1.9087, "step": 264355 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015593220820887222, "loss": 2.1743, "step": 264360 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001559306760573171, "loss": 2.1381, "step": 264365 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015592914388665502, "loss": 1.9608, "step": 264370 }, { "epoch": 0.62, "grad_norm": 1.890625, "learning_rate": 0.00015592761169688654, "loss": 1.9764, "step": 264375 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001559260794880122, "loss": 2.0257, "step": 264380 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015592454726003245, "loss": 2.1569, "step": 264385 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001559230150129479, "loss": 2.085, "step": 264390 }, { "epoch": 0.62, "grad_norm": 2.609375, "learning_rate": 0.000155921482746759, "loss": 2.2014, "step": 264395 }, { "epoch": 0.62, "grad_norm": 1.8359375, "learning_rate": 0.00015591995046146634, "loss": 1.9083, "step": 264400 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001559184181570704, "loss": 1.9662, "step": 264405 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.0001559168858335717, "loss": 1.935, "step": 264410 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001559153534909708, "loss": 1.9545, "step": 264415 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001559138211292682, "loss": 2.0378, "step": 264420 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001559122887484644, "loss": 1.9834, "step": 264425 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015591075634855997, "loss": 2.0433, "step": 264430 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015590922392955537, "loss": 2.0586, "step": 264435 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.0001559076914914512, "loss": 2.0681, "step": 264440 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015590615903424796, "loss": 2.056, "step": 264445 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.00015590462655794615, "loss": 2.0775, "step": 264450 }, { "epoch": 0.62, "grad_norm": 1.84375, "learning_rate": 0.00015590309406254631, "loss": 2.1542, "step": 264455 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015590156154804893, "loss": 2.0519, "step": 264460 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001559000290144546, "loss": 2.1407, "step": 264465 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001558984964617638, "loss": 1.9441, "step": 264470 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015589696388997704, "loss": 2.0414, "step": 264475 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015589543129909488, "loss": 2.0299, "step": 264480 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001558938986891178, "loss": 2.0483, "step": 264485 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.0001558923660600464, "loss": 1.9684, "step": 264490 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.00015589083341188112, "loss": 2.1409, "step": 264495 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015588930074462252, "loss": 2.2024, "step": 264500 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015588776805827115, "loss": 2.14, "step": 264505 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015588623535282747, "loss": 2.13, "step": 264510 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015588470262829204, "loss": 1.9393, "step": 264515 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.00015588316988466542, "loss": 2.1271, "step": 264520 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015588163712194802, "loss": 2.0592, "step": 264525 }, { "epoch": 0.62, "grad_norm": 2.734375, "learning_rate": 0.0001558801043401405, "loss": 2.0178, "step": 264530 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015587857153924331, "loss": 1.9647, "step": 264535 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.000155877038719257, "loss": 1.9506, "step": 264540 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015587550588018204, "loss": 2.2425, "step": 264545 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.00015587397302201903, "loss": 2.1592, "step": 264550 }, { "epoch": 0.62, "grad_norm": 1.734375, "learning_rate": 0.00015587244014476847, "loss": 1.9201, "step": 264555 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015587090724843082, "loss": 2.0469, "step": 264560 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.0001558693743330067, "loss": 2.1225, "step": 264565 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.0001558678413984966, "loss": 2.1687, "step": 264570 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015586630844490097, "loss": 2.1574, "step": 264575 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015586477547222045, "loss": 2.0949, "step": 264580 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015586324248045548, "loss": 2.029, "step": 264585 }, { "epoch": 0.62, "grad_norm": 1.796875, "learning_rate": 0.00015586170946960663, "loss": 2.1313, "step": 264590 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001558601764396744, "loss": 2.0946, "step": 264595 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001558586433906593, "loss": 2.2181, "step": 264600 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.0001558571103225619, "loss": 2.0914, "step": 264605 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015585557723538272, "loss": 2.1195, "step": 264610 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.0001558540441291222, "loss": 1.9334, "step": 264615 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015585251100378098, "loss": 2.0992, "step": 264620 }, { "epoch": 0.62, "grad_norm": 1.78125, "learning_rate": 0.0001558509778593595, "loss": 2.1893, "step": 264625 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015584944469585832, "loss": 1.9804, "step": 264630 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015584791151327794, "loss": 1.8628, "step": 264635 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015584637831161894, "loss": 2.217, "step": 264640 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.0001558448450908818, "loss": 1.9793, "step": 264645 }, { "epoch": 0.62, "grad_norm": 1.890625, "learning_rate": 0.00015584331185106702, "loss": 1.9933, "step": 264650 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015584177859217518, "loss": 2.149, "step": 264655 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015584024531420675, "loss": 2.0461, "step": 264660 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.0001558387120171623, "loss": 2.0869, "step": 264665 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015583717870104233, "loss": 2.0953, "step": 264670 }, { "epoch": 0.62, "grad_norm": 2.5, "learning_rate": 0.00015583564536584733, "loss": 2.0105, "step": 264675 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001558341120115779, "loss": 2.0785, "step": 264680 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.0001558325786382345, "loss": 2.0731, "step": 264685 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015583104524581772, "loss": 2.1009, "step": 264690 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.000155829511834328, "loss": 2.2113, "step": 264695 }, { "epoch": 0.62, "grad_norm": 2.5625, "learning_rate": 0.00015582797840376595, "loss": 2.1659, "step": 264700 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015582644495413202, "loss": 1.9477, "step": 264705 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015582491148542676, "loss": 2.0943, "step": 264710 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.0001558233779976507, "loss": 2.1479, "step": 264715 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.00015582184449080435, "loss": 2.0295, "step": 264720 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015582031096488826, "loss": 2.0165, "step": 264725 }, { "epoch": 0.62, "grad_norm": 2.484375, "learning_rate": 0.00015581877741990295, "loss": 2.1149, "step": 264730 }, { "epoch": 0.62, "grad_norm": 2.78125, "learning_rate": 0.00015581724385584891, "loss": 2.1845, "step": 264735 }, { "epoch": 0.62, "grad_norm": 2.625, "learning_rate": 0.0001558157102727267, "loss": 2.1835, "step": 264740 }, { "epoch": 0.62, "grad_norm": 2.5, "learning_rate": 0.00015581417667053687, "loss": 2.1971, "step": 264745 }, { "epoch": 0.62, "grad_norm": 2.625, "learning_rate": 0.00015581264304927986, "loss": 2.0638, "step": 264750 }, { "epoch": 0.62, "grad_norm": 2.8125, "learning_rate": 0.00015581110940895624, "loss": 2.1819, "step": 264755 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015580957574956654, "loss": 2.0455, "step": 264760 }, { "epoch": 0.62, "grad_norm": 2.703125, "learning_rate": 0.00015580804207111125, "loss": 1.9996, "step": 264765 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015580650837359098, "loss": 1.8842, "step": 264770 }, { "epoch": 0.62, "grad_norm": 2.421875, "learning_rate": 0.00015580497465700615, "loss": 1.8497, "step": 264775 }, { "epoch": 0.62, "grad_norm": 2.703125, "learning_rate": 0.00015580344092135735, "loss": 2.0635, "step": 264780 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.0001558019071666451, "loss": 2.1792, "step": 264785 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015580037339286987, "loss": 2.0414, "step": 264790 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.0001557988396000322, "loss": 1.9357, "step": 264795 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.00015579730578813266, "loss": 2.0411, "step": 264800 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.0001557957719571718, "loss": 1.9731, "step": 264805 }, { "epoch": 0.62, "grad_norm": 1.7890625, "learning_rate": 0.00015579423810715002, "loss": 1.9543, "step": 264810 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.00015579270423806793, "loss": 2.0303, "step": 264815 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015579117034992605, "loss": 1.98, "step": 264820 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015578963644272492, "loss": 2.0511, "step": 264825 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.000155788102516465, "loss": 2.1549, "step": 264830 }, { "epoch": 0.62, "grad_norm": 2.890625, "learning_rate": 0.00015578656857114686, "loss": 2.0622, "step": 264835 }, { "epoch": 0.62, "grad_norm": 2.15625, "learning_rate": 0.00015578503460677104, "loss": 2.2361, "step": 264840 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.000155783500623338, "loss": 2.1671, "step": 264845 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.00015578196662084835, "loss": 2.2, "step": 264850 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015578043259930252, "loss": 2.1241, "step": 264855 }, { "epoch": 0.62, "grad_norm": 2.46875, "learning_rate": 0.0001557788985587011, "loss": 2.0722, "step": 264860 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.00015577736449904462, "loss": 2.2418, "step": 264865 }, { "epoch": 0.62, "grad_norm": 1.8671875, "learning_rate": 0.00015577583042033358, "loss": 2.0492, "step": 264870 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015577429632256847, "loss": 2.0034, "step": 264875 }, { "epoch": 0.62, "grad_norm": 2.546875, "learning_rate": 0.0001557727622057499, "loss": 2.1407, "step": 264880 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001557712280698783, "loss": 1.9958, "step": 264885 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.00015576969391495427, "loss": 2.0968, "step": 264890 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015576815974097826, "loss": 2.096, "step": 264895 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.00015576662554795084, "loss": 2.1662, "step": 264900 }, { "epoch": 0.62, "grad_norm": 1.8515625, "learning_rate": 0.00015576509133587258, "loss": 1.9178, "step": 264905 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001557635571047439, "loss": 2.0429, "step": 264910 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.0001557620228545654, "loss": 2.0648, "step": 264915 }, { "epoch": 0.62, "grad_norm": 3.0625, "learning_rate": 0.00015576048858533757, "loss": 1.999, "step": 264920 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015575895429706095, "loss": 2.1049, "step": 264925 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015575741998973607, "loss": 1.8919, "step": 264930 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015575588566336346, "loss": 2.0501, "step": 264935 }, { "epoch": 0.62, "grad_norm": 1.8203125, "learning_rate": 0.0001557543513179436, "loss": 2.05, "step": 264940 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015575281695347706, "loss": 1.9816, "step": 264945 }, { "epoch": 0.62, "grad_norm": 1.84375, "learning_rate": 0.00015575128256996431, "loss": 1.9569, "step": 264950 }, { "epoch": 0.62, "grad_norm": 2.40625, "learning_rate": 0.00015574974816740596, "loss": 2.163, "step": 264955 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015574821374580243, "loss": 2.0403, "step": 264960 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015574667930515435, "loss": 2.1348, "step": 264965 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.0001557451448454622, "loss": 2.1756, "step": 264970 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015574361036672644, "loss": 2.1802, "step": 264975 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015574207586894768, "loss": 2.1133, "step": 264980 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015574054135212643, "loss": 2.0204, "step": 264985 }, { "epoch": 0.62, "grad_norm": 2.625, "learning_rate": 0.0001557390068162632, "loss": 2.0524, "step": 264990 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.0001557374722613585, "loss": 2.182, "step": 264995 }, { "epoch": 0.62, "grad_norm": 2.59375, "learning_rate": 0.00015573593768741288, "loss": 2.1274, "step": 265000 }, { "epoch": 0.62, "grad_norm": 2.546875, "learning_rate": 0.00015573440309442683, "loss": 2.0228, "step": 265005 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001557328684824009, "loss": 1.9619, "step": 265010 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015573133385133562, "loss": 2.0758, "step": 265015 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015572979920123152, "loss": 2.1539, "step": 265020 }, { "epoch": 0.62, "grad_norm": 2.5625, "learning_rate": 0.00015572826453208913, "loss": 1.935, "step": 265025 }, { "epoch": 0.62, "grad_norm": 1.8984375, "learning_rate": 0.0001557267298439089, "loss": 2.1184, "step": 265030 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015572519513669142, "loss": 2.0571, "step": 265035 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015572366041043722, "loss": 2.201, "step": 265040 }, { "epoch": 0.62, "grad_norm": 2.609375, "learning_rate": 0.0001557221256651468, "loss": 2.078, "step": 265045 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001557205909008207, "loss": 2.0988, "step": 265050 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.00015571905611745944, "loss": 2.0826, "step": 265055 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001557175213150635, "loss": 2.09, "step": 265060 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.0001557159864936335, "loss": 2.186, "step": 265065 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015571445165316988, "loss": 2.1448, "step": 265070 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001557129167936732, "loss": 2.1028, "step": 265075 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.000155711381915144, "loss": 2.0363, "step": 265080 }, { "epoch": 0.62, "grad_norm": 1.765625, "learning_rate": 0.00015570984701758272, "loss": 2.0098, "step": 265085 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015570831210098997, "loss": 2.0014, "step": 265090 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015570677716536627, "loss": 2.0967, "step": 265095 }, { "epoch": 0.62, "grad_norm": 1.9609375, "learning_rate": 0.00015570524221071209, "loss": 2.0632, "step": 265100 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015570370723702804, "loss": 2.0994, "step": 265105 }, { "epoch": 0.62, "grad_norm": 1.90625, "learning_rate": 0.00015570217224431456, "loss": 1.9753, "step": 265110 }, { "epoch": 0.62, "grad_norm": 2.0, "learning_rate": 0.0001557006372325722, "loss": 1.9549, "step": 265115 }, { "epoch": 0.62, "grad_norm": 1.9453125, "learning_rate": 0.0001556991022018015, "loss": 2.1442, "step": 265120 }, { "epoch": 0.62, "grad_norm": 1.9375, "learning_rate": 0.00015569756715200298, "loss": 1.8038, "step": 265125 }, { "epoch": 0.62, "grad_norm": 3.21875, "learning_rate": 0.0001556960320831772, "loss": 2.2634, "step": 265130 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.0001556944969953246, "loss": 2.0907, "step": 265135 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.00015569296188844574, "loss": 2.1612, "step": 265140 }, { "epoch": 0.62, "grad_norm": 2.203125, "learning_rate": 0.00015569142676254117, "loss": 1.9637, "step": 265145 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001556898916176114, "loss": 1.9704, "step": 265150 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015568835645365695, "loss": 2.1281, "step": 265155 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015568682127067838, "loss": 1.9517, "step": 265160 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015568528606867615, "loss": 2.1085, "step": 265165 }, { "epoch": 0.62, "grad_norm": 1.8203125, "learning_rate": 0.0001556837508476508, "loss": 2.0307, "step": 265170 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.0001556822156076029, "loss": 1.7668, "step": 265175 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015568068034853294, "loss": 2.3329, "step": 265180 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015567914507044143, "loss": 1.9439, "step": 265185 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015567760977332892, "loss": 1.9021, "step": 265190 }, { "epoch": 0.62, "grad_norm": 2.5, "learning_rate": 0.00015567607445719593, "loss": 2.3384, "step": 265195 }, { "epoch": 0.62, "grad_norm": 1.75, "learning_rate": 0.00015567453912204303, "loss": 2.039, "step": 265200 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015567300376787067, "loss": 2.0828, "step": 265205 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.0001556714683946794, "loss": 2.2207, "step": 265210 }, { "epoch": 0.62, "grad_norm": 1.8515625, "learning_rate": 0.0001556699330024697, "loss": 1.9099, "step": 265215 }, { "epoch": 0.62, "grad_norm": 2.09375, "learning_rate": 0.00015566839759124218, "loss": 1.9927, "step": 265220 }, { "epoch": 0.62, "grad_norm": 1.796875, "learning_rate": 0.00015566686216099735, "loss": 2.162, "step": 265225 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001556653267117357, "loss": 2.2625, "step": 265230 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.00015566379124345775, "loss": 2.0514, "step": 265235 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015566225575616404, "loss": 2.0695, "step": 265240 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015566072024985508, "loss": 2.1716, "step": 265245 }, { "epoch": 0.62, "grad_norm": 2.109375, "learning_rate": 0.0001556591847245314, "loss": 1.9639, "step": 265250 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015565764918019356, "loss": 2.1668, "step": 265255 }, { "epoch": 0.62, "grad_norm": 1.953125, "learning_rate": 0.00015565611361684206, "loss": 2.136, "step": 265260 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015565457803447743, "loss": 1.9208, "step": 265265 }, { "epoch": 0.62, "grad_norm": 2.046875, "learning_rate": 0.00015565304243310015, "loss": 1.9789, "step": 265270 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001556515068127108, "loss": 2.0775, "step": 265275 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015564997117330993, "loss": 2.097, "step": 265280 }, { "epoch": 0.62, "grad_norm": 2.265625, "learning_rate": 0.00015564843551489797, "loss": 2.2264, "step": 265285 }, { "epoch": 0.62, "grad_norm": 1.890625, "learning_rate": 0.0001556468998374755, "loss": 1.9557, "step": 265290 }, { "epoch": 0.62, "grad_norm": 1.8671875, "learning_rate": 0.00015564536414104305, "loss": 1.9064, "step": 265295 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015564382842560114, "loss": 2.0747, "step": 265300 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015564229269115026, "loss": 2.0722, "step": 265305 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.000155640756937691, "loss": 2.2764, "step": 265310 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015563922116522384, "loss": 2.3305, "step": 265315 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.0001556376853737493, "loss": 2.1887, "step": 265320 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001556361495632679, "loss": 2.0658, "step": 265325 }, { "epoch": 0.62, "grad_norm": 2.953125, "learning_rate": 0.00015563461373378022, "loss": 1.8626, "step": 265330 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015563307788528673, "loss": 2.0792, "step": 265335 }, { "epoch": 0.62, "grad_norm": 1.984375, "learning_rate": 0.00015563154201778798, "loss": 2.1327, "step": 265340 }, { "epoch": 0.62, "grad_norm": 1.578125, "learning_rate": 0.00015563000613128444, "loss": 2.0665, "step": 265345 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.00015562847022577674, "loss": 2.1002, "step": 265350 }, { "epoch": 0.62, "grad_norm": 2.4375, "learning_rate": 0.0001556269343012653, "loss": 2.1264, "step": 265355 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015562539835775073, "loss": 2.0571, "step": 265360 }, { "epoch": 0.62, "grad_norm": 2.28125, "learning_rate": 0.0001556238623952335, "loss": 2.2074, "step": 265365 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015562232641371418, "loss": 1.8848, "step": 265370 }, { "epoch": 0.62, "grad_norm": 2.21875, "learning_rate": 0.0001556207904131932, "loss": 2.0728, "step": 265375 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001556192543936712, "loss": 2.0664, "step": 265380 }, { "epoch": 0.62, "grad_norm": 2.453125, "learning_rate": 0.00015561771835514865, "loss": 2.038, "step": 265385 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.00015561618229762605, "loss": 2.0699, "step": 265390 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.000155614646221104, "loss": 2.1134, "step": 265395 }, { "epoch": 0.62, "grad_norm": 2.1875, "learning_rate": 0.00015561311012558292, "loss": 2.4508, "step": 265400 }, { "epoch": 0.62, "grad_norm": 2.03125, "learning_rate": 0.00015561157401106342, "loss": 1.9886, "step": 265405 }, { "epoch": 0.62, "grad_norm": 1.8671875, "learning_rate": 0.000155610037877546, "loss": 1.8899, "step": 265410 }, { "epoch": 0.62, "grad_norm": 1.859375, "learning_rate": 0.00015560850172503118, "loss": 2.0901, "step": 265415 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001556069655535195, "loss": 1.9111, "step": 265420 }, { "epoch": 0.62, "grad_norm": 1.5390625, "learning_rate": 0.00015560542936301147, "loss": 1.9541, "step": 265425 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001556038931535076, "loss": 2.0025, "step": 265430 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015560235692500842, "loss": 2.0432, "step": 265435 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001556008206775145, "loss": 2.1729, "step": 265440 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.00015559928441102633, "loss": 2.0394, "step": 265445 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015559774812554442, "loss": 2.1465, "step": 265450 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015559621182106934, "loss": 2.1437, "step": 265455 }, { "epoch": 0.62, "grad_norm": 2.171875, "learning_rate": 0.00015559467549760154, "loss": 2.0798, "step": 265460 }, { "epoch": 0.62, "grad_norm": 2.25, "learning_rate": 0.0001555931391551416, "loss": 2.1589, "step": 265465 }, { "epoch": 0.62, "grad_norm": 1.9765625, "learning_rate": 0.00015559160279369005, "loss": 1.9671, "step": 265470 }, { "epoch": 0.62, "grad_norm": 2.3125, "learning_rate": 0.0001555900664132474, "loss": 2.0544, "step": 265475 }, { "epoch": 0.62, "grad_norm": 2.296875, "learning_rate": 0.00015558853001381418, "loss": 2.164, "step": 265480 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.0001555869935953909, "loss": 2.2867, "step": 265485 }, { "epoch": 0.62, "grad_norm": 2.359375, "learning_rate": 0.0001555854571579781, "loss": 2.1359, "step": 265490 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.0001555839207015763, "loss": 2.1249, "step": 265495 }, { "epoch": 0.62, "grad_norm": 2.015625, "learning_rate": 0.000155582384226186, "loss": 2.1105, "step": 265500 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.0001555808477318078, "loss": 2.2342, "step": 265505 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015557931121844214, "loss": 1.9963, "step": 265510 }, { "epoch": 0.62, "grad_norm": 2.375, "learning_rate": 0.00015557777468608956, "loss": 2.2407, "step": 265515 }, { "epoch": 0.62, "grad_norm": 2.0625, "learning_rate": 0.00015557623813475065, "loss": 2.1123, "step": 265520 }, { "epoch": 0.62, "grad_norm": 2.34375, "learning_rate": 0.00015557470156442587, "loss": 2.0987, "step": 265525 }, { "epoch": 0.62, "grad_norm": 2.53125, "learning_rate": 0.00015557316497511576, "loss": 2.0387, "step": 265530 }, { "epoch": 0.62, "grad_norm": 2.328125, "learning_rate": 0.00015557162836682083, "loss": 2.0567, "step": 265535 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.00015557009173954164, "loss": 2.1424, "step": 265540 }, { "epoch": 0.62, "grad_norm": 2.515625, "learning_rate": 0.00015556855509327871, "loss": 2.0384, "step": 265545 }, { "epoch": 0.62, "grad_norm": 2.234375, "learning_rate": 0.00015556701842803255, "loss": 2.0286, "step": 265550 }, { "epoch": 0.62, "grad_norm": 2.734375, "learning_rate": 0.0001555654817438037, "loss": 2.1088, "step": 265555 }, { "epoch": 0.62, "grad_norm": 2.140625, "learning_rate": 0.00015556394504059266, "loss": 2.0878, "step": 265560 }, { "epoch": 0.62, "grad_norm": 2.078125, "learning_rate": 0.00015556240831839996, "loss": 1.9062, "step": 265565 }, { "epoch": 0.62, "grad_norm": 2.90625, "learning_rate": 0.00015556087157722614, "loss": 2.0504, "step": 265570 }, { "epoch": 0.62, "grad_norm": 2.125, "learning_rate": 0.0001555593348170717, "loss": 2.0732, "step": 265575 }, { "epoch": 0.62, "grad_norm": 2.390625, "learning_rate": 0.00015555779803793723, "loss": 2.0122, "step": 265580 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015555626123982318, "loss": 2.1567, "step": 265585 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.0001555547244227301, "loss": 1.9127, "step": 265590 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015555318758665853, "loss": 1.973, "step": 265595 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.000155551650731609, "loss": 2.1141, "step": 265600 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015555011385758199, "loss": 2.0236, "step": 265605 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015554857696457807, "loss": 1.9788, "step": 265610 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015554704005259776, "loss": 2.0945, "step": 265615 }, { "epoch": 0.63, "grad_norm": 1.8125, "learning_rate": 0.0001555455031216415, "loss": 1.9495, "step": 265620 }, { "epoch": 0.63, "grad_norm": 2.765625, "learning_rate": 0.00015554396617170998, "loss": 2.1902, "step": 265625 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015554242920280358, "loss": 2.0252, "step": 265630 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.0001555408922149229, "loss": 2.0786, "step": 265635 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015553935520806845, "loss": 2.1914, "step": 265640 }, { "epoch": 0.63, "grad_norm": 1.8828125, "learning_rate": 0.0001555378181822407, "loss": 2.0962, "step": 265645 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015553628113744027, "loss": 1.9242, "step": 265650 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.00015553474407366765, "loss": 2.0303, "step": 265655 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015553320699092333, "loss": 2.1178, "step": 265660 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015553166988920787, "loss": 2.0247, "step": 265665 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015553013276852179, "loss": 2.0584, "step": 265670 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015552859562886558, "loss": 2.0294, "step": 265675 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015552705847023983, "loss": 2.1609, "step": 265680 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.00015552552129264498, "loss": 2.1416, "step": 265685 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015552398409608166, "loss": 2.021, "step": 265690 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015552244688055034, "loss": 1.9335, "step": 265695 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001555209096460515, "loss": 1.8568, "step": 265700 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015551937239258572, "loss": 2.2151, "step": 265705 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015551783512015354, "loss": 2.1268, "step": 265710 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015551629782875547, "loss": 2.0022, "step": 265715 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015551476051839198, "loss": 2.0576, "step": 265720 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015551322318906367, "loss": 2.1611, "step": 265725 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015551168584077103, "loss": 1.9703, "step": 265730 }, { "epoch": 0.63, "grad_norm": 1.8046875, "learning_rate": 0.0001555101484735146, "loss": 1.9715, "step": 265735 }, { "epoch": 0.63, "grad_norm": 2.671875, "learning_rate": 0.0001555086110872949, "loss": 2.1111, "step": 265740 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015550707368211243, "loss": 2.0612, "step": 265745 }, { "epoch": 0.63, "grad_norm": 1.8828125, "learning_rate": 0.00015550553625796778, "loss": 1.9891, "step": 265750 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015550399881486136, "loss": 2.1332, "step": 265755 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015550246135279382, "loss": 2.1905, "step": 265760 }, { "epoch": 0.63, "grad_norm": 1.9765625, "learning_rate": 0.00015550092387176561, "loss": 2.0813, "step": 265765 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.0001554993863717773, "loss": 1.9323, "step": 265770 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.0001554978488528294, "loss": 2.0392, "step": 265775 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015549631131492238, "loss": 2.0919, "step": 265780 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.00015549477375805687, "loss": 2.0694, "step": 265785 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.0001554932361822333, "loss": 1.9532, "step": 265790 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015549169858745224, "loss": 2.1246, "step": 265795 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015549016097371422, "loss": 2.0056, "step": 265800 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015548862334101976, "loss": 2.254, "step": 265805 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015548708568936936, "loss": 2.036, "step": 265810 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.00015548554801876358, "loss": 2.0784, "step": 265815 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015548401032920294, "loss": 2.2649, "step": 265820 }, { "epoch": 0.63, "grad_norm": 2.484375, "learning_rate": 0.00015548247262068795, "loss": 2.0898, "step": 265825 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015548093489321912, "loss": 2.1983, "step": 265830 }, { "epoch": 0.63, "grad_norm": 2.578125, "learning_rate": 0.00015547939714679704, "loss": 2.2221, "step": 265835 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015547785938142215, "loss": 2.0495, "step": 265840 }, { "epoch": 0.63, "grad_norm": 3.0, "learning_rate": 0.00015547632159709502, "loss": 1.9844, "step": 265845 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015547478379381618, "loss": 2.1751, "step": 265850 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015547324597158614, "loss": 2.0219, "step": 265855 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015547170813040543, "loss": 2.1154, "step": 265860 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.0001554701702702746, "loss": 1.8924, "step": 265865 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015546863239119415, "loss": 2.1128, "step": 265870 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001554670944931646, "loss": 2.0886, "step": 265875 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015546555657618648, "loss": 2.008, "step": 265880 }, { "epoch": 0.63, "grad_norm": 1.78125, "learning_rate": 0.00015546401864026036, "loss": 1.8569, "step": 265885 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.0001554624806853867, "loss": 2.0624, "step": 265890 }, { "epoch": 0.63, "grad_norm": 1.75, "learning_rate": 0.00015546094271156604, "loss": 2.0383, "step": 265895 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015545940471879888, "loss": 2.2237, "step": 265900 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015545786670708586, "loss": 1.9059, "step": 265905 }, { "epoch": 0.63, "grad_norm": 1.734375, "learning_rate": 0.00015545632867642738, "loss": 2.0101, "step": 265910 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.000155454790626824, "loss": 2.2454, "step": 265915 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001554532525582763, "loss": 2.0305, "step": 265920 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015545171447078475, "loss": 2.0596, "step": 265925 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.0001554501763643499, "loss": 1.9099, "step": 265930 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015544863823897223, "loss": 2.0855, "step": 265935 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.0001554471000946523, "loss": 1.9791, "step": 265940 }, { "epoch": 0.63, "grad_norm": 1.7421875, "learning_rate": 0.00015544556193139065, "loss": 2.0341, "step": 265945 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001554440237491878, "loss": 2.2762, "step": 265950 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015544248554804426, "loss": 2.024, "step": 265955 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015544094732796056, "loss": 2.2012, "step": 265960 }, { "epoch": 0.63, "grad_norm": 2.640625, "learning_rate": 0.0001554394090889372, "loss": 2.1325, "step": 265965 }, { "epoch": 0.63, "grad_norm": 1.6484375, "learning_rate": 0.00015543787083097476, "loss": 2.018, "step": 265970 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015543633255407375, "loss": 2.134, "step": 265975 }, { "epoch": 0.63, "grad_norm": 1.828125, "learning_rate": 0.00015543479425823468, "loss": 1.93, "step": 265980 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015543325594345806, "loss": 1.9809, "step": 265985 }, { "epoch": 0.63, "grad_norm": 1.8515625, "learning_rate": 0.00015543171760974445, "loss": 2.1443, "step": 265990 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015543017925709432, "loss": 2.017, "step": 265995 }, { "epoch": 0.63, "grad_norm": 1.6328125, "learning_rate": 0.0001554286408855083, "loss": 1.9746, "step": 266000 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001554271024949868, "loss": 2.1543, "step": 266005 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015542556408553045, "loss": 2.2171, "step": 266010 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015542402565713967, "loss": 2.2225, "step": 266015 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015542248720981503, "loss": 2.0703, "step": 266020 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015542094874355708, "loss": 1.9816, "step": 266025 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015541941025836638, "loss": 2.2279, "step": 266030 }, { "epoch": 0.63, "grad_norm": 1.9765625, "learning_rate": 0.00015541787175424334, "loss": 1.9519, "step": 266035 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015541633323118856, "loss": 2.0842, "step": 266040 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015541479468920256, "loss": 2.2003, "step": 266045 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001554132561282859, "loss": 2.1484, "step": 266050 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.000155411717548439, "loss": 2.284, "step": 266055 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015541017894966252, "loss": 2.0894, "step": 266060 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.0001554086403319569, "loss": 2.073, "step": 266065 }, { "epoch": 0.63, "grad_norm": 1.875, "learning_rate": 0.00015540710169532264, "loss": 1.9074, "step": 266070 }, { "epoch": 0.63, "grad_norm": 2.953125, "learning_rate": 0.00015540556303976036, "loss": 2.1652, "step": 266075 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015540402436527048, "loss": 2.1546, "step": 266080 }, { "epoch": 0.63, "grad_norm": 2.53125, "learning_rate": 0.00015540248567185363, "loss": 2.0468, "step": 266085 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015540094695951027, "loss": 2.1181, "step": 266090 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015539940822824092, "loss": 2.0476, "step": 266095 }, { "epoch": 0.63, "grad_norm": 2.5625, "learning_rate": 0.00015539786947804616, "loss": 2.1932, "step": 266100 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015539633070892647, "loss": 2.0952, "step": 266105 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001553947919208824, "loss": 1.8547, "step": 266110 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015539325311391444, "loss": 2.0417, "step": 266115 }, { "epoch": 0.63, "grad_norm": 1.7890625, "learning_rate": 0.00015539171428802318, "loss": 2.0452, "step": 266120 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015539017544320907, "loss": 2.0986, "step": 266125 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015538863657947268, "loss": 2.2103, "step": 266130 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015538709769681453, "loss": 1.9946, "step": 266135 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015538555879523512, "loss": 2.0151, "step": 266140 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015538401987473502, "loss": 2.0674, "step": 266145 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015538248093531476, "loss": 2.101, "step": 266150 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015538094197697482, "loss": 2.0668, "step": 266155 }, { "epoch": 0.63, "grad_norm": 2.71875, "learning_rate": 0.00015537940299971572, "loss": 2.0881, "step": 266160 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015537786400353804, "loss": 2.1787, "step": 266165 }, { "epoch": 0.63, "grad_norm": 1.7578125, "learning_rate": 0.00015537632498844228, "loss": 2.1062, "step": 266170 }, { "epoch": 0.63, "grad_norm": 1.9921875, "learning_rate": 0.00015537478595442896, "loss": 2.0485, "step": 266175 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015537324690149856, "loss": 2.0668, "step": 266180 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001553717078296517, "loss": 2.1262, "step": 266185 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015537016873888888, "loss": 2.0195, "step": 266190 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001553686296292106, "loss": 2.0503, "step": 266195 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015536709050061737, "loss": 2.0202, "step": 266200 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015536555135310973, "loss": 2.1196, "step": 266205 }, { "epoch": 0.63, "grad_norm": 1.765625, "learning_rate": 0.00015536401218668822, "loss": 2.0965, "step": 266210 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015536247300135337, "loss": 2.1177, "step": 266215 }, { "epoch": 0.63, "grad_norm": 1.75, "learning_rate": 0.00015536093379710567, "loss": 2.106, "step": 266220 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015535939457394572, "loss": 2.2068, "step": 266225 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015535785533187397, "loss": 2.1099, "step": 266230 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015535631607089097, "loss": 2.0373, "step": 266235 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015535477679099727, "loss": 2.1818, "step": 266240 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015535323749219334, "loss": 2.0751, "step": 266245 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015535169817447976, "loss": 2.0757, "step": 266250 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015535015883785702, "loss": 2.1544, "step": 266255 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015534861948232566, "loss": 2.0359, "step": 266260 }, { "epoch": 0.63, "grad_norm": 2.53125, "learning_rate": 0.00015534708010788624, "loss": 1.9575, "step": 266265 }, { "epoch": 0.63, "grad_norm": 1.8515625, "learning_rate": 0.0001553455407145392, "loss": 2.0773, "step": 266270 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015534400130228518, "loss": 2.024, "step": 266275 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015534246187112462, "loss": 2.1759, "step": 266280 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015534092242105806, "loss": 2.0517, "step": 266285 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015533938295208606, "loss": 2.1092, "step": 266290 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.0001553378434642091, "loss": 2.1611, "step": 266295 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015533630395742773, "loss": 2.2351, "step": 266300 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015533476443174252, "loss": 1.9185, "step": 266305 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.0001553332248871539, "loss": 2.094, "step": 266310 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015533168532366244, "loss": 2.0045, "step": 266315 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001553301457412687, "loss": 2.0756, "step": 266320 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001553286061399732, "loss": 1.9931, "step": 266325 }, { "epoch": 0.63, "grad_norm": 1.9296875, "learning_rate": 0.0001553270665197764, "loss": 2.0062, "step": 266330 }, { "epoch": 0.63, "grad_norm": 3.265625, "learning_rate": 0.0001553255268806789, "loss": 2.0837, "step": 266335 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015532398722268117, "loss": 1.9706, "step": 266340 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015532244754578377, "loss": 2.1231, "step": 266345 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.00015532090784998724, "loss": 2.1974, "step": 266350 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015531936813529206, "loss": 2.0444, "step": 266355 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.0001553178284016988, "loss": 2.1774, "step": 266360 }, { "epoch": 0.63, "grad_norm": 2.734375, "learning_rate": 0.00015531628864920799, "loss": 2.0587, "step": 266365 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015531474887782006, "loss": 2.1042, "step": 266370 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015531320908753566, "loss": 1.9306, "step": 266375 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015531166927835525, "loss": 2.0854, "step": 266380 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001553101294502794, "loss": 2.1405, "step": 266385 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015530858960330858, "loss": 1.9991, "step": 266390 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.0001553070497374433, "loss": 1.9564, "step": 266395 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.0001553055098526842, "loss": 2.0775, "step": 266400 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001553039699490317, "loss": 2.124, "step": 266405 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015530243002648638, "loss": 2.0513, "step": 266410 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015530089008504872, "loss": 2.0971, "step": 266415 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015529935012471933, "loss": 2.0337, "step": 266420 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015529781014549863, "loss": 2.0882, "step": 266425 }, { "epoch": 0.63, "grad_norm": 2.734375, "learning_rate": 0.00015529627014738718, "loss": 2.153, "step": 266430 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015529473013038556, "loss": 1.955, "step": 266435 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015529319009449424, "loss": 2.1454, "step": 266440 }, { "epoch": 0.63, "grad_norm": 2.984375, "learning_rate": 0.0001552916500397138, "loss": 2.2357, "step": 266445 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.00015529010996604464, "loss": 2.0129, "step": 266450 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015528856987348744, "loss": 2.2164, "step": 266455 }, { "epoch": 0.63, "grad_norm": 2.8125, "learning_rate": 0.00015528702976204265, "loss": 2.1114, "step": 266460 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015528548963171082, "loss": 2.0279, "step": 266465 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015528394948249248, "loss": 2.0146, "step": 266470 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001552824093143881, "loss": 2.1289, "step": 266475 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015528086912739824, "loss": 2.1142, "step": 266480 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015527932892152348, "loss": 2.0794, "step": 266485 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015527778869676425, "loss": 1.9861, "step": 266490 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015527624845312118, "loss": 2.1371, "step": 266495 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001552747081905947, "loss": 2.1231, "step": 266500 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015527316790918535, "loss": 2.1431, "step": 266505 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015527162760889374, "loss": 2.045, "step": 266510 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015527008728972032, "loss": 2.1693, "step": 266515 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015526854695166564, "loss": 2.1734, "step": 266520 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015526700659473022, "loss": 2.2152, "step": 266525 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015526546621891457, "loss": 2.2389, "step": 266530 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015526392582421926, "loss": 2.0778, "step": 266535 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015526238541064478, "loss": 2.0222, "step": 266540 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015526084497819167, "loss": 2.0121, "step": 266545 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015525930452686045, "loss": 1.9206, "step": 266550 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.00015525776405665161, "loss": 2.0857, "step": 266555 }, { "epoch": 0.63, "grad_norm": 1.875, "learning_rate": 0.00015525622356756577, "loss": 1.7056, "step": 266560 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015525468305960341, "loss": 2.1396, "step": 266565 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015525314253276502, "loss": 2.1686, "step": 266570 }, { "epoch": 0.63, "grad_norm": 2.59375, "learning_rate": 0.00015525160198705119, "loss": 2.0799, "step": 266575 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015525006142246234, "loss": 2.0295, "step": 266580 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001552485208389991, "loss": 2.3768, "step": 266585 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015524698023666197, "loss": 2.0911, "step": 266590 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015524543961545147, "loss": 2.1104, "step": 266595 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015524389897536814, "loss": 2.1019, "step": 266600 }, { "epoch": 0.63, "grad_norm": 2.5, "learning_rate": 0.00015524235831641247, "loss": 2.0259, "step": 266605 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015524081763858502, "loss": 2.2566, "step": 266610 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001552392769418863, "loss": 2.2661, "step": 266615 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015523773622631683, "loss": 2.1267, "step": 266620 }, { "epoch": 0.63, "grad_norm": 1.9765625, "learning_rate": 0.0001552361954918772, "loss": 2.1173, "step": 266625 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015523465473856783, "loss": 1.9003, "step": 266630 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015523311396638931, "loss": 1.9834, "step": 266635 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015523157317534215, "loss": 2.0149, "step": 266640 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.0001552300323654269, "loss": 2.0191, "step": 266645 }, { "epoch": 0.63, "grad_norm": 1.9453125, "learning_rate": 0.00015522849153664408, "loss": 2.0778, "step": 266650 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015522695068899417, "loss": 2.1992, "step": 266655 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015522540982247776, "loss": 2.038, "step": 266660 }, { "epoch": 0.63, "grad_norm": 2.5625, "learning_rate": 0.00015522386893709533, "loss": 2.0672, "step": 266665 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015522232803284744, "loss": 2.1651, "step": 266670 }, { "epoch": 0.63, "grad_norm": 2.484375, "learning_rate": 0.0001552207871097346, "loss": 2.0446, "step": 266675 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015521924616775733, "loss": 1.987, "step": 266680 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015521770520691617, "loss": 1.9869, "step": 266685 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015521616422721162, "loss": 2.0398, "step": 266690 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015521462322864424, "loss": 2.1514, "step": 266695 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015521308221121455, "loss": 2.0224, "step": 266700 }, { "epoch": 0.63, "grad_norm": 2.765625, "learning_rate": 0.00015521154117492309, "loss": 2.1976, "step": 266705 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015521000011977032, "loss": 2.0453, "step": 266710 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015520845904575686, "loss": 1.9601, "step": 266715 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015520691795288313, "loss": 2.0665, "step": 266720 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015520537684114979, "loss": 2.278, "step": 266725 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015520383571055723, "loss": 1.9985, "step": 266730 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015520229456110607, "loss": 2.0489, "step": 266735 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015520075339279678, "loss": 2.0779, "step": 266740 }, { "epoch": 0.63, "grad_norm": 2.5625, "learning_rate": 0.00015519921220562993, "loss": 2.1244, "step": 266745 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015519767099960604, "loss": 2.1285, "step": 266750 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015519612977472563, "loss": 2.0347, "step": 266755 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015519458853098918, "loss": 2.2007, "step": 266760 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001551930472683973, "loss": 2.0002, "step": 266765 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015519150598695042, "loss": 1.9314, "step": 266770 }, { "epoch": 0.63, "grad_norm": 2.734375, "learning_rate": 0.00015518996468664916, "loss": 2.1325, "step": 266775 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.000155188423367494, "loss": 2.1032, "step": 266780 }, { "epoch": 0.63, "grad_norm": 1.75, "learning_rate": 0.00015518688202948547, "loss": 2.1253, "step": 266785 }, { "epoch": 0.63, "grad_norm": 2.53125, "learning_rate": 0.0001551853406726241, "loss": 2.2122, "step": 266790 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015518379929691046, "loss": 2.0879, "step": 266795 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015518225790234498, "loss": 2.261, "step": 266800 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015518071648892823, "loss": 1.8752, "step": 266805 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.00015517917505666076, "loss": 2.0862, "step": 266810 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015517763360554314, "loss": 2.1564, "step": 266815 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.0001551760921355758, "loss": 1.8731, "step": 266820 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015517455064675928, "loss": 2.0482, "step": 266825 }, { "epoch": 0.63, "grad_norm": 2.5625, "learning_rate": 0.00015517300913909417, "loss": 1.8223, "step": 266830 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015517146761258094, "loss": 2.1667, "step": 266835 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015516992606722015, "loss": 1.9599, "step": 266840 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015516838450301226, "loss": 2.0264, "step": 266845 }, { "epoch": 0.63, "grad_norm": 2.484375, "learning_rate": 0.00015516684291995788, "loss": 1.9462, "step": 266850 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015516530131805752, "loss": 2.1646, "step": 266855 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015516375969731172, "loss": 2.232, "step": 266860 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015516221805772094, "loss": 2.0252, "step": 266865 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015516067639928576, "loss": 1.9518, "step": 266870 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.0001551591347220067, "loss": 2.0338, "step": 266875 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015515759302588426, "loss": 2.2113, "step": 266880 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015515605131091896, "loss": 2.0713, "step": 266885 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015515450957711142, "loss": 2.2526, "step": 266890 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015515296782446205, "loss": 2.0671, "step": 266895 }, { "epoch": 0.63, "grad_norm": 2.78125, "learning_rate": 0.00015515142605297143, "loss": 2.0439, "step": 266900 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.0001551498842626401, "loss": 2.0658, "step": 266905 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015514834245346858, "loss": 1.849, "step": 266910 }, { "epoch": 0.63, "grad_norm": 2.59375, "learning_rate": 0.00015514680062545736, "loss": 2.1006, "step": 266915 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.000155145258778607, "loss": 2.1893, "step": 266920 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.000155143716912918, "loss": 1.9499, "step": 266925 }, { "epoch": 0.63, "grad_norm": 1.9765625, "learning_rate": 0.00015514217502839094, "loss": 1.9826, "step": 266930 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001551406331250263, "loss": 2.1246, "step": 266935 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001551390912028246, "loss": 2.1224, "step": 266940 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015513754926178643, "loss": 2.0854, "step": 266945 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.00015513600730191228, "loss": 2.0964, "step": 266950 }, { "epoch": 0.63, "grad_norm": 2.96875, "learning_rate": 0.00015513446532320263, "loss": 2.1148, "step": 266955 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015513292332565803, "loss": 2.0073, "step": 266960 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015513138130927905, "loss": 2.136, "step": 266965 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001551298392740662, "loss": 1.9084, "step": 266970 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015512829722001998, "loss": 2.2485, "step": 266975 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.00015512675514714092, "loss": 2.0286, "step": 266980 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015512521305542957, "loss": 2.1278, "step": 266985 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.0001551236709448865, "loss": 2.0943, "step": 266990 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.0001551221288155121, "loss": 2.1808, "step": 266995 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015512058666730707, "loss": 2.1518, "step": 267000 }, { "epoch": 0.63, "grad_norm": 1.9453125, "learning_rate": 0.0001551190445002718, "loss": 2.2071, "step": 267005 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015511750231440684, "loss": 2.1662, "step": 267010 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015511596010971278, "loss": 2.2514, "step": 267015 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015511441788619007, "loss": 2.0044, "step": 267020 }, { "epoch": 0.63, "grad_norm": 1.8046875, "learning_rate": 0.0001551128756438393, "loss": 2.1576, "step": 267025 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015511133338266098, "loss": 2.1793, "step": 267030 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015510979110265564, "loss": 2.2136, "step": 267035 }, { "epoch": 0.63, "grad_norm": 1.7890625, "learning_rate": 0.00015510824880382378, "loss": 2.0353, "step": 267040 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015510670648616594, "loss": 2.2049, "step": 267045 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015510516414968267, "loss": 2.015, "step": 267050 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015510362179437442, "loss": 1.9761, "step": 267055 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015510207942024185, "loss": 1.9911, "step": 267060 }, { "epoch": 0.63, "grad_norm": 1.9140625, "learning_rate": 0.00015510053702728533, "loss": 1.9312, "step": 267065 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015509899461550551, "loss": 2.0282, "step": 267070 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.0001550974521849029, "loss": 2.0787, "step": 267075 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.00015509590973547795, "loss": 2.1191, "step": 267080 }, { "epoch": 0.63, "grad_norm": 1.8046875, "learning_rate": 0.00015509436726723126, "loss": 1.9622, "step": 267085 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015509282478016334, "loss": 2.0395, "step": 267090 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015509128227427472, "loss": 2.0949, "step": 267095 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001550897397495659, "loss": 2.11, "step": 267100 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015508819720603745, "loss": 2.202, "step": 267105 }, { "epoch": 0.63, "grad_norm": 2.609375, "learning_rate": 0.00015508665464368984, "loss": 2.0583, "step": 267110 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015508511206252369, "loss": 2.0392, "step": 267115 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015508356946253939, "loss": 2.0485, "step": 267120 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001550820268437376, "loss": 1.9904, "step": 267125 }, { "epoch": 0.63, "grad_norm": 2.703125, "learning_rate": 0.00015508048420611874, "loss": 1.9379, "step": 267130 }, { "epoch": 0.63, "grad_norm": 2.75, "learning_rate": 0.00015507894154968344, "loss": 2.1181, "step": 267135 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015507739887443215, "loss": 2.0879, "step": 267140 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001550758561803654, "loss": 2.1851, "step": 267145 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.00015507431346748377, "loss": 1.8843, "step": 267150 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015507277073578774, "loss": 2.0375, "step": 267155 }, { "epoch": 0.63, "grad_norm": 1.90625, "learning_rate": 0.00015507122798527788, "loss": 1.9061, "step": 267160 }, { "epoch": 0.63, "grad_norm": 2.578125, "learning_rate": 0.00015506968521595464, "loss": 1.9907, "step": 267165 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015506814242781863, "loss": 2.1378, "step": 267170 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015506659962087033, "loss": 2.0401, "step": 267175 }, { "epoch": 0.63, "grad_norm": 2.640625, "learning_rate": 0.0001550650567951103, "loss": 2.0931, "step": 267180 }, { "epoch": 0.63, "grad_norm": 1.78125, "learning_rate": 0.00015506351395053902, "loss": 2.144, "step": 267185 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015506197108715707, "loss": 2.2323, "step": 267190 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015506042820496494, "loss": 2.1772, "step": 267195 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001550588853039632, "loss": 2.2371, "step": 267200 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001550573423841523, "loss": 2.2032, "step": 267205 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015505579944553283, "loss": 2.1207, "step": 267210 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015505425648810531, "loss": 2.163, "step": 267215 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015505271351187025, "loss": 2.0042, "step": 267220 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015505117051682819, "loss": 2.0464, "step": 267225 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.00015504962750297966, "loss": 2.0423, "step": 267230 }, { "epoch": 0.63, "grad_norm": 1.9453125, "learning_rate": 0.00015504808447032515, "loss": 2.0551, "step": 267235 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015504654141886525, "loss": 2.0287, "step": 267240 }, { "epoch": 0.63, "grad_norm": 3.359375, "learning_rate": 0.00015504499834860042, "loss": 1.9449, "step": 267245 }, { "epoch": 0.63, "grad_norm": 1.734375, "learning_rate": 0.00015504345525953124, "loss": 2.1918, "step": 267250 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.0001550419121516582, "loss": 2.015, "step": 267255 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.00015504036902498186, "loss": 2.1407, "step": 267260 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015503882587950275, "loss": 1.8961, "step": 267265 }, { "epoch": 0.63, "grad_norm": 2.609375, "learning_rate": 0.00015503728271522136, "loss": 2.0707, "step": 267270 }, { "epoch": 0.63, "grad_norm": 1.8046875, "learning_rate": 0.00015503573953213823, "loss": 2.0171, "step": 267275 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015503419633025386, "loss": 2.1898, "step": 267280 }, { "epoch": 0.63, "grad_norm": 2.703125, "learning_rate": 0.00015503265310956888, "loss": 2.2417, "step": 267285 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001550311098700837, "loss": 2.0116, "step": 267290 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015502956661179893, "loss": 2.1848, "step": 267295 }, { "epoch": 0.63, "grad_norm": 2.625, "learning_rate": 0.00015502802333471505, "loss": 2.1752, "step": 267300 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015502648003883258, "loss": 2.1347, "step": 267305 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.00015502493672415207, "loss": 2.0626, "step": 267310 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015502339339067405, "loss": 1.886, "step": 267315 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015502185003839905, "loss": 2.0342, "step": 267320 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001550203066673276, "loss": 1.9898, "step": 267325 }, { "epoch": 0.63, "grad_norm": 2.734375, "learning_rate": 0.00015501876327746018, "loss": 2.1192, "step": 267330 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015501721986879735, "loss": 1.9925, "step": 267335 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015501567644133967, "loss": 2.0255, "step": 267340 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015501413299508766, "loss": 1.9549, "step": 267345 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015501258953004175, "loss": 2.0823, "step": 267350 }, { "epoch": 0.63, "grad_norm": 1.9765625, "learning_rate": 0.00015501104604620261, "loss": 2.0474, "step": 267355 }, { "epoch": 0.63, "grad_norm": 1.796875, "learning_rate": 0.00015500950254357065, "loss": 2.2106, "step": 267360 }, { "epoch": 0.63, "grad_norm": 1.8828125, "learning_rate": 0.00015500795902214647, "loss": 2.0464, "step": 267365 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.00015500641548193058, "loss": 2.0644, "step": 267370 }, { "epoch": 0.63, "grad_norm": 2.484375, "learning_rate": 0.00015500487192292347, "loss": 2.1312, "step": 267375 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015500332834512574, "loss": 1.8464, "step": 267380 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015500178474853784, "loss": 2.0935, "step": 267385 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015500024113316037, "loss": 2.0782, "step": 267390 }, { "epoch": 0.63, "grad_norm": 2.59375, "learning_rate": 0.00015499869749899378, "loss": 1.9273, "step": 267395 }, { "epoch": 0.63, "grad_norm": 1.6796875, "learning_rate": 0.00015499715384603867, "loss": 1.8939, "step": 267400 }, { "epoch": 0.63, "grad_norm": 1.9921875, "learning_rate": 0.00015499561017429553, "loss": 2.132, "step": 267405 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015499406648376488, "loss": 1.8827, "step": 267410 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.00015499252277444725, "loss": 2.0876, "step": 267415 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001549909790463432, "loss": 1.8617, "step": 267420 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001549894352994532, "loss": 1.964, "step": 267425 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015498789153377786, "loss": 1.9793, "step": 267430 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015498634774931765, "loss": 2.0044, "step": 267435 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015498480394607306, "loss": 2.074, "step": 267440 }, { "epoch": 0.63, "grad_norm": 2.796875, "learning_rate": 0.0001549832601240447, "loss": 2.0429, "step": 267445 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015498171628323306, "loss": 2.0344, "step": 267450 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015498017242363864, "loss": 2.144, "step": 267455 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015497862854526204, "loss": 2.0258, "step": 267460 }, { "epoch": 0.63, "grad_norm": 1.78125, "learning_rate": 0.0001549770846481037, "loss": 2.1857, "step": 267465 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015497554073216424, "loss": 2.0, "step": 267470 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.0001549739967974441, "loss": 1.8831, "step": 267475 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.0001549724528439439, "loss": 2.0867, "step": 267480 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015497090887166403, "loss": 1.987, "step": 267485 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015496936488060517, "loss": 2.1312, "step": 267490 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015496782087076773, "loss": 2.0237, "step": 267495 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001549662768421523, "loss": 2.0567, "step": 267500 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.00015496473279475942, "loss": 1.9859, "step": 267505 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015496318872858957, "loss": 2.125, "step": 267510 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.0001549616446436433, "loss": 2.3095, "step": 267515 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015496010053992108, "loss": 1.9154, "step": 267520 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015495855641742357, "loss": 1.9998, "step": 267525 }, { "epoch": 0.63, "grad_norm": 2.6875, "learning_rate": 0.0001549570122761512, "loss": 2.0633, "step": 267530 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.0001549554681161045, "loss": 2.1786, "step": 267535 }, { "epoch": 0.63, "grad_norm": 1.8515625, "learning_rate": 0.00015495392393728404, "loss": 1.9707, "step": 267540 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015495237973969035, "loss": 1.8994, "step": 267545 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015495083552332387, "loss": 1.834, "step": 267550 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.0001549492912881852, "loss": 2.1657, "step": 267555 }, { "epoch": 0.63, "grad_norm": 1.90625, "learning_rate": 0.00015494774703427489, "loss": 1.9304, "step": 267560 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.0001549462027615934, "loss": 2.0692, "step": 267565 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015494465847014127, "loss": 2.0167, "step": 267570 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.0001549431141599191, "loss": 2.0509, "step": 267575 }, { "epoch": 0.63, "grad_norm": 1.9140625, "learning_rate": 0.00015494156983092734, "loss": 2.4049, "step": 267580 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015494002548316655, "loss": 1.9811, "step": 267585 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015493848111663725, "loss": 2.1182, "step": 267590 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015493693673133998, "loss": 2.0855, "step": 267595 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015493539232727522, "loss": 1.9933, "step": 267600 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015493384790444357, "loss": 2.1248, "step": 267605 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001549323034628455, "loss": 1.9958, "step": 267610 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015493075900248156, "loss": 2.15, "step": 267615 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015492921452335228, "loss": 2.1674, "step": 267620 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.0001549276700254582, "loss": 2.1261, "step": 267625 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.0001549261255087998, "loss": 2.16, "step": 267630 }, { "epoch": 0.63, "grad_norm": 2.609375, "learning_rate": 0.00015492458097337767, "loss": 1.7709, "step": 267635 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001549230364191923, "loss": 2.0285, "step": 267640 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015492149184624424, "loss": 2.1381, "step": 267645 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015491994725453395, "loss": 1.9521, "step": 267650 }, { "epoch": 0.63, "grad_norm": 4.03125, "learning_rate": 0.00015491840264406204, "loss": 2.0121, "step": 267655 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.00015491685801482903, "loss": 2.0918, "step": 267660 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.0001549153133668354, "loss": 2.0589, "step": 267665 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.0001549137687000817, "loss": 2.0839, "step": 267670 }, { "epoch": 0.63, "grad_norm": 1.90625, "learning_rate": 0.00015491222401456847, "loss": 2.1159, "step": 267675 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.0001549106793102962, "loss": 1.9815, "step": 267680 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.0001549091345872655, "loss": 1.9188, "step": 267685 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015490758984547682, "loss": 1.9267, "step": 267690 }, { "epoch": 0.63, "grad_norm": 1.8359375, "learning_rate": 0.00015490604508493073, "loss": 2.0899, "step": 267695 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015490450030562772, "loss": 2.1883, "step": 267700 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015490295550756833, "loss": 1.9772, "step": 267705 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015490141069075307, "loss": 2.0111, "step": 267710 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015489986585518254, "loss": 2.0893, "step": 267715 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015489832100085718, "loss": 1.9326, "step": 267720 }, { "epoch": 0.63, "grad_norm": 2.53125, "learning_rate": 0.0001548967761277776, "loss": 2.0894, "step": 267725 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015489523123594427, "loss": 1.9942, "step": 267730 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.0001548936863253577, "loss": 2.0115, "step": 267735 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.0001548921413960185, "loss": 2.1919, "step": 267740 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015489059644792713, "loss": 2.1046, "step": 267745 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001548890514810841, "loss": 2.1082, "step": 267750 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015488750649549004, "loss": 2.0771, "step": 267755 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.0001548859614911454, "loss": 2.0189, "step": 267760 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001548844164680507, "loss": 2.1933, "step": 267765 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001548828714262065, "loss": 2.102, "step": 267770 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.0001548813263656133, "loss": 1.8438, "step": 267775 }, { "epoch": 0.63, "grad_norm": 1.734375, "learning_rate": 0.00015487978128627162, "loss": 2.0867, "step": 267780 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015487823618818208, "loss": 2.0427, "step": 267785 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.0001548766910713451, "loss": 2.0251, "step": 267790 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015487514593576123, "loss": 2.2725, "step": 267795 }, { "epoch": 0.63, "grad_norm": 2.625, "learning_rate": 0.00015487360078143104, "loss": 2.214, "step": 267800 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015487205560835502, "loss": 2.1203, "step": 267805 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015487051041653373, "loss": 2.0737, "step": 267810 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015486896520596765, "loss": 2.1658, "step": 267815 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.00015486741997665734, "loss": 1.9633, "step": 267820 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015486587472860334, "loss": 2.1853, "step": 267825 }, { "epoch": 0.63, "grad_norm": 1.9296875, "learning_rate": 0.00015486432946180617, "loss": 2.2042, "step": 267830 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015486278417626634, "loss": 2.0096, "step": 267835 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015486123887198437, "loss": 2.0081, "step": 267840 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.00015485969354896083, "loss": 2.0988, "step": 267845 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001548581482071962, "loss": 2.1328, "step": 267850 }, { "epoch": 0.63, "grad_norm": 2.8125, "learning_rate": 0.0001548566028466911, "loss": 2.1982, "step": 267855 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015485505746744592, "loss": 2.1436, "step": 267860 }, { "epoch": 0.63, "grad_norm": 1.78125, "learning_rate": 0.00015485351206946126, "loss": 2.0586, "step": 267865 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015485196665273766, "loss": 1.8982, "step": 267870 }, { "epoch": 0.63, "grad_norm": 5.40625, "learning_rate": 0.00015485042121727563, "loss": 2.1878, "step": 267875 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015484887576307568, "loss": 2.1574, "step": 267880 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.0001548473302901384, "loss": 2.1986, "step": 267885 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015484578479846425, "loss": 2.1135, "step": 267890 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001548442392880538, "loss": 2.0288, "step": 267895 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015484269375890756, "loss": 2.1405, "step": 267900 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015484114821102607, "loss": 2.0909, "step": 267905 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015483960264440984, "loss": 2.2247, "step": 267910 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015483805705905943, "loss": 2.1476, "step": 267915 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015483651145497532, "loss": 1.9922, "step": 267920 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015483496583215805, "loss": 2.0296, "step": 267925 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001548334201906082, "loss": 1.9648, "step": 267930 }, { "epoch": 0.63, "grad_norm": 2.671875, "learning_rate": 0.0001548318745303262, "loss": 2.0526, "step": 267935 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015483032885131267, "loss": 1.9643, "step": 267940 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.0001548287831535681, "loss": 2.1524, "step": 267945 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015482723743709303, "loss": 2.0596, "step": 267950 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.000154825691701888, "loss": 2.195, "step": 267955 }, { "epoch": 0.63, "grad_norm": 1.9921875, "learning_rate": 0.0001548241459479535, "loss": 2.0509, "step": 267960 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015482260017529007, "loss": 1.9685, "step": 267965 }, { "epoch": 0.63, "grad_norm": 2.59375, "learning_rate": 0.0001548210543838983, "loss": 1.9934, "step": 267970 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.00015481950857377857, "loss": 2.1525, "step": 267975 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015481796274493157, "loss": 2.043, "step": 267980 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015481641689735773, "loss": 2.2512, "step": 267985 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.0001548148710310576, "loss": 2.0744, "step": 267990 }, { "epoch": 0.63, "grad_norm": 1.8359375, "learning_rate": 0.00015481332514603175, "loss": 1.9875, "step": 267995 }, { "epoch": 0.63, "grad_norm": 1.78125, "learning_rate": 0.00015481177924228068, "loss": 1.9271, "step": 268000 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015481023331980487, "loss": 2.0263, "step": 268005 }, { "epoch": 0.63, "grad_norm": 1.828125, "learning_rate": 0.0001548086873786049, "loss": 2.1775, "step": 268010 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015480714141868131, "loss": 2.1176, "step": 268015 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015480559544003462, "loss": 1.9744, "step": 268020 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.0001548040494426653, "loss": 1.9841, "step": 268025 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015480250342657392, "loss": 1.8739, "step": 268030 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015480095739176105, "loss": 2.1004, "step": 268035 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.00015479941133822719, "loss": 1.9637, "step": 268040 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.0001547978652659728, "loss": 2.0805, "step": 268045 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.0001547963191749985, "loss": 2.1354, "step": 268050 }, { "epoch": 0.63, "grad_norm": 1.9140625, "learning_rate": 0.00015479477306530476, "loss": 2.1427, "step": 268055 }, { "epoch": 0.63, "grad_norm": 2.875, "learning_rate": 0.00015479322693689218, "loss": 2.0233, "step": 268060 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.0001547916807897612, "loss": 2.1215, "step": 268065 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015479013462391241, "loss": 1.9216, "step": 268070 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.0001547885884393463, "loss": 2.2097, "step": 268075 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015478704223606342, "loss": 1.9563, "step": 268080 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015478549601406427, "loss": 1.893, "step": 268085 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015478394977334943, "loss": 2.1414, "step": 268090 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015478240351391942, "loss": 2.0338, "step": 268095 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.0001547808572357747, "loss": 1.9547, "step": 268100 }, { "epoch": 0.63, "grad_norm": 1.9296875, "learning_rate": 0.00015477931093891585, "loss": 2.1762, "step": 268105 }, { "epoch": 0.63, "grad_norm": 1.890625, "learning_rate": 0.00015477776462334344, "loss": 1.9312, "step": 268110 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015477621828905791, "loss": 2.1029, "step": 268115 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.00015477467193605985, "loss": 1.9686, "step": 268120 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015477312556434975, "loss": 2.1452, "step": 268125 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001547715791739282, "loss": 1.8262, "step": 268130 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015477003276479563, "loss": 2.0838, "step": 268135 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015476848633695265, "loss": 2.0577, "step": 268140 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015476693989039975, "loss": 2.0826, "step": 268145 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001547653934251375, "loss": 2.0804, "step": 268150 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015476384694116635, "loss": 2.065, "step": 268155 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001547623004384869, "loss": 2.0008, "step": 268160 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015476075391709966, "loss": 2.1557, "step": 268165 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015475920737700516, "loss": 1.9599, "step": 268170 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.00015475766081820392, "loss": 2.1135, "step": 268175 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015475611424069645, "loss": 2.0455, "step": 268180 }, { "epoch": 0.63, "grad_norm": 1.8125, "learning_rate": 0.00015475456764448331, "loss": 2.0123, "step": 268185 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.000154753021029565, "loss": 2.0527, "step": 268190 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.0001547514743959421, "loss": 2.1852, "step": 268195 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.0001547499277436151, "loss": 2.1882, "step": 268200 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.0001547483810725845, "loss": 1.9393, "step": 268205 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001547468343828509, "loss": 2.1001, "step": 268210 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015474528767441475, "loss": 2.2847, "step": 268215 }, { "epoch": 0.63, "grad_norm": 2.90625, "learning_rate": 0.00015474374094727664, "loss": 2.1143, "step": 268220 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015474219420143705, "loss": 2.2171, "step": 268225 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015474064743689657, "loss": 2.1027, "step": 268230 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001547391006536557, "loss": 2.0845, "step": 268235 }, { "epoch": 0.63, "grad_norm": 1.796875, "learning_rate": 0.00015473755385171493, "loss": 2.0145, "step": 268240 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.0001547360070310748, "loss": 2.0787, "step": 268245 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001547344601917359, "loss": 1.985, "step": 268250 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001547329133336987, "loss": 2.1404, "step": 268255 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015473136645696373, "loss": 2.0876, "step": 268260 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015472981956153156, "loss": 2.2062, "step": 268265 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.00015472827264740265, "loss": 1.9277, "step": 268270 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001547267257145776, "loss": 2.1596, "step": 268275 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.0001547251787630569, "loss": 1.9918, "step": 268280 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.0001547236317928411, "loss": 2.1054, "step": 268285 }, { "epoch": 0.63, "grad_norm": 1.8515625, "learning_rate": 0.0001547220848039307, "loss": 1.996, "step": 268290 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015472053779632625, "loss": 2.0489, "step": 268295 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.00015471899077002825, "loss": 2.1346, "step": 268300 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015471744372503728, "loss": 2.1009, "step": 268305 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015471589666135382, "loss": 1.9678, "step": 268310 }, { "epoch": 0.63, "grad_norm": 3.21875, "learning_rate": 0.00015471434957897845, "loss": 2.1898, "step": 268315 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015471280247791164, "loss": 2.0511, "step": 268320 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.00015471125535815392, "loss": 2.0811, "step": 268325 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015470970821970586, "loss": 2.0648, "step": 268330 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.000154708161062568, "loss": 2.2051, "step": 268335 }, { "epoch": 0.63, "grad_norm": 1.9140625, "learning_rate": 0.00015470661388674084, "loss": 1.9945, "step": 268340 }, { "epoch": 0.63, "grad_norm": 1.640625, "learning_rate": 0.0001547050666922249, "loss": 2.198, "step": 268345 }, { "epoch": 0.63, "grad_norm": 1.9296875, "learning_rate": 0.00015470351947902068, "loss": 2.1815, "step": 268350 }, { "epoch": 0.63, "grad_norm": 1.890625, "learning_rate": 0.00015470197224712876, "loss": 2.0325, "step": 268355 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015470042499654966, "loss": 2.1627, "step": 268360 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015469887772728394, "loss": 2.055, "step": 268365 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015469733043933207, "loss": 1.945, "step": 268370 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015469578313269457, "loss": 2.0753, "step": 268375 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.000154694235807372, "loss": 2.1184, "step": 268380 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.0001546926884633649, "loss": 1.9838, "step": 268385 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015469114110067383, "loss": 2.0232, "step": 268390 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015468959371929924, "loss": 1.9829, "step": 268395 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015468804631924168, "loss": 2.0836, "step": 268400 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015468649890050168, "loss": 2.0424, "step": 268405 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.0001546849514630798, "loss": 2.106, "step": 268410 }, { "epoch": 0.63, "grad_norm": 2.71875, "learning_rate": 0.00015468340400697655, "loss": 2.0453, "step": 268415 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015468185653219247, "loss": 1.9004, "step": 268420 }, { "epoch": 0.63, "grad_norm": 2.734375, "learning_rate": 0.00015468030903872804, "loss": 2.0949, "step": 268425 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015467876152658384, "loss": 1.9415, "step": 268430 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001546772139957604, "loss": 1.9472, "step": 268435 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015467566644625824, "loss": 2.1292, "step": 268440 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015467411887807786, "loss": 2.1995, "step": 268445 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.0001546725712912198, "loss": 2.0283, "step": 268450 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015467102368568462, "loss": 2.2097, "step": 268455 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015466947606147281, "loss": 2.0976, "step": 268460 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015466792841858492, "loss": 2.0227, "step": 268465 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015466638075702148, "loss": 2.1556, "step": 268470 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015466483307678304, "loss": 2.0307, "step": 268475 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.00015466328537787005, "loss": 2.0937, "step": 268480 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.00015466173766028312, "loss": 1.8989, "step": 268485 }, { "epoch": 0.63, "grad_norm": 1.921875, "learning_rate": 0.00015466018992402276, "loss": 1.8489, "step": 268490 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015465864216908945, "loss": 2.257, "step": 268495 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.0001546570943954838, "loss": 2.1289, "step": 268500 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015465554660320624, "loss": 2.0987, "step": 268505 }, { "epoch": 0.63, "grad_norm": 1.8515625, "learning_rate": 0.00015465399879225738, "loss": 2.185, "step": 268510 }, { "epoch": 0.63, "grad_norm": 1.9140625, "learning_rate": 0.00015465245096263775, "loss": 2.1324, "step": 268515 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001546509031143478, "loss": 2.0435, "step": 268520 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.00015464935524738818, "loss": 2.1242, "step": 268525 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.0001546478073617593, "loss": 2.1176, "step": 268530 }, { "epoch": 0.63, "grad_norm": 2.578125, "learning_rate": 0.00015464625945746176, "loss": 1.9683, "step": 268535 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.00015464471153449604, "loss": 2.0089, "step": 268540 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015464316359286272, "loss": 2.0773, "step": 268545 }, { "epoch": 0.63, "grad_norm": 2.640625, "learning_rate": 0.0001546416156325623, "loss": 1.9125, "step": 268550 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001546400676535953, "loss": 2.1455, "step": 268555 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015463851965596227, "loss": 2.1979, "step": 268560 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001546369716396637, "loss": 2.1126, "step": 268565 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.0001546354236047002, "loss": 2.1949, "step": 268570 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.0001546338755510722, "loss": 2.1126, "step": 268575 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015463232747878035, "loss": 2.0409, "step": 268580 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015463077938782503, "loss": 1.9258, "step": 268585 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015462923127820689, "loss": 2.0376, "step": 268590 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001546276831499264, "loss": 1.858, "step": 268595 }, { "epoch": 0.63, "grad_norm": 1.921875, "learning_rate": 0.0001546261350029841, "loss": 1.9771, "step": 268600 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015462458683738052, "loss": 2.1359, "step": 268605 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015462303865311622, "loss": 2.0342, "step": 268610 }, { "epoch": 0.63, "grad_norm": 1.859375, "learning_rate": 0.00015462149045019166, "loss": 2.199, "step": 268615 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.0001546199422286074, "loss": 2.2406, "step": 268620 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.000154618393988364, "loss": 2.0701, "step": 268625 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.000154616845729462, "loss": 2.076, "step": 268630 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015461529745190185, "loss": 1.9063, "step": 268635 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015461374915568413, "loss": 2.094, "step": 268640 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015461220084080933, "loss": 2.1322, "step": 268645 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.0001546106525072781, "loss": 2.2089, "step": 268650 }, { "epoch": 0.63, "grad_norm": 3.0, "learning_rate": 0.0001546091041550908, "loss": 2.0319, "step": 268655 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015460755578424807, "loss": 1.8846, "step": 268660 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015460600739475042, "loss": 2.0954, "step": 268665 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015460445898659835, "loss": 2.1294, "step": 268670 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.0001546029105597924, "loss": 2.0467, "step": 268675 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015460136211433313, "loss": 2.102, "step": 268680 }, { "epoch": 0.63, "grad_norm": 1.9375, "learning_rate": 0.00015459981365022104, "loss": 2.1041, "step": 268685 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015459826516745664, "loss": 2.0556, "step": 268690 }, { "epoch": 0.63, "grad_norm": 2.671875, "learning_rate": 0.0001545967166660405, "loss": 2.0602, "step": 268695 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015459516814597313, "loss": 1.9904, "step": 268700 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015459361960725506, "loss": 2.0981, "step": 268705 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.0001545920710498868, "loss": 2.2472, "step": 268710 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015459052247386893, "loss": 1.9164, "step": 268715 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015458897387920194, "loss": 2.0809, "step": 268720 }, { "epoch": 0.63, "grad_norm": 3.125, "learning_rate": 0.00015458742526588636, "loss": 2.2621, "step": 268725 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015458587663392273, "loss": 2.2023, "step": 268730 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015458432798331158, "loss": 2.257, "step": 268735 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015458277931405343, "loss": 1.9722, "step": 268740 }, { "epoch": 0.63, "grad_norm": 3.140625, "learning_rate": 0.0001545812306261488, "loss": 2.4157, "step": 268745 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015457968191959825, "loss": 2.0477, "step": 268750 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.00015457813319440225, "loss": 1.9317, "step": 268755 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015457658445056143, "loss": 2.0865, "step": 268760 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015457503568807622, "loss": 2.1523, "step": 268765 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001545734869069472, "loss": 2.019, "step": 268770 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015457193810717486, "loss": 2.2015, "step": 268775 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001545703892887598, "loss": 1.9654, "step": 268780 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015456884045170248, "loss": 2.0342, "step": 268785 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015456729159600346, "loss": 2.1046, "step": 268790 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015456574272166326, "loss": 2.1132, "step": 268795 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015456419382868243, "loss": 2.1333, "step": 268800 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015456264491706145, "loss": 2.2331, "step": 268805 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.00015456109598680092, "loss": 2.0243, "step": 268810 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.0001545595470379013, "loss": 2.3623, "step": 268815 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015455799807036316, "loss": 2.2, "step": 268820 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.000154556449084187, "loss": 2.0664, "step": 268825 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.0001545549000793734, "loss": 2.1096, "step": 268830 }, { "epoch": 0.63, "grad_norm": 2.75, "learning_rate": 0.00015455335105592284, "loss": 1.9129, "step": 268835 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015455180201383585, "loss": 2.3464, "step": 268840 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.000154550252953113, "loss": 2.0862, "step": 268845 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015454870387375478, "loss": 2.0365, "step": 268850 }, { "epoch": 0.63, "grad_norm": 2.71875, "learning_rate": 0.00015454715477576176, "loss": 1.9319, "step": 268855 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.0001545456056591344, "loss": 2.0242, "step": 268860 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001545440565238733, "loss": 1.8453, "step": 268865 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015454250736997898, "loss": 2.1376, "step": 268870 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001545409581974519, "loss": 2.0435, "step": 268875 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015453940900629268, "loss": 2.0564, "step": 268880 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.0001545378597965018, "loss": 2.0429, "step": 268885 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015453631056807978, "loss": 2.0455, "step": 268890 }, { "epoch": 0.63, "grad_norm": 1.6484375, "learning_rate": 0.00015453476132102717, "loss": 1.9044, "step": 268895 }, { "epoch": 0.63, "grad_norm": 2.625, "learning_rate": 0.00015453321205534452, "loss": 2.115, "step": 268900 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015453166277103233, "loss": 2.1011, "step": 268905 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015453011346809112, "loss": 2.0671, "step": 268910 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015452856414652143, "loss": 1.9574, "step": 268915 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015452701480632382, "loss": 1.7429, "step": 268920 }, { "epoch": 0.63, "grad_norm": 1.8359375, "learning_rate": 0.00015452546544749877, "loss": 2.0927, "step": 268925 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015452391607004683, "loss": 1.9469, "step": 268930 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015452236667396857, "loss": 2.24, "step": 268935 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015452081725926444, "loss": 1.9674, "step": 268940 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015451926782593503, "loss": 2.1962, "step": 268945 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001545177183739808, "loss": 2.1796, "step": 268950 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015451616890340238, "loss": 2.1962, "step": 268955 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.00015451461941420023, "loss": 2.2699, "step": 268960 }, { "epoch": 0.63, "grad_norm": 2.5625, "learning_rate": 0.0001545130699063749, "loss": 2.2293, "step": 268965 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015451152037992695, "loss": 2.0642, "step": 268970 }, { "epoch": 0.63, "grad_norm": 1.921875, "learning_rate": 0.00015450997083485684, "loss": 2.1775, "step": 268975 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015450842127116515, "loss": 1.9802, "step": 268980 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.0001545068716888524, "loss": 1.8502, "step": 268985 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001545053220879191, "loss": 2.0234, "step": 268990 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015450377246836578, "loss": 2.0263, "step": 268995 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.000154502222830193, "loss": 2.0106, "step": 269000 }, { "epoch": 0.63, "grad_norm": 1.8984375, "learning_rate": 0.0001545006731734013, "loss": 2.1556, "step": 269005 }, { "epoch": 0.63, "grad_norm": 1.9609375, "learning_rate": 0.00015449912349799113, "loss": 2.1319, "step": 269010 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.0001544975738039631, "loss": 2.0694, "step": 269015 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015449602409131768, "loss": 1.9643, "step": 269020 }, { "epoch": 0.63, "grad_norm": 2.578125, "learning_rate": 0.00015449447436005547, "loss": 2.1074, "step": 269025 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015449292461017694, "loss": 2.0848, "step": 269030 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.00015449137484168263, "loss": 2.2357, "step": 269035 }, { "epoch": 0.63, "grad_norm": 2.640625, "learning_rate": 0.00015448982505457311, "loss": 2.0791, "step": 269040 }, { "epoch": 0.63, "grad_norm": 2.6875, "learning_rate": 0.00015448827524884885, "loss": 2.279, "step": 269045 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.0001544867254245104, "loss": 1.9558, "step": 269050 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001544851755815583, "loss": 1.9822, "step": 269055 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015448362571999312, "loss": 2.2896, "step": 269060 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.00015448207583981528, "loss": 2.186, "step": 269065 }, { "epoch": 0.63, "grad_norm": 1.6796875, "learning_rate": 0.00015448052594102543, "loss": 2.0483, "step": 269070 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.000154478976023624, "loss": 2.045, "step": 269075 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.0001544774260876116, "loss": 2.1504, "step": 269080 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.0001544758761329887, "loss": 2.1009, "step": 269085 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015447432615975587, "loss": 2.1112, "step": 269090 }, { "epoch": 0.63, "grad_norm": 2.5, "learning_rate": 0.0001544727761679136, "loss": 2.4221, "step": 269095 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015447122615746246, "loss": 2.0489, "step": 269100 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015446967612840295, "loss": 1.9847, "step": 269105 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.0001544681260807356, "loss": 2.1068, "step": 269110 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.000154466576014461, "loss": 2.0141, "step": 269115 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001544650259295796, "loss": 1.9917, "step": 269120 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015446347582609196, "loss": 1.978, "step": 269125 }, { "epoch": 0.63, "grad_norm": 2.046875, "learning_rate": 0.0001544619257039986, "loss": 2.2076, "step": 269130 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015446037556330003, "loss": 2.0704, "step": 269135 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.00015445882540399686, "loss": 2.1612, "step": 269140 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.00015445727522608955, "loss": 2.0277, "step": 269145 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015445572502957865, "loss": 1.9074, "step": 269150 }, { "epoch": 0.63, "grad_norm": 3.03125, "learning_rate": 0.00015445417481446466, "loss": 2.079, "step": 269155 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001544526245807482, "loss": 1.9592, "step": 269160 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.00015445107432842967, "loss": 2.111, "step": 269165 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.0001544495240575097, "loss": 1.9142, "step": 269170 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.0001544479737679888, "loss": 2.0346, "step": 269175 }, { "epoch": 0.63, "grad_norm": 2.546875, "learning_rate": 0.00015444642345986744, "loss": 1.9352, "step": 269180 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015444487313314622, "loss": 2.0266, "step": 269185 }, { "epoch": 0.63, "grad_norm": 1.890625, "learning_rate": 0.00015444332278782561, "loss": 2.0591, "step": 269190 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015444177242390623, "loss": 1.9461, "step": 269195 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015444022204138852, "loss": 2.0756, "step": 269200 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015443867164027303, "loss": 1.9936, "step": 269205 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015443712122056034, "loss": 2.2106, "step": 269210 }, { "epoch": 0.63, "grad_norm": 2.5, "learning_rate": 0.0001544355707822509, "loss": 2.0653, "step": 269215 }, { "epoch": 0.63, "grad_norm": 2.140625, "learning_rate": 0.00015443402032534534, "loss": 2.0711, "step": 269220 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015443246984984406, "loss": 2.085, "step": 269225 }, { "epoch": 0.63, "grad_norm": 1.75, "learning_rate": 0.00015443091935574772, "loss": 2.0482, "step": 269230 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015442936884305675, "loss": 1.9934, "step": 269235 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015442781831177173, "loss": 2.0991, "step": 269240 }, { "epoch": 0.63, "grad_norm": 1.765625, "learning_rate": 0.00015442626776189319, "loss": 2.178, "step": 269245 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015442471719342166, "loss": 2.1212, "step": 269250 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015442316660635767, "loss": 2.1161, "step": 269255 }, { "epoch": 0.63, "grad_norm": 1.9921875, "learning_rate": 0.00015442161600070172, "loss": 2.1677, "step": 269260 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015442006537645433, "loss": 2.2306, "step": 269265 }, { "epoch": 0.63, "grad_norm": 1.8515625, "learning_rate": 0.00015441851473361608, "loss": 1.9981, "step": 269270 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.0001544169640721875, "loss": 2.1386, "step": 269275 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015441541339216908, "loss": 1.9905, "step": 269280 }, { "epoch": 0.63, "grad_norm": 1.9140625, "learning_rate": 0.00015441386269356136, "loss": 2.1362, "step": 269285 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015441231197636487, "loss": 1.911, "step": 269290 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015441076124058018, "loss": 2.0213, "step": 269295 }, { "epoch": 0.63, "grad_norm": 2.53125, "learning_rate": 0.00015440921048620777, "loss": 2.1109, "step": 269300 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015440765971324818, "loss": 1.8991, "step": 269305 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.00015440610892170198, "loss": 2.3343, "step": 269310 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015440455811156962, "loss": 2.1147, "step": 269315 }, { "epoch": 0.63, "grad_norm": 1.9921875, "learning_rate": 0.0001544030072828517, "loss": 2.1705, "step": 269320 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015440145643554873, "loss": 2.1183, "step": 269325 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015439990556966122, "loss": 2.0755, "step": 269330 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015439835468518976, "loss": 2.263, "step": 269335 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015439680378213477, "loss": 2.1524, "step": 269340 }, { "epoch": 0.63, "grad_norm": 2.40625, "learning_rate": 0.00015439525286049688, "loss": 2.0364, "step": 269345 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015439370192027658, "loss": 2.1384, "step": 269350 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.0001543921509614744, "loss": 1.9784, "step": 269355 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015439059998409088, "loss": 2.0433, "step": 269360 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015438904898812657, "loss": 1.9851, "step": 269365 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015438749797358194, "loss": 2.1065, "step": 269370 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015438594694045757, "loss": 2.1638, "step": 269375 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015438439588875394, "loss": 2.1114, "step": 269380 }, { "epoch": 0.63, "grad_norm": 1.8828125, "learning_rate": 0.00015438284481847166, "loss": 2.2034, "step": 269385 }, { "epoch": 0.63, "grad_norm": 2.515625, "learning_rate": 0.0001543812937296112, "loss": 2.1377, "step": 269390 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.0001543797426221731, "loss": 2.0554, "step": 269395 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001543781914961579, "loss": 2.0169, "step": 269400 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.0001543766403515661, "loss": 2.1384, "step": 269405 }, { "epoch": 0.63, "grad_norm": 1.921875, "learning_rate": 0.00015437508918839828, "loss": 1.9575, "step": 269410 }, { "epoch": 0.63, "grad_norm": 1.875, "learning_rate": 0.00015437353800665493, "loss": 2.1056, "step": 269415 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.0001543719868063366, "loss": 2.0351, "step": 269420 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001543704355874438, "loss": 2.0883, "step": 269425 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001543688843499771, "loss": 2.1695, "step": 269430 }, { "epoch": 0.63, "grad_norm": 2.578125, "learning_rate": 0.000154367333093937, "loss": 2.0334, "step": 269435 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.000154365781819324, "loss": 2.2006, "step": 269440 }, { "epoch": 0.63, "grad_norm": 1.4453125, "learning_rate": 0.00015436423052613872, "loss": 2.0337, "step": 269445 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015436267921438158, "loss": 1.9703, "step": 269450 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015436112788405317, "loss": 2.2964, "step": 269455 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015435957653515403, "loss": 2.1959, "step": 269460 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015435802516768464, "loss": 2.0483, "step": 269465 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015435647378164559, "loss": 2.1348, "step": 269470 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.0001543549223770374, "loss": 2.307, "step": 269475 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015435337095386054, "loss": 2.1515, "step": 269480 }, { "epoch": 0.63, "grad_norm": 2.453125, "learning_rate": 0.0001543518195121156, "loss": 2.1702, "step": 269485 }, { "epoch": 0.63, "grad_norm": 1.8203125, "learning_rate": 0.00015435026805180312, "loss": 2.1793, "step": 269490 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.0001543487165729236, "loss": 2.0871, "step": 269495 }, { "epoch": 0.63, "grad_norm": 1.8671875, "learning_rate": 0.00015434716507547753, "loss": 1.9805, "step": 269500 }, { "epoch": 0.63, "grad_norm": 1.90625, "learning_rate": 0.0001543456135594655, "loss": 2.1817, "step": 269505 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015434406202488802, "loss": 2.1581, "step": 269510 }, { "epoch": 0.63, "grad_norm": 1.984375, "learning_rate": 0.00015434251047174565, "loss": 2.1152, "step": 269515 }, { "epoch": 0.63, "grad_norm": 2.71875, "learning_rate": 0.00015434095890003887, "loss": 2.25, "step": 269520 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.00015433940730976827, "loss": 1.9053, "step": 269525 }, { "epoch": 0.63, "grad_norm": 2.65625, "learning_rate": 0.0001543378557009343, "loss": 2.2987, "step": 269530 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015433630407353754, "loss": 1.8653, "step": 269535 }, { "epoch": 0.63, "grad_norm": 1.78125, "learning_rate": 0.0001543347524275785, "loss": 1.9812, "step": 269540 }, { "epoch": 0.63, "grad_norm": 2.390625, "learning_rate": 0.00015433320076305775, "loss": 1.9938, "step": 269545 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015433164907997576, "loss": 1.9429, "step": 269550 }, { "epoch": 0.63, "grad_norm": 2.4375, "learning_rate": 0.00015433009737833312, "loss": 1.9678, "step": 269555 }, { "epoch": 0.63, "grad_norm": 2.65625, "learning_rate": 0.00015432854565813035, "loss": 2.1173, "step": 269560 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015432699391936794, "loss": 2.1445, "step": 269565 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015432544216204645, "loss": 2.1535, "step": 269570 }, { "epoch": 0.63, "grad_norm": 2.59375, "learning_rate": 0.0001543238903861664, "loss": 2.0486, "step": 269575 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015432233859172834, "loss": 2.1621, "step": 269580 }, { "epoch": 0.63, "grad_norm": 2.15625, "learning_rate": 0.00015432078677873274, "loss": 1.8954, "step": 269585 }, { "epoch": 0.63, "grad_norm": 2.578125, "learning_rate": 0.0001543192349471802, "loss": 2.2142, "step": 269590 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.00015431768309707124, "loss": 1.9542, "step": 269595 }, { "epoch": 0.63, "grad_norm": 2.0625, "learning_rate": 0.00015431613122840636, "loss": 2.1955, "step": 269600 }, { "epoch": 0.63, "grad_norm": 1.953125, "learning_rate": 0.00015431457934118612, "loss": 2.1458, "step": 269605 }, { "epoch": 0.63, "grad_norm": 2.65625, "learning_rate": 0.00015431302743541098, "loss": 2.1414, "step": 269610 }, { "epoch": 0.63, "grad_norm": 2.484375, "learning_rate": 0.0001543114755110816, "loss": 1.9991, "step": 269615 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.0001543099235681984, "loss": 2.1977, "step": 269620 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.00015430837160676194, "loss": 1.938, "step": 269625 }, { "epoch": 0.63, "grad_norm": 1.96875, "learning_rate": 0.00015430681962677278, "loss": 2.1897, "step": 269630 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015430526762823138, "loss": 2.2454, "step": 269635 }, { "epoch": 0.63, "grad_norm": 2.328125, "learning_rate": 0.00015430371561113836, "loss": 2.2574, "step": 269640 }, { "epoch": 0.63, "grad_norm": 2.203125, "learning_rate": 0.00015430216357549417, "loss": 2.0214, "step": 269645 }, { "epoch": 0.63, "grad_norm": 2.1875, "learning_rate": 0.0001543006115212994, "loss": 2.078, "step": 269650 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015429905944855453, "loss": 2.0411, "step": 269655 }, { "epoch": 0.63, "grad_norm": 2.421875, "learning_rate": 0.00015429750735726017, "loss": 2.3071, "step": 269660 }, { "epoch": 0.63, "grad_norm": 2.0, "learning_rate": 0.00015429595524741677, "loss": 2.2077, "step": 269665 }, { "epoch": 0.63, "grad_norm": 1.7421875, "learning_rate": 0.00015429440311902487, "loss": 2.1128, "step": 269670 }, { "epoch": 0.63, "grad_norm": 1.890625, "learning_rate": 0.00015429285097208503, "loss": 2.0139, "step": 269675 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015429129880659777, "loss": 2.1969, "step": 269680 }, { "epoch": 0.63, "grad_norm": 2.078125, "learning_rate": 0.0001542897466225636, "loss": 1.9878, "step": 269685 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015428819441998308, "loss": 1.8734, "step": 269690 }, { "epoch": 0.63, "grad_norm": 2.25, "learning_rate": 0.00015428664219885673, "loss": 2.2284, "step": 269695 }, { "epoch": 0.63, "grad_norm": 2.375, "learning_rate": 0.00015428508995918506, "loss": 2.2654, "step": 269700 }, { "epoch": 0.63, "grad_norm": 2.296875, "learning_rate": 0.00015428353770096867, "loss": 1.9324, "step": 269705 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.000154281985424208, "loss": 2.1299, "step": 269710 }, { "epoch": 0.63, "grad_norm": 2.125, "learning_rate": 0.0001542804331289036, "loss": 2.2031, "step": 269715 }, { "epoch": 0.63, "grad_norm": 2.171875, "learning_rate": 0.00015427888081505605, "loss": 2.0001, "step": 269720 }, { "epoch": 0.63, "grad_norm": 2.71875, "learning_rate": 0.00015427732848266585, "loss": 1.9827, "step": 269725 }, { "epoch": 0.63, "grad_norm": 2.03125, "learning_rate": 0.00015427577613173353, "loss": 2.1462, "step": 269730 }, { "epoch": 0.63, "grad_norm": 2.359375, "learning_rate": 0.00015427422376225962, "loss": 2.0487, "step": 269735 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015427267137424464, "loss": 2.033, "step": 269740 }, { "epoch": 0.63, "grad_norm": 2.984375, "learning_rate": 0.00015427111896768912, "loss": 2.251, "step": 269745 }, { "epoch": 0.63, "grad_norm": 2.5625, "learning_rate": 0.00015426956654259364, "loss": 2.0446, "step": 269750 }, { "epoch": 0.63, "grad_norm": 2.78125, "learning_rate": 0.00015426801409895865, "loss": 2.0739, "step": 269755 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015426646163678475, "loss": 2.1973, "step": 269760 }, { "epoch": 0.63, "grad_norm": 2.671875, "learning_rate": 0.00015426490915607243, "loss": 2.3043, "step": 269765 }, { "epoch": 0.63, "grad_norm": 2.234375, "learning_rate": 0.00015426335665682226, "loss": 2.1242, "step": 269770 }, { "epoch": 0.63, "grad_norm": 2.21875, "learning_rate": 0.0001542618041390347, "loss": 2.1524, "step": 269775 }, { "epoch": 0.63, "grad_norm": 2.28125, "learning_rate": 0.00015426025160271035, "loss": 2.0408, "step": 269780 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015425869904784972, "loss": 2.0996, "step": 269785 }, { "epoch": 0.63, "grad_norm": 2.109375, "learning_rate": 0.00015425714647445332, "loss": 2.0352, "step": 269790 }, { "epoch": 0.63, "grad_norm": 2.34375, "learning_rate": 0.0001542555938825217, "loss": 1.9682, "step": 269795 }, { "epoch": 0.63, "grad_norm": 2.640625, "learning_rate": 0.00015425404127205537, "loss": 2.2162, "step": 269800 }, { "epoch": 0.63, "grad_norm": 2.265625, "learning_rate": 0.00015425248864305488, "loss": 2.0912, "step": 269805 }, { "epoch": 0.63, "grad_norm": 2.484375, "learning_rate": 0.0001542509359955208, "loss": 2.0814, "step": 269810 }, { "epoch": 0.63, "grad_norm": 2.3125, "learning_rate": 0.00015424938332945357, "loss": 2.1863, "step": 269815 }, { "epoch": 0.63, "grad_norm": 2.09375, "learning_rate": 0.00015424783064485378, "loss": 2.3277, "step": 269820 }, { "epoch": 0.63, "grad_norm": 2.015625, "learning_rate": 0.00015424627794172194, "loss": 2.0933, "step": 269825 }, { "epoch": 0.63, "grad_norm": 2.46875, "learning_rate": 0.0001542447252200586, "loss": 1.974, "step": 269830 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015424317247986427, "loss": 2.1418, "step": 269835 }, { "epoch": 0.64, "grad_norm": 1.859375, "learning_rate": 0.00015424161972113945, "loss": 2.0202, "step": 269840 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.00015424006694388478, "loss": 2.23, "step": 269845 }, { "epoch": 0.64, "grad_norm": 2.65625, "learning_rate": 0.00015423851414810068, "loss": 2.1732, "step": 269850 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015423696133378773, "loss": 1.8395, "step": 269855 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015423540850094644, "loss": 2.0405, "step": 269860 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015423385564957738, "loss": 2.1237, "step": 269865 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015423230277968104, "loss": 2.0334, "step": 269870 }, { "epoch": 0.64, "grad_norm": 2.578125, "learning_rate": 0.00015423074989125794, "loss": 2.0174, "step": 269875 }, { "epoch": 0.64, "grad_norm": 3.578125, "learning_rate": 0.00015422919698430864, "loss": 2.0512, "step": 269880 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015422764405883366, "loss": 2.1753, "step": 269885 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015422609111483354, "loss": 2.1168, "step": 269890 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015422453815230883, "loss": 1.976, "step": 269895 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015422298517126, "loss": 2.0676, "step": 269900 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015422143217168758, "loss": 2.0388, "step": 269905 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015421987915359222, "loss": 1.9634, "step": 269910 }, { "epoch": 0.64, "grad_norm": 1.828125, "learning_rate": 0.00015421832611697432, "loss": 2.1608, "step": 269915 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015421677306183445, "loss": 2.1431, "step": 269920 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.00015421521998817316, "loss": 2.0493, "step": 269925 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015421366689599095, "loss": 2.0187, "step": 269930 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001542121137852884, "loss": 2.1889, "step": 269935 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015421056065606598, "loss": 2.1401, "step": 269940 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015420900750832427, "loss": 1.9941, "step": 269945 }, { "epoch": 0.64, "grad_norm": 2.953125, "learning_rate": 0.0001542074543420638, "loss": 2.1145, "step": 269950 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015420590115728503, "loss": 2.1051, "step": 269955 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015420434795398858, "loss": 2.1271, "step": 269960 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.0001542027947321749, "loss": 2.207, "step": 269965 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.0001542012414918446, "loss": 1.9711, "step": 269970 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015419968823299817, "loss": 2.1262, "step": 269975 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015419813495563613, "loss": 1.9943, "step": 269980 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.000154196581659759, "loss": 2.0682, "step": 269985 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.00015419502834536738, "loss": 1.9983, "step": 269990 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015419347501246174, "loss": 2.1363, "step": 269995 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015419192166104263, "loss": 2.0446, "step": 270000 }, { "epoch": 0.64, "grad_norm": 1.453125, "learning_rate": 0.00015419036829111057, "loss": 1.8469, "step": 270005 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.0001541888149026661, "loss": 2.0582, "step": 270010 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015418726149570974, "loss": 2.219, "step": 270015 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015418570807024205, "loss": 2.1197, "step": 270020 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015418415462626353, "loss": 2.1973, "step": 270025 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.0001541826011637747, "loss": 2.0024, "step": 270030 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015418104768277614, "loss": 2.1264, "step": 270035 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.0001541794941832683, "loss": 2.1557, "step": 270040 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015417794066525185, "loss": 2.1192, "step": 270045 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015417638712872714, "loss": 2.0253, "step": 270050 }, { "epoch": 0.64, "grad_norm": 1.75, "learning_rate": 0.00015417483357369486, "loss": 1.9789, "step": 270055 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015417328000015546, "loss": 2.0297, "step": 270060 }, { "epoch": 0.64, "grad_norm": 2.71875, "learning_rate": 0.00015417172640810946, "loss": 2.1199, "step": 270065 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015417017279755743, "loss": 2.192, "step": 270070 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015416861916849988, "loss": 2.1928, "step": 270075 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.0001541670655209374, "loss": 2.0419, "step": 270080 }, { "epoch": 0.64, "grad_norm": 1.7421875, "learning_rate": 0.0001541655118548704, "loss": 2.0727, "step": 270085 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.00015416395817029947, "loss": 2.0828, "step": 270090 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015416240446722518, "loss": 2.1265, "step": 270095 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015416085074564806, "loss": 1.9823, "step": 270100 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015415929700556856, "loss": 2.1123, "step": 270105 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.00015415774324698732, "loss": 2.1392, "step": 270110 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015415618946990476, "loss": 1.9947, "step": 270115 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015415463567432146, "loss": 2.0633, "step": 270120 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015415308186023802, "loss": 2.1411, "step": 270125 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015415152802765483, "loss": 2.1712, "step": 270130 }, { "epoch": 0.64, "grad_norm": 2.8125, "learning_rate": 0.00015414997417657254, "loss": 1.9907, "step": 270135 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.0001541484203069916, "loss": 2.2117, "step": 270140 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.0001541468664189126, "loss": 2.1531, "step": 270145 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015414531251233606, "loss": 1.9954, "step": 270150 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015414375858726248, "loss": 1.9882, "step": 270155 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015414220464369244, "loss": 2.0949, "step": 270160 }, { "epoch": 0.64, "grad_norm": 2.640625, "learning_rate": 0.0001541406506816264, "loss": 2.1557, "step": 270165 }, { "epoch": 0.64, "grad_norm": 2.59375, "learning_rate": 0.00015413909670106496, "loss": 2.1436, "step": 270170 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.0001541375427020086, "loss": 2.173, "step": 270175 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.0001541359886844579, "loss": 2.1032, "step": 270180 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015413443464841334, "loss": 2.026, "step": 270185 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015413288059387548, "loss": 2.0394, "step": 270190 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015413132652084487, "loss": 2.1445, "step": 270195 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015412977242932198, "loss": 2.2326, "step": 270200 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015412821831930744, "loss": 1.9813, "step": 270205 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015412666419080165, "loss": 2.0066, "step": 270210 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015412511004380525, "loss": 2.1352, "step": 270215 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.00015412355587831873, "loss": 1.974, "step": 270220 }, { "epoch": 0.64, "grad_norm": 3.0625, "learning_rate": 0.00015412200169434258, "loss": 2.1714, "step": 270225 }, { "epoch": 0.64, "grad_norm": 1.6953125, "learning_rate": 0.00015412044749187742, "loss": 2.0544, "step": 270230 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001541188932709237, "loss": 1.9841, "step": 270235 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015411733903148204, "loss": 2.1563, "step": 270240 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.00015411578477355286, "loss": 1.925, "step": 270245 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015411423049713677, "loss": 2.04, "step": 270250 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015411267620223424, "loss": 2.1004, "step": 270255 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.0001541111218888459, "loss": 2.0809, "step": 270260 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001541095675569722, "loss": 2.2631, "step": 270265 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015410801320661367, "loss": 2.0463, "step": 270270 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015410645883777086, "loss": 2.0818, "step": 270275 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001541049044504443, "loss": 2.144, "step": 270280 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015410335004463455, "loss": 2.0728, "step": 270285 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001541017956203421, "loss": 2.1181, "step": 270290 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001541002411775675, "loss": 2.0411, "step": 270295 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015409868671631126, "loss": 2.1237, "step": 270300 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015409713223657394, "loss": 2.0472, "step": 270305 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015409557773835605, "loss": 2.0727, "step": 270310 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015409402322165815, "loss": 2.1369, "step": 270315 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015409246868648073, "loss": 2.0816, "step": 270320 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015409091413282435, "loss": 2.1609, "step": 270325 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015408935956068954, "loss": 2.3129, "step": 270330 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015408780497007683, "loss": 2.0775, "step": 270335 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015408625036098672, "loss": 2.1313, "step": 270340 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.0001540846957334198, "loss": 1.9786, "step": 270345 }, { "epoch": 0.64, "grad_norm": 1.65625, "learning_rate": 0.00015408314108737652, "loss": 2.0414, "step": 270350 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.0001540815864228575, "loss": 2.246, "step": 270355 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.0001540800317398632, "loss": 2.1597, "step": 270360 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.0001540784770383942, "loss": 1.9781, "step": 270365 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015407692231845098, "loss": 2.0681, "step": 270370 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015407536758003416, "loss": 2.0266, "step": 270375 }, { "epoch": 0.64, "grad_norm": 2.84375, "learning_rate": 0.00015407381282314417, "loss": 2.2024, "step": 270380 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.00015407225804778157, "loss": 2.0068, "step": 270385 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015407070325394693, "loss": 2.1461, "step": 270390 }, { "epoch": 0.64, "grad_norm": 1.84375, "learning_rate": 0.00015406914844164077, "loss": 2.1415, "step": 270395 }, { "epoch": 0.64, "grad_norm": 2.9375, "learning_rate": 0.0001540675936108636, "loss": 2.1831, "step": 270400 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015406603876161597, "loss": 2.0865, "step": 270405 }, { "epoch": 0.64, "grad_norm": 3.078125, "learning_rate": 0.00015406448389389837, "loss": 2.1723, "step": 270410 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015406292900771137, "loss": 2.0652, "step": 270415 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015406137410305551, "loss": 2.2236, "step": 270420 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.0001540598191799313, "loss": 2.1827, "step": 270425 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015405826423833927, "loss": 2.0833, "step": 270430 }, { "epoch": 0.64, "grad_norm": 2.953125, "learning_rate": 0.00015405670927827995, "loss": 2.1695, "step": 270435 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015405515429975388, "loss": 2.2528, "step": 270440 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.0001540535993027616, "loss": 2.0014, "step": 270445 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015405204428730362, "loss": 2.1519, "step": 270450 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015405048925338052, "loss": 1.9433, "step": 270455 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015404893420099274, "loss": 1.8497, "step": 270460 }, { "epoch": 0.64, "grad_norm": 1.75, "learning_rate": 0.00015404737913014087, "loss": 1.9716, "step": 270465 }, { "epoch": 0.64, "grad_norm": 1.9921875, "learning_rate": 0.00015404582404082544, "loss": 1.8258, "step": 270470 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.000154044268933047, "loss": 2.0576, "step": 270475 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015404271380680605, "loss": 2.02, "step": 270480 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015404115866210312, "loss": 1.9844, "step": 270485 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015403960349893872, "loss": 1.9966, "step": 270490 }, { "epoch": 0.64, "grad_norm": 3.03125, "learning_rate": 0.00015403804831731343, "loss": 2.0724, "step": 270495 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.0001540364931172278, "loss": 2.1185, "step": 270500 }, { "epoch": 0.64, "grad_norm": 1.8046875, "learning_rate": 0.0001540349378986823, "loss": 2.046, "step": 270505 }, { "epoch": 0.64, "grad_norm": 1.9140625, "learning_rate": 0.00015403338266167747, "loss": 2.1215, "step": 270510 }, { "epoch": 0.64, "grad_norm": 1.671875, "learning_rate": 0.00015403182740621389, "loss": 2.1336, "step": 270515 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015403027213229203, "loss": 2.0662, "step": 270520 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015402871683991244, "loss": 2.0596, "step": 270525 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.0001540271615290757, "loss": 2.1674, "step": 270530 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015402560619978228, "loss": 2.0862, "step": 270535 }, { "epoch": 0.64, "grad_norm": 2.640625, "learning_rate": 0.00015402405085203274, "loss": 2.1365, "step": 270540 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.00015402249548582758, "loss": 1.9614, "step": 270545 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015402094010116737, "loss": 2.0439, "step": 270550 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015401938469805263, "loss": 2.0884, "step": 270555 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015401782927648388, "loss": 2.0914, "step": 270560 }, { "epoch": 0.64, "grad_norm": 1.796875, "learning_rate": 0.00015401627383646165, "loss": 2.0997, "step": 270565 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015401471837798653, "loss": 2.0365, "step": 270570 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015401316290105895, "loss": 1.9911, "step": 270575 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015401160740567953, "loss": 2.189, "step": 270580 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015401005189184875, "loss": 2.0993, "step": 270585 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015400849635956716, "loss": 2.3244, "step": 270590 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015400694080883529, "loss": 1.8857, "step": 270595 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015400538523965365, "loss": 1.9333, "step": 270600 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.0001540038296520228, "loss": 1.9729, "step": 270605 }, { "epoch": 0.64, "grad_norm": 1.859375, "learning_rate": 0.0001540022740459433, "loss": 2.1072, "step": 270610 }, { "epoch": 0.64, "grad_norm": 1.8359375, "learning_rate": 0.00015400071842141562, "loss": 2.0472, "step": 270615 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015399916277844032, "loss": 2.096, "step": 270620 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015399760711701788, "loss": 2.2921, "step": 270625 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001539960514371489, "loss": 2.0265, "step": 270630 }, { "epoch": 0.64, "grad_norm": 2.8125, "learning_rate": 0.0001539944957388339, "loss": 2.0611, "step": 270635 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.00015399294002207343, "loss": 1.977, "step": 270640 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015399138428686798, "loss": 2.1124, "step": 270645 }, { "epoch": 0.64, "grad_norm": 2.640625, "learning_rate": 0.00015398982853321807, "loss": 2.0107, "step": 270650 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015398827276112424, "loss": 2.1588, "step": 270655 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015398671697058707, "loss": 2.0447, "step": 270660 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015398516116160707, "loss": 2.1223, "step": 270665 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015398360533418472, "loss": 1.8775, "step": 270670 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015398204948832063, "loss": 1.991, "step": 270675 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015398049362401527, "loss": 1.9498, "step": 270680 }, { "epoch": 0.64, "grad_norm": 1.8515625, "learning_rate": 0.0001539789377412692, "loss": 2.0597, "step": 270685 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015397738184008293, "loss": 2.0514, "step": 270690 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015397582592045704, "loss": 2.038, "step": 270695 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015397426998239202, "loss": 2.0581, "step": 270700 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.00015397271402588838, "loss": 1.8371, "step": 270705 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.0001539711580509467, "loss": 2.0674, "step": 270710 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015396960205756749, "loss": 2.1368, "step": 270715 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.0001539680460457513, "loss": 2.1633, "step": 270720 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015396649001549862, "loss": 2.013, "step": 270725 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015396493396681006, "loss": 2.0289, "step": 270730 }, { "epoch": 0.64, "grad_norm": 1.9609375, "learning_rate": 0.00015396337789968606, "loss": 2.147, "step": 270735 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.0001539618218141272, "loss": 1.9455, "step": 270740 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015396026571013399, "loss": 2.1291, "step": 270745 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.000153958709587707, "loss": 1.8746, "step": 270750 }, { "epoch": 0.64, "grad_norm": 2.71875, "learning_rate": 0.0001539571534468467, "loss": 2.0326, "step": 270755 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001539555972875537, "loss": 2.1958, "step": 270760 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015395404110982844, "loss": 2.0234, "step": 270765 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.00015395248491367153, "loss": 2.1035, "step": 270770 }, { "epoch": 0.64, "grad_norm": 1.8203125, "learning_rate": 0.00015395092869908348, "loss": 2.0023, "step": 270775 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.0001539493724660648, "loss": 1.995, "step": 270780 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015394781621461609, "loss": 2.0994, "step": 270785 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015394625994473774, "loss": 1.9968, "step": 270790 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015394470365643042, "loss": 2.0338, "step": 270795 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015394314734969462, "loss": 2.1405, "step": 270800 }, { "epoch": 0.64, "grad_norm": 1.6640625, "learning_rate": 0.00015394159102453085, "loss": 1.7539, "step": 270805 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015394003468093966, "loss": 2.0798, "step": 270810 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015393847831892156, "loss": 1.9211, "step": 270815 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015393692193847706, "loss": 2.1276, "step": 270820 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015393536553960679, "loss": 1.9227, "step": 270825 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.0001539338091223112, "loss": 2.0088, "step": 270830 }, { "epoch": 0.64, "grad_norm": 2.671875, "learning_rate": 0.00015393225268659085, "loss": 2.1126, "step": 270835 }, { "epoch": 0.64, "grad_norm": 2.96875, "learning_rate": 0.00015393069623244627, "loss": 2.0085, "step": 270840 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015392913975987798, "loss": 2.1484, "step": 270845 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001539275832688865, "loss": 2.1627, "step": 270850 }, { "epoch": 0.64, "grad_norm": 1.8203125, "learning_rate": 0.00015392602675947238, "loss": 2.2501, "step": 270855 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015392447023163615, "loss": 2.0268, "step": 270860 }, { "epoch": 0.64, "grad_norm": 1.9296875, "learning_rate": 0.00015392291368537835, "loss": 2.0776, "step": 270865 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015392135712069953, "loss": 2.0379, "step": 270870 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001539198005376002, "loss": 2.2015, "step": 270875 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015391824393608085, "loss": 2.0978, "step": 270880 }, { "epoch": 0.64, "grad_norm": 2.546875, "learning_rate": 0.00015391668731614208, "loss": 2.032, "step": 270885 }, { "epoch": 0.64, "grad_norm": 1.875, "learning_rate": 0.00015391513067778437, "loss": 2.1432, "step": 270890 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015391357402100829, "loss": 1.9237, "step": 270895 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015391201734581432, "loss": 2.012, "step": 270900 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015391046065220306, "loss": 2.0108, "step": 270905 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.000153908903940175, "loss": 2.121, "step": 270910 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015390734720973072, "loss": 2.0148, "step": 270915 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001539057904608707, "loss": 1.9316, "step": 270920 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001539042336935954, "loss": 2.1334, "step": 270925 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.0001539026769079055, "loss": 2.0578, "step": 270930 }, { "epoch": 0.64, "grad_norm": 2.5625, "learning_rate": 0.00015390112010380146, "loss": 2.1202, "step": 270935 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015389956328128385, "loss": 2.0881, "step": 270940 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015389800644035316, "loss": 2.0602, "step": 270945 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001538964495810099, "loss": 2.0156, "step": 270950 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015389489270325462, "loss": 2.0724, "step": 270955 }, { "epoch": 0.64, "grad_norm": 1.9921875, "learning_rate": 0.00015389333580708795, "loss": 2.1047, "step": 270960 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.00015389177889251027, "loss": 2.0892, "step": 270965 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015389022195952222, "loss": 2.1687, "step": 270970 }, { "epoch": 0.64, "grad_norm": 1.859375, "learning_rate": 0.00015388866500812425, "loss": 2.2907, "step": 270975 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.000153887108038317, "loss": 2.0036, "step": 270980 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015388555105010088, "loss": 2.1166, "step": 270985 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.0001538839940434765, "loss": 2.0436, "step": 270990 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015388243701844437, "loss": 2.1372, "step": 270995 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.000153880879975005, "loss": 2.0319, "step": 271000 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015387932291315893, "loss": 2.1576, "step": 271005 }, { "epoch": 0.64, "grad_norm": 2.546875, "learning_rate": 0.00015387776583290677, "loss": 2.0651, "step": 271010 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015387620873424896, "loss": 2.0478, "step": 271015 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015387465161718607, "loss": 2.1167, "step": 271020 }, { "epoch": 0.64, "grad_norm": 1.9609375, "learning_rate": 0.00015387309448171857, "loss": 2.1268, "step": 271025 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.0001538715373278471, "loss": 2.0071, "step": 271030 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015386998015557213, "loss": 2.0778, "step": 271035 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015386842296489416, "loss": 2.2082, "step": 271040 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015386686575581378, "loss": 2.2039, "step": 271045 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.0001538653085283315, "loss": 2.0868, "step": 271050 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.0001538637512824479, "loss": 2.2781, "step": 271055 }, { "epoch": 0.64, "grad_norm": 1.890625, "learning_rate": 0.0001538621940181634, "loss": 2.0046, "step": 271060 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.00015386063673547863, "loss": 2.0727, "step": 271065 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015385907943439407, "loss": 2.1836, "step": 271070 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.0001538575221149103, "loss": 2.0851, "step": 271075 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015385596477702782, "loss": 2.2361, "step": 271080 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015385440742074713, "loss": 2.0759, "step": 271085 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015385285004606883, "loss": 2.0823, "step": 271090 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015385129265299343, "loss": 2.1352, "step": 271095 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015384973524152143, "loss": 2.1434, "step": 271100 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.0001538481778116534, "loss": 1.9453, "step": 271105 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015384662036338983, "loss": 1.9951, "step": 271110 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015384506289673125, "loss": 2.1826, "step": 271115 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001538435054116783, "loss": 1.9744, "step": 271120 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015384194790823142, "loss": 2.0694, "step": 271125 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.0001538403903863911, "loss": 2.2213, "step": 271130 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015383883284615792, "loss": 1.9828, "step": 271135 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001538372752875325, "loss": 2.0069, "step": 271140 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.00015383571771051524, "loss": 2.2152, "step": 271145 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.0001538341601151067, "loss": 2.0389, "step": 271150 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015383260250130749, "loss": 2.1477, "step": 271155 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015383104486911802, "loss": 2.1456, "step": 271160 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015382948721853894, "loss": 1.9876, "step": 271165 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015382792954957071, "loss": 2.0184, "step": 271170 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001538263718622139, "loss": 2.0788, "step": 271175 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015382481415646902, "loss": 2.0965, "step": 271180 }, { "epoch": 0.64, "grad_norm": 2.59375, "learning_rate": 0.00015382325643233658, "loss": 2.1086, "step": 271185 }, { "epoch": 0.64, "grad_norm": 1.875, "learning_rate": 0.00015382169868981716, "loss": 2.0036, "step": 271190 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015382014092891127, "loss": 1.9092, "step": 271195 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015381858314961945, "loss": 2.097, "step": 271200 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015381702535194225, "loss": 2.1431, "step": 271205 }, { "epoch": 0.64, "grad_norm": 1.8125, "learning_rate": 0.00015381546753588015, "loss": 2.0329, "step": 271210 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015381390970143372, "loss": 2.2347, "step": 271215 }, { "epoch": 0.64, "grad_norm": 2.703125, "learning_rate": 0.00015381235184860342, "loss": 2.191, "step": 271220 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015381079397738993, "loss": 2.145, "step": 271225 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015380923608779366, "loss": 1.9249, "step": 271230 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015380767817981518, "loss": 2.1741, "step": 271235 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015380612025345505, "loss": 2.1847, "step": 271240 }, { "epoch": 0.64, "grad_norm": 1.7890625, "learning_rate": 0.0001538045623087137, "loss": 2.0667, "step": 271245 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.0001538030043455918, "loss": 1.9737, "step": 271250 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015380144636408983, "loss": 2.0031, "step": 271255 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015379988836420827, "loss": 2.0637, "step": 271260 }, { "epoch": 0.64, "grad_norm": 1.8125, "learning_rate": 0.0001537983303459477, "loss": 2.1624, "step": 271265 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015379677230930864, "loss": 1.9853, "step": 271270 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015379521425429164, "loss": 2.0194, "step": 271275 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.00015379365618089724, "loss": 2.0873, "step": 271280 }, { "epoch": 0.64, "grad_norm": 1.8984375, "learning_rate": 0.0001537920980891259, "loss": 1.9406, "step": 271285 }, { "epoch": 0.64, "grad_norm": 2.6875, "learning_rate": 0.00015379053997897826, "loss": 2.1838, "step": 271290 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015378898185045475, "loss": 2.23, "step": 271295 }, { "epoch": 0.64, "grad_norm": 1.6796875, "learning_rate": 0.00015378742370355597, "loss": 2.0272, "step": 271300 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.00015378586553828244, "loss": 2.0214, "step": 271305 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015378430735463465, "loss": 1.9843, "step": 271310 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.0001537827491526132, "loss": 2.0759, "step": 271315 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015378119093221857, "loss": 2.0327, "step": 271320 }, { "epoch": 0.64, "grad_norm": 2.578125, "learning_rate": 0.0001537796326934513, "loss": 2.0819, "step": 271325 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015377807443631194, "loss": 2.122, "step": 271330 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.000153776516160801, "loss": 2.1867, "step": 271335 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015377495786691908, "loss": 2.167, "step": 271340 }, { "epoch": 0.64, "grad_norm": 1.875, "learning_rate": 0.0001537733995546666, "loss": 1.9258, "step": 271345 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015377184122404417, "loss": 2.1293, "step": 271350 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.0001537702828750523, "loss": 1.9768, "step": 271355 }, { "epoch": 0.64, "grad_norm": 2.703125, "learning_rate": 0.00015376872450769153, "loss": 2.0183, "step": 271360 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.0001537671661219624, "loss": 1.9972, "step": 271365 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015376560771786547, "loss": 2.2574, "step": 271370 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015376404929540115, "loss": 2.0386, "step": 271375 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015376249085457012, "loss": 2.1339, "step": 271380 }, { "epoch": 0.64, "grad_norm": 2.875, "learning_rate": 0.0001537609323953728, "loss": 2.0234, "step": 271385 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.0001537593739178098, "loss": 2.1668, "step": 271390 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015375781542188163, "loss": 2.166, "step": 271395 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.0001537562569075888, "loss": 2.0048, "step": 271400 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015375469837493184, "loss": 1.9352, "step": 271405 }, { "epoch": 0.64, "grad_norm": 1.625, "learning_rate": 0.0001537531398239113, "loss": 2.0344, "step": 271410 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015375158125452776, "loss": 2.0019, "step": 271415 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015375002266678167, "loss": 2.06, "step": 271420 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015374846406067362, "loss": 1.8287, "step": 271425 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015374690543620408, "loss": 2.0749, "step": 271430 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015374534679337364, "loss": 2.1871, "step": 271435 }, { "epoch": 0.64, "grad_norm": 1.78125, "learning_rate": 0.00015374378813218283, "loss": 1.935, "step": 271440 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015374222945263214, "loss": 2.1034, "step": 271445 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015374067075472217, "loss": 2.0998, "step": 271450 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.0001537391120384534, "loss": 2.1003, "step": 271455 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015373755330382632, "loss": 1.8446, "step": 271460 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001537359945508416, "loss": 2.0424, "step": 271465 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015373443577949963, "loss": 2.0378, "step": 271470 }, { "epoch": 0.64, "grad_norm": 3.0625, "learning_rate": 0.00015373287698980102, "loss": 1.9598, "step": 271475 }, { "epoch": 0.64, "grad_norm": 2.671875, "learning_rate": 0.0001537313181817463, "loss": 2.028, "step": 271480 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.00015372975935533595, "loss": 2.0477, "step": 271485 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001537282005105706, "loss": 1.9392, "step": 271490 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.00015372664164745067, "loss": 1.9699, "step": 271495 }, { "epoch": 0.64, "grad_norm": 2.5625, "learning_rate": 0.00015372508276597678, "loss": 2.1736, "step": 271500 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001537235238661494, "loss": 1.9612, "step": 271505 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015372196494796908, "loss": 2.1755, "step": 271510 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015372040601143637, "loss": 2.1288, "step": 271515 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.0001537188470565518, "loss": 1.9847, "step": 271520 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015371728808331593, "loss": 2.1008, "step": 271525 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015371572909172928, "loss": 2.1472, "step": 271530 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.0001537141700817923, "loss": 2.0608, "step": 271535 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015371261105350556, "loss": 2.0856, "step": 271540 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015371105200686968, "loss": 2.0507, "step": 271545 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015370949294188512, "loss": 1.9956, "step": 271550 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015370793385855242, "loss": 2.2151, "step": 271555 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.0001537063747568721, "loss": 2.1425, "step": 271560 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015370481563684472, "loss": 1.8928, "step": 271565 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015370325649847078, "loss": 1.9993, "step": 271570 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015370169734175087, "loss": 2.2505, "step": 271575 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015370013816668547, "loss": 2.0897, "step": 271580 }, { "epoch": 0.64, "grad_norm": 2.59375, "learning_rate": 0.00015369857897327513, "loss": 2.1036, "step": 271585 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.0001536970197615204, "loss": 2.2242, "step": 271590 }, { "epoch": 0.64, "grad_norm": 1.875, "learning_rate": 0.00015369546053142175, "loss": 1.9556, "step": 271595 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.0001536939012829798, "loss": 2.0134, "step": 271600 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.000153692342016195, "loss": 1.8041, "step": 271605 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015369078273106795, "loss": 2.0642, "step": 271610 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015368922342759916, "loss": 1.989, "step": 271615 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.0001536876641057891, "loss": 2.1974, "step": 271620 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015368610476563843, "loss": 1.9757, "step": 271625 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.0001536845454071476, "loss": 2.2546, "step": 271630 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015368298603031714, "loss": 2.1979, "step": 271635 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.0001536814266351476, "loss": 2.0615, "step": 271640 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015367986722163952, "loss": 1.968, "step": 271645 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001536783077897934, "loss": 2.0773, "step": 271650 }, { "epoch": 0.64, "grad_norm": 3.90625, "learning_rate": 0.0001536767483396098, "loss": 1.9834, "step": 271655 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015367518887108927, "loss": 2.0313, "step": 271660 }, { "epoch": 0.64, "grad_norm": 1.7890625, "learning_rate": 0.00015367362938423234, "loss": 2.0925, "step": 271665 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001536720698790395, "loss": 2.0522, "step": 271670 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.0001536705103555113, "loss": 1.998, "step": 271675 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015366895081364828, "loss": 1.9459, "step": 271680 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.000153667391253451, "loss": 2.0547, "step": 271685 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015366583167491995, "loss": 2.1673, "step": 271690 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015366427207805567, "loss": 2.1043, "step": 271695 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001536627124628587, "loss": 1.9913, "step": 271700 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015366115282932955, "loss": 2.0317, "step": 271705 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015365959317746884, "loss": 2.1126, "step": 271710 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015365803350727702, "loss": 1.9175, "step": 271715 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.0001536564738187546, "loss": 1.9411, "step": 271720 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015365491411190217, "loss": 2.1411, "step": 271725 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015365335438672031, "loss": 1.9896, "step": 271730 }, { "epoch": 0.64, "grad_norm": 1.9296875, "learning_rate": 0.00015365179464320945, "loss": 1.8543, "step": 271735 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015365023488137015, "loss": 1.9157, "step": 271740 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015364867510120296, "loss": 2.1583, "step": 271745 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001536471153027084, "loss": 2.1327, "step": 271750 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015364555548588705, "loss": 2.1239, "step": 271755 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.0001536439956507394, "loss": 2.1411, "step": 271760 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015364243579726593, "loss": 2.0262, "step": 271765 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015364087592546728, "loss": 2.1293, "step": 271770 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015363931603534393, "loss": 2.267, "step": 271775 }, { "epoch": 0.64, "grad_norm": 1.90625, "learning_rate": 0.00015363775612689643, "loss": 1.9616, "step": 271780 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015363619620012528, "loss": 2.1137, "step": 271785 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015363463625503102, "loss": 2.2921, "step": 271790 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015363307629161422, "loss": 1.9236, "step": 271795 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015363151630987538, "loss": 2.0413, "step": 271800 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.00015362995630981504, "loss": 1.8565, "step": 271805 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.00015362839629143372, "loss": 1.9988, "step": 271810 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.000153626836254732, "loss": 1.9986, "step": 271815 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015362527619971035, "loss": 1.8511, "step": 271820 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015362371612636936, "loss": 2.2359, "step": 271825 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015362215603470953, "loss": 2.2273, "step": 271830 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015362059592473136, "loss": 1.9267, "step": 271835 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015361903579643547, "loss": 1.8895, "step": 271840 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015361747564982233, "loss": 1.9864, "step": 271845 }, { "epoch": 0.64, "grad_norm": 1.8671875, "learning_rate": 0.0001536159154848925, "loss": 2.0914, "step": 271850 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015361435530164647, "loss": 1.862, "step": 271855 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.0001536127951000848, "loss": 2.2046, "step": 271860 }, { "epoch": 0.64, "grad_norm": 1.8046875, "learning_rate": 0.0001536112348802081, "loss": 2.027, "step": 271865 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015360967464201676, "loss": 1.8713, "step": 271870 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015360811438551138, "loss": 2.0504, "step": 271875 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015360655411069253, "loss": 2.0621, "step": 271880 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.0001536049938175607, "loss": 2.0486, "step": 271885 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.0001536034335061164, "loss": 2.2444, "step": 271890 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015360187317636025, "loss": 2.1318, "step": 271895 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.0001536003128282927, "loss": 1.9641, "step": 271900 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.0001535987524619143, "loss": 2.1494, "step": 271905 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001535971920772256, "loss": 2.1345, "step": 271910 }, { "epoch": 0.64, "grad_norm": 1.8984375, "learning_rate": 0.00015359563167422714, "loss": 2.1501, "step": 271915 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015359407125291942, "loss": 1.8498, "step": 271920 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.000153592510813303, "loss": 2.0959, "step": 271925 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.00015359095035537843, "loss": 2.0282, "step": 271930 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001535893898791462, "loss": 2.0747, "step": 271935 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015358782938460684, "loss": 2.1862, "step": 271940 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015358626887176095, "loss": 2.1818, "step": 271945 }, { "epoch": 0.64, "grad_norm": 1.6015625, "learning_rate": 0.000153584708340609, "loss": 2.0081, "step": 271950 }, { "epoch": 0.64, "grad_norm": 2.671875, "learning_rate": 0.00015358314779115152, "loss": 2.0881, "step": 271955 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.0001535815872233891, "loss": 2.032, "step": 271960 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015358002663732223, "loss": 2.2539, "step": 271965 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015357846603295143, "loss": 2.0549, "step": 271970 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015357690541027726, "loss": 2.0365, "step": 271975 }, { "epoch": 0.64, "grad_norm": 1.78125, "learning_rate": 0.0001535753447693003, "loss": 2.0147, "step": 271980 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015357378411002097, "loss": 1.9719, "step": 271985 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015357222343243988, "loss": 2.0942, "step": 271990 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015357066273655754, "loss": 2.1721, "step": 271995 }, { "epoch": 0.64, "grad_norm": 3.234375, "learning_rate": 0.00015356910202237448, "loss": 1.9698, "step": 272000 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001535675412898913, "loss": 2.1252, "step": 272005 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015356598053910842, "loss": 2.0726, "step": 272010 }, { "epoch": 0.64, "grad_norm": 2.796875, "learning_rate": 0.00015356441977002646, "loss": 2.053, "step": 272015 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015356285898264588, "loss": 2.0515, "step": 272020 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.0001535612981769673, "loss": 1.9292, "step": 272025 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015355973735299122, "loss": 2.1353, "step": 272030 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.0001535581765107181, "loss": 2.0879, "step": 272035 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.0001535566156501486, "loss": 2.1982, "step": 272040 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015355505477128315, "loss": 2.1265, "step": 272045 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015355349387412233, "loss": 1.9751, "step": 272050 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015355193295866666, "loss": 2.0317, "step": 272055 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.0001535503720249167, "loss": 2.3638, "step": 272060 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015354881107287294, "loss": 2.0311, "step": 272065 }, { "epoch": 0.64, "grad_norm": 1.9921875, "learning_rate": 0.00015354725010253596, "loss": 1.9983, "step": 272070 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015354568911390623, "loss": 2.0935, "step": 272075 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015354412810698436, "loss": 2.1418, "step": 272080 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.0001535425670817708, "loss": 1.8783, "step": 272085 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.00015354100603826617, "loss": 1.9174, "step": 272090 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015353944497647096, "loss": 2.0569, "step": 272095 }, { "epoch": 0.64, "grad_norm": 1.7890625, "learning_rate": 0.0001535378838963857, "loss": 2.2237, "step": 272100 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015353632279801088, "loss": 2.0224, "step": 272105 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015353476168134713, "loss": 2.1088, "step": 272110 }, { "epoch": 0.64, "grad_norm": 2.671875, "learning_rate": 0.00015353320054639492, "loss": 1.9745, "step": 272115 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015353163939315481, "loss": 2.0795, "step": 272120 }, { "epoch": 0.64, "grad_norm": 1.8125, "learning_rate": 0.00015353007822162734, "loss": 2.157, "step": 272125 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015352851703181298, "loss": 1.9446, "step": 272130 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.00015352695582371233, "loss": 2.0713, "step": 272135 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.0001535253945973259, "loss": 2.0237, "step": 272140 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015352383335265422, "loss": 2.1942, "step": 272145 }, { "epoch": 0.64, "grad_norm": 2.546875, "learning_rate": 0.00015352227208969782, "loss": 2.1421, "step": 272150 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015352071080845724, "loss": 2.0606, "step": 272155 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015351914950893306, "loss": 2.2371, "step": 272160 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015351758819112573, "loss": 1.8852, "step": 272165 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.00015351602685503583, "loss": 1.9728, "step": 272170 }, { "epoch": 0.64, "grad_norm": 3.546875, "learning_rate": 0.00015351446550066386, "loss": 1.9677, "step": 272175 }, { "epoch": 0.64, "grad_norm": 1.8984375, "learning_rate": 0.00015351290412801038, "loss": 2.1432, "step": 272180 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015351134273707597, "loss": 1.9937, "step": 272185 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015350978132786108, "loss": 2.0818, "step": 272190 }, { "epoch": 0.64, "grad_norm": 1.9140625, "learning_rate": 0.0001535082199003663, "loss": 2.016, "step": 272195 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015350665845459212, "loss": 2.1909, "step": 272200 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015350509699053907, "loss": 2.0388, "step": 272205 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015350353550820775, "loss": 2.3006, "step": 272210 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.00015350197400759865, "loss": 1.8512, "step": 272215 }, { "epoch": 0.64, "grad_norm": 1.8984375, "learning_rate": 0.00015350041248871229, "loss": 2.1707, "step": 272220 }, { "epoch": 0.64, "grad_norm": 1.796875, "learning_rate": 0.0001534988509515492, "loss": 2.0044, "step": 272225 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015349728939610997, "loss": 1.8538, "step": 272230 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.0001534957278223951, "loss": 1.9993, "step": 272235 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015349416623040507, "loss": 2.1678, "step": 272240 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.0001534926046201405, "loss": 2.0453, "step": 272245 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015349104299160188, "loss": 2.1334, "step": 272250 }, { "epoch": 0.64, "grad_norm": 1.828125, "learning_rate": 0.00015348948134478974, "loss": 1.7482, "step": 272255 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015348791967970463, "loss": 2.1939, "step": 272260 }, { "epoch": 0.64, "grad_norm": 1.9296875, "learning_rate": 0.00015348635799634707, "loss": 2.1145, "step": 272265 }, { "epoch": 0.64, "grad_norm": 1.8203125, "learning_rate": 0.00015348479629471765, "loss": 2.0622, "step": 272270 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.0001534832345748168, "loss": 2.0904, "step": 272275 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015348167283664514, "loss": 2.0705, "step": 272280 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015348011108020314, "loss": 2.2281, "step": 272285 }, { "epoch": 0.64, "grad_norm": 1.875, "learning_rate": 0.00015347854930549133, "loss": 2.1454, "step": 272290 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015347698751251034, "loss": 2.1752, "step": 272295 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015347542570126062, "loss": 1.9426, "step": 272300 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015347386387174275, "loss": 2.2019, "step": 272305 }, { "epoch": 0.64, "grad_norm": 1.828125, "learning_rate": 0.0001534723020239572, "loss": 1.9757, "step": 272310 }, { "epoch": 0.64, "grad_norm": 1.890625, "learning_rate": 0.00015347074015790456, "loss": 2.2331, "step": 272315 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015346917827358536, "loss": 2.0528, "step": 272320 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.0001534676163710001, "loss": 1.916, "step": 272325 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015346605445014936, "loss": 2.1542, "step": 272330 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.0001534644925110336, "loss": 2.1193, "step": 272335 }, { "epoch": 0.64, "grad_norm": 1.9921875, "learning_rate": 0.0001534629305536534, "loss": 1.904, "step": 272340 }, { "epoch": 0.64, "grad_norm": 2.546875, "learning_rate": 0.00015346136857800934, "loss": 1.959, "step": 272345 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015345980658410189, "loss": 2.0468, "step": 272350 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015345824457193156, "loss": 2.075, "step": 272355 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015345668254149896, "loss": 1.9146, "step": 272360 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.0001534551204928046, "loss": 1.9544, "step": 272365 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015345355842584897, "loss": 2.017, "step": 272370 }, { "epoch": 0.64, "grad_norm": 1.671875, "learning_rate": 0.00015345199634063264, "loss": 2.137, "step": 272375 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015345043423715615, "loss": 2.1435, "step": 272380 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.00015344887211542004, "loss": 2.149, "step": 272385 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.00015344730997542483, "loss": 2.0939, "step": 272390 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.000153445747817171, "loss": 2.1542, "step": 272395 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015344418564065915, "loss": 2.2022, "step": 272400 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.0001534426234458898, "loss": 2.1173, "step": 272405 }, { "epoch": 0.64, "grad_norm": 2.5625, "learning_rate": 0.00015344106123286352, "loss": 2.0662, "step": 272410 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015343949900158076, "loss": 2.1339, "step": 272415 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015343793675204212, "loss": 2.135, "step": 272420 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015343637448424807, "loss": 2.0223, "step": 272425 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015343481219819922, "loss": 1.9025, "step": 272430 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.0001534332498938961, "loss": 2.0986, "step": 272435 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015343168757133915, "loss": 1.9905, "step": 272440 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015343012523052902, "loss": 2.156, "step": 272445 }, { "epoch": 0.64, "grad_norm": 1.9296875, "learning_rate": 0.00015342856287146615, "loss": 1.9648, "step": 272450 }, { "epoch": 0.64, "grad_norm": 1.828125, "learning_rate": 0.00015342700049415114, "loss": 2.1476, "step": 272455 }, { "epoch": 0.64, "grad_norm": 3.15625, "learning_rate": 0.00015342543809858448, "loss": 2.0217, "step": 272460 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.00015342387568476672, "loss": 1.9998, "step": 272465 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.0001534223132526984, "loss": 2.3569, "step": 272470 }, { "epoch": 0.64, "grad_norm": 1.578125, "learning_rate": 0.00015342075080238004, "loss": 1.9658, "step": 272475 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.0001534191883338122, "loss": 2.1808, "step": 272480 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001534176258469954, "loss": 2.1042, "step": 272485 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015341606334193017, "loss": 2.2853, "step": 272490 }, { "epoch": 0.64, "grad_norm": 1.7421875, "learning_rate": 0.00015341450081861704, "loss": 1.9559, "step": 272495 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015341293827705654, "loss": 2.0791, "step": 272500 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.0001534113757172492, "loss": 1.9284, "step": 272505 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.0001534098131391956, "loss": 2.2151, "step": 272510 }, { "epoch": 0.64, "grad_norm": 1.8671875, "learning_rate": 0.0001534082505428962, "loss": 2.1994, "step": 272515 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.0001534066879283516, "loss": 2.0784, "step": 272520 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.0001534051252955623, "loss": 2.1275, "step": 272525 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015340356264452885, "loss": 1.9924, "step": 272530 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015340199997525175, "loss": 2.0603, "step": 272535 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015340043728773157, "loss": 2.1161, "step": 272540 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015339887458196884, "loss": 2.2499, "step": 272545 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.0001533973118579641, "loss": 2.1139, "step": 272550 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015339574911571783, "loss": 2.027, "step": 272555 }, { "epoch": 0.64, "grad_norm": 1.9296875, "learning_rate": 0.00015339418635523062, "loss": 2.1014, "step": 272560 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015339262357650299, "loss": 2.1446, "step": 272565 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015339106077953552, "loss": 2.0395, "step": 272570 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015338949796432863, "loss": 1.9609, "step": 272575 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.00015338793513088294, "loss": 2.1069, "step": 272580 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001533863722791989, "loss": 1.8513, "step": 272585 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.0001533848094092772, "loss": 2.167, "step": 272590 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001533832465211183, "loss": 2.0363, "step": 272595 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015338168361472265, "loss": 2.0234, "step": 272600 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.00015338012069009084, "loss": 2.2624, "step": 272605 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015337855774722343, "loss": 2.133, "step": 272610 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015337699478612095, "loss": 2.0091, "step": 272615 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015337543180678393, "loss": 2.0664, "step": 272620 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015337386880921288, "loss": 1.9083, "step": 272625 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015337230579340832, "loss": 2.143, "step": 272630 }, { "epoch": 0.64, "grad_norm": 1.515625, "learning_rate": 0.00015337074275937086, "loss": 2.0859, "step": 272635 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015336917970710094, "loss": 2.0453, "step": 272640 }, { "epoch": 0.64, "grad_norm": 1.890625, "learning_rate": 0.0001533676166365992, "loss": 2.1147, "step": 272645 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015336605354786603, "loss": 2.1414, "step": 272650 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015336449044090213, "loss": 2.1339, "step": 272655 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.0001533629273157079, "loss": 2.0929, "step": 272660 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015336136417228396, "loss": 2.1079, "step": 272665 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015335980101063078, "loss": 2.121, "step": 272670 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.00015335823783074894, "loss": 2.2212, "step": 272675 }, { "epoch": 0.64, "grad_norm": 1.8125, "learning_rate": 0.00015335667463263895, "loss": 2.0827, "step": 272680 }, { "epoch": 0.64, "grad_norm": 1.9140625, "learning_rate": 0.00015335511141630138, "loss": 1.9945, "step": 272685 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015335354818173672, "loss": 2.0439, "step": 272690 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015335198492894548, "loss": 1.9545, "step": 272695 }, { "epoch": 0.64, "grad_norm": 2.5625, "learning_rate": 0.0001533504216579283, "loss": 2.2506, "step": 272700 }, { "epoch": 0.64, "grad_norm": 1.9140625, "learning_rate": 0.00015334885836868563, "loss": 2.1215, "step": 272705 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015334729506121802, "loss": 2.1981, "step": 272710 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.000153345731735526, "loss": 2.0584, "step": 272715 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.0001533441683916101, "loss": 1.9352, "step": 272720 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.00015334260502947086, "loss": 1.9093, "step": 272725 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015334104164910884, "loss": 2.1715, "step": 272730 }, { "epoch": 0.64, "grad_norm": 2.546875, "learning_rate": 0.00015333947825052455, "loss": 2.1606, "step": 272735 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015333791483371854, "loss": 2.1092, "step": 272740 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015333635139869127, "loss": 2.0575, "step": 272745 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.0001533347879454434, "loss": 2.152, "step": 272750 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.0001533332244739754, "loss": 1.9305, "step": 272755 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015333166098428777, "loss": 2.0719, "step": 272760 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.00015333009747638108, "loss": 2.3572, "step": 272765 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015332853395025583, "loss": 2.0854, "step": 272770 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015332697040591265, "loss": 2.01, "step": 272775 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.000153325406843352, "loss": 2.0267, "step": 272780 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.0001533238432625744, "loss": 1.9693, "step": 272785 }, { "epoch": 0.64, "grad_norm": 2.5, "learning_rate": 0.00015332227966358044, "loss": 2.0079, "step": 272790 }, { "epoch": 0.64, "grad_norm": 1.75, "learning_rate": 0.00015332071604637058, "loss": 2.0706, "step": 272795 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015331915241094542, "loss": 2.0675, "step": 272800 }, { "epoch": 0.64, "grad_norm": 2.59375, "learning_rate": 0.00015331758875730545, "loss": 2.2974, "step": 272805 }, { "epoch": 0.64, "grad_norm": 1.734375, "learning_rate": 0.00015331602508545123, "loss": 2.1753, "step": 272810 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015331446139538332, "loss": 2.0592, "step": 272815 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.0001533128976871022, "loss": 2.0955, "step": 272820 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015331133396060843, "loss": 1.9793, "step": 272825 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015330977021590255, "loss": 2.0668, "step": 272830 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015330820645298508, "loss": 2.0357, "step": 272835 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015330664267185654, "loss": 2.2015, "step": 272840 }, { "epoch": 0.64, "grad_norm": 1.828125, "learning_rate": 0.00015330507887251752, "loss": 2.0074, "step": 272845 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.0001533035150549685, "loss": 2.0564, "step": 272850 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015330195121921004, "loss": 2.1819, "step": 272855 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015330038736524262, "loss": 2.0254, "step": 272860 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.0001532988234930669, "loss": 2.0462, "step": 272865 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015329725960268328, "loss": 1.9215, "step": 272870 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015329569569409238, "loss": 1.9649, "step": 272875 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.00015329413176729467, "loss": 1.9705, "step": 272880 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015329256782229073, "loss": 2.1825, "step": 272885 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015329100385908114, "loss": 2.0782, "step": 272890 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.0001532894398776663, "loss": 2.1568, "step": 272895 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015328787587804685, "loss": 1.9258, "step": 272900 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015328631186022326, "loss": 1.9747, "step": 272905 }, { "epoch": 0.64, "grad_norm": 1.9921875, "learning_rate": 0.00015328474782419615, "loss": 1.9071, "step": 272910 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015328318376996598, "loss": 2.2019, "step": 272915 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015328161969753332, "loss": 1.9405, "step": 272920 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.0001532800556068987, "loss": 2.0032, "step": 272925 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015327849149806262, "loss": 1.9593, "step": 272930 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015327692737102564, "loss": 2.0927, "step": 272935 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.0001532753632257883, "loss": 1.9162, "step": 272940 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015327379906235117, "loss": 1.9818, "step": 272945 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.00015327223488071468, "loss": 2.2628, "step": 272950 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.0001532706706808795, "loss": 2.1458, "step": 272955 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.000153269106462846, "loss": 2.037, "step": 272960 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001532675422266149, "loss": 2.2756, "step": 272965 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.0001532659779721866, "loss": 1.9916, "step": 272970 }, { "epoch": 0.64, "grad_norm": 2.75, "learning_rate": 0.00015326441369956166, "loss": 2.1008, "step": 272975 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001532628494087407, "loss": 2.0901, "step": 272980 }, { "epoch": 0.64, "grad_norm": 2.953125, "learning_rate": 0.0001532612850997241, "loss": 2.2028, "step": 272985 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015325972077251252, "loss": 1.9828, "step": 272990 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.0001532581564271064, "loss": 1.9996, "step": 272995 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.0001532565920635064, "loss": 2.1985, "step": 273000 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015325502768171297, "loss": 1.9649, "step": 273005 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015325346328172663, "loss": 2.0741, "step": 273010 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015325189886354794, "loss": 1.9899, "step": 273015 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015325033442717745, "loss": 2.0806, "step": 273020 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.0001532487699726157, "loss": 2.0893, "step": 273025 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.00015324720549986318, "loss": 2.083, "step": 273030 }, { "epoch": 0.64, "grad_norm": 1.8359375, "learning_rate": 0.00015324564100892046, "loss": 1.9232, "step": 273035 }, { "epoch": 0.64, "grad_norm": 1.875, "learning_rate": 0.00015324407649978806, "loss": 2.2101, "step": 273040 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.0001532425119724665, "loss": 2.0212, "step": 273045 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015324094742695635, "loss": 1.8805, "step": 273050 }, { "epoch": 0.64, "grad_norm": 2.59375, "learning_rate": 0.0001532393828632581, "loss": 1.984, "step": 273055 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015323781828137235, "loss": 1.8424, "step": 273060 }, { "epoch": 0.64, "grad_norm": 1.671875, "learning_rate": 0.00015323625368129958, "loss": 2.0518, "step": 273065 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015323468906304035, "loss": 2.2186, "step": 273070 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015323312442659515, "loss": 2.0378, "step": 273075 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001532315597719646, "loss": 2.1519, "step": 273080 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015322999509914914, "loss": 2.2036, "step": 273085 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015322843040814938, "loss": 1.9639, "step": 273090 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.00015322686569896581, "loss": 2.1421, "step": 273095 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015322530097159899, "loss": 2.0011, "step": 273100 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.0001532237362260494, "loss": 2.0583, "step": 273105 }, { "epoch": 0.64, "grad_norm": 1.734375, "learning_rate": 0.00015322217146231766, "loss": 2.1288, "step": 273110 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015322060668040424, "loss": 1.9587, "step": 273115 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.0001532190418803097, "loss": 2.0937, "step": 273120 }, { "epoch": 0.64, "grad_norm": 1.8359375, "learning_rate": 0.00015321747706203458, "loss": 2.1543, "step": 273125 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.0001532159122255794, "loss": 2.2168, "step": 273130 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015321434737094469, "loss": 2.1393, "step": 273135 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.000153212782498131, "loss": 2.2106, "step": 273140 }, { "epoch": 0.64, "grad_norm": 1.9609375, "learning_rate": 0.00015321121760713885, "loss": 1.9591, "step": 273145 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001532096526979688, "loss": 2.1285, "step": 273150 }, { "epoch": 0.64, "grad_norm": 1.859375, "learning_rate": 0.00015320808777062134, "loss": 2.0013, "step": 273155 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.0001532065228250971, "loss": 1.9855, "step": 273160 }, { "epoch": 0.64, "grad_norm": 1.9921875, "learning_rate": 0.00015320495786139645, "loss": 2.0873, "step": 273165 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.0001532033928795201, "loss": 2.2037, "step": 273170 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015320182787946846, "loss": 2.1385, "step": 273175 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015320026286124211, "loss": 1.9492, "step": 273180 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015319869782484162, "loss": 1.9581, "step": 273185 }, { "epoch": 0.64, "grad_norm": 1.7265625, "learning_rate": 0.00015319713277026748, "loss": 2.0032, "step": 273190 }, { "epoch": 0.64, "grad_norm": 1.90625, "learning_rate": 0.00015319556769752022, "loss": 2.0147, "step": 273195 }, { "epoch": 0.64, "grad_norm": 1.765625, "learning_rate": 0.00015319400260660037, "loss": 2.0351, "step": 273200 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.00015319243749750854, "loss": 1.9984, "step": 273205 }, { "epoch": 0.64, "grad_norm": 1.953125, "learning_rate": 0.00015319087237024518, "loss": 2.0502, "step": 273210 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015318930722481083, "loss": 1.8222, "step": 273215 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015318774206120607, "loss": 2.1495, "step": 273220 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.0001531861768794314, "loss": 1.9167, "step": 273225 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015318461167948742, "loss": 2.1641, "step": 273230 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.00015318304646137456, "loss": 2.1552, "step": 273235 }, { "epoch": 0.64, "grad_norm": 1.8671875, "learning_rate": 0.0001531814812250934, "loss": 2.2225, "step": 273240 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.0001531799159706445, "loss": 2.1461, "step": 273245 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.0001531783506980284, "loss": 2.0667, "step": 273250 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015317678540724557, "loss": 2.1927, "step": 273255 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.0001531752200982966, "loss": 2.0091, "step": 273260 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.000153173654771182, "loss": 2.0501, "step": 273265 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015317208942590234, "loss": 2.0555, "step": 273270 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015317052406245814, "loss": 1.9991, "step": 273275 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.0001531689586808499, "loss": 2.1565, "step": 273280 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015316739328107817, "loss": 2.0525, "step": 273285 }, { "epoch": 0.64, "grad_norm": 2.578125, "learning_rate": 0.00015316582786314352, "loss": 2.1674, "step": 273290 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.0001531642624270464, "loss": 2.0914, "step": 273295 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.0001531626969727875, "loss": 2.1347, "step": 273300 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015316113150036717, "loss": 2.0168, "step": 273305 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015315956600978607, "loss": 2.0375, "step": 273310 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.0001531580005010447, "loss": 2.0672, "step": 273315 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.0001531564349741436, "loss": 2.1063, "step": 273320 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.0001531548694290833, "loss": 2.1759, "step": 273325 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015315330386586433, "loss": 1.9206, "step": 273330 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.0001531517382844872, "loss": 2.2278, "step": 273335 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.0001531501726849525, "loss": 2.2017, "step": 273340 }, { "epoch": 0.64, "grad_norm": 1.7109375, "learning_rate": 0.00015314860706726072, "loss": 1.9975, "step": 273345 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015314704143141243, "loss": 1.9735, "step": 273350 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015314547577740815, "loss": 2.1451, "step": 273355 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015314391010524836, "loss": 2.1461, "step": 273360 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.0001531423444149337, "loss": 2.0379, "step": 273365 }, { "epoch": 0.64, "grad_norm": 2.453125, "learning_rate": 0.00015314077870646467, "loss": 2.0412, "step": 273370 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015313921297984174, "loss": 2.0121, "step": 273375 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.0001531376472350655, "loss": 2.0818, "step": 273380 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.0001531360814721365, "loss": 2.2183, "step": 273385 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.0001531345156910552, "loss": 1.868, "step": 273390 }, { "epoch": 0.64, "grad_norm": 2.6875, "learning_rate": 0.00015313294989182223, "loss": 1.8938, "step": 273395 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.0001531313840744381, "loss": 2.029, "step": 273400 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015312981823890323, "loss": 1.886, "step": 273405 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015312825238521834, "loss": 2.1746, "step": 273410 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015312668651338383, "loss": 2.026, "step": 273415 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.00015312512062340031, "loss": 2.0524, "step": 273420 }, { "epoch": 0.64, "grad_norm": 2.578125, "learning_rate": 0.00015312355471526828, "loss": 2.2201, "step": 273425 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015312198878898826, "loss": 2.0806, "step": 273430 }, { "epoch": 0.64, "grad_norm": 1.796875, "learning_rate": 0.00015312042284456085, "loss": 2.1977, "step": 273435 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.00015311885688198647, "loss": 1.956, "step": 273440 }, { "epoch": 0.64, "grad_norm": 2.234375, "learning_rate": 0.00015311729090126578, "loss": 2.1284, "step": 273445 }, { "epoch": 0.64, "grad_norm": 2.53125, "learning_rate": 0.00015311572490239926, "loss": 2.1882, "step": 273450 }, { "epoch": 0.64, "grad_norm": 3.28125, "learning_rate": 0.00015311415888538744, "loss": 2.2329, "step": 273455 }, { "epoch": 0.64, "grad_norm": 2.515625, "learning_rate": 0.00015311259285023088, "loss": 2.1145, "step": 273460 }, { "epoch": 0.64, "grad_norm": 1.90625, "learning_rate": 0.00015311102679693006, "loss": 2.1623, "step": 273465 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015310946072548553, "loss": 2.1435, "step": 273470 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015310789463589787, "loss": 2.0886, "step": 273475 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.0001531063285281676, "loss": 2.1458, "step": 273480 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015310476240229526, "loss": 2.1289, "step": 273485 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.0001531031962582813, "loss": 2.1119, "step": 273490 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015310163009612637, "loss": 2.0798, "step": 273495 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.000153100063915831, "loss": 2.0486, "step": 273500 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015309849771739565, "loss": 1.8828, "step": 273505 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.00015309693150082087, "loss": 1.9562, "step": 273510 }, { "epoch": 0.64, "grad_norm": 2.890625, "learning_rate": 0.00015309536526610725, "loss": 2.1796, "step": 273515 }, { "epoch": 0.64, "grad_norm": 2.78125, "learning_rate": 0.00015309379901325528, "loss": 2.2117, "step": 273520 }, { "epoch": 0.64, "grad_norm": 2.140625, "learning_rate": 0.00015309223274226547, "loss": 1.9687, "step": 273525 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.0001530906664531384, "loss": 2.2133, "step": 273530 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015308910014587464, "loss": 2.1353, "step": 273535 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015308753382047466, "loss": 1.8261, "step": 273540 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.000153085967476939, "loss": 1.9258, "step": 273545 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.0001530844011152682, "loss": 2.1056, "step": 273550 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015308283473546282, "loss": 2.1858, "step": 273555 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001530812683375234, "loss": 1.9731, "step": 273560 }, { "epoch": 0.64, "grad_norm": 1.8671875, "learning_rate": 0.00015307970192145044, "loss": 2.0179, "step": 273565 }, { "epoch": 0.64, "grad_norm": 1.90625, "learning_rate": 0.0001530781354872445, "loss": 2.0893, "step": 273570 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.0001530765690349061, "loss": 2.124, "step": 273575 }, { "epoch": 0.64, "grad_norm": 1.78125, "learning_rate": 0.0001530750025644358, "loss": 1.8349, "step": 273580 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.0001530734360758341, "loss": 2.125, "step": 273585 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015307186956910152, "loss": 2.0232, "step": 273590 }, { "epoch": 0.64, "grad_norm": 1.859375, "learning_rate": 0.00015307030304423868, "loss": 2.0454, "step": 273595 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.00015306873650124602, "loss": 2.1181, "step": 273600 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015306716994012415, "loss": 2.2362, "step": 273605 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.00015306560336087355, "loss": 2.0934, "step": 273610 }, { "epoch": 0.64, "grad_norm": 2.171875, "learning_rate": 0.00015306403676349482, "loss": 2.131, "step": 273615 }, { "epoch": 0.64, "grad_norm": 2.0625, "learning_rate": 0.0001530624701479884, "loss": 2.0576, "step": 273620 }, { "epoch": 0.64, "grad_norm": 1.828125, "learning_rate": 0.00015306090351435491, "loss": 1.8169, "step": 273625 }, { "epoch": 0.64, "grad_norm": 1.9296875, "learning_rate": 0.00015305933686259482, "loss": 2.1407, "step": 273630 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015305777019270872, "loss": 2.0021, "step": 273635 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015305620350469714, "loss": 2.1715, "step": 273640 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015305463679856057, "loss": 2.0408, "step": 273645 }, { "epoch": 0.64, "grad_norm": 2.28125, "learning_rate": 0.00015305307007429958, "loss": 2.1576, "step": 273650 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.0001530515033319147, "loss": 1.9108, "step": 273655 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015304993657140647, "loss": 2.0454, "step": 273660 }, { "epoch": 0.64, "grad_norm": 1.9453125, "learning_rate": 0.00015304836979277542, "loss": 2.0914, "step": 273665 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.0001530468029960221, "loss": 2.0329, "step": 273670 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015304523618114702, "loss": 2.0208, "step": 273675 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.0001530436693481507, "loss": 1.9309, "step": 273680 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.0001530421024970337, "loss": 2.201, "step": 273685 }, { "epoch": 0.64, "grad_norm": 1.8984375, "learning_rate": 0.00015304053562779658, "loss": 2.0145, "step": 273690 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015303896874043987, "loss": 2.1438, "step": 273695 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015303740183496406, "loss": 2.0823, "step": 273700 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.0001530358349113697, "loss": 1.9022, "step": 273705 }, { "epoch": 0.64, "grad_norm": 2.21875, "learning_rate": 0.00015303426796965734, "loss": 1.8862, "step": 273710 }, { "epoch": 0.64, "grad_norm": 2.328125, "learning_rate": 0.00015303270100982755, "loss": 2.0421, "step": 273715 }, { "epoch": 0.64, "grad_norm": 2.828125, "learning_rate": 0.00015303113403188076, "loss": 1.9849, "step": 273720 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015302956703581763, "loss": 2.1173, "step": 273725 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015302800002163863, "loss": 1.998, "step": 273730 }, { "epoch": 0.64, "grad_norm": 2.015625, "learning_rate": 0.00015302643298934426, "loss": 2.0163, "step": 273735 }, { "epoch": 0.64, "grad_norm": 1.7734375, "learning_rate": 0.00015302486593893514, "loss": 1.8658, "step": 273740 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.00015302329887041175, "loss": 2.0578, "step": 273745 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015302173178377467, "loss": 2.106, "step": 273750 }, { "epoch": 0.64, "grad_norm": 2.390625, "learning_rate": 0.0001530201646790244, "loss": 2.0808, "step": 273755 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.00015301859755616147, "loss": 2.1098, "step": 273760 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.00015301703041518638, "loss": 2.2353, "step": 273765 }, { "epoch": 0.64, "grad_norm": 2.0, "learning_rate": 0.00015301546325609974, "loss": 2.1728, "step": 273770 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.0001530138960789021, "loss": 2.1318, "step": 273775 }, { "epoch": 0.64, "grad_norm": 2.46875, "learning_rate": 0.00015301232888359392, "loss": 2.099, "step": 273780 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015301076167017577, "loss": 1.9336, "step": 273785 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015300919443864814, "loss": 2.1137, "step": 273790 }, { "epoch": 0.64, "grad_norm": 1.8828125, "learning_rate": 0.00015300762718901164, "loss": 2.1761, "step": 273795 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001530060599212668, "loss": 2.2985, "step": 273800 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.0001530044926354141, "loss": 2.0507, "step": 273805 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015300292533145413, "loss": 2.1403, "step": 273810 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.00015300135800938738, "loss": 2.0147, "step": 273815 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.0001529997906692144, "loss": 1.9107, "step": 273820 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015299822331093572, "loss": 2.0497, "step": 273825 }, { "epoch": 0.64, "grad_norm": 2.203125, "learning_rate": 0.00015299665593455187, "loss": 2.0949, "step": 273830 }, { "epoch": 0.64, "grad_norm": 1.9609375, "learning_rate": 0.00015299508854006345, "loss": 2.0507, "step": 273835 }, { "epoch": 0.64, "grad_norm": 1.9609375, "learning_rate": 0.00015299352112747093, "loss": 1.9414, "step": 273840 }, { "epoch": 0.64, "grad_norm": 2.640625, "learning_rate": 0.00015299195369677487, "loss": 1.9387, "step": 273845 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015299038624797578, "loss": 2.1068, "step": 273850 }, { "epoch": 0.64, "grad_norm": 2.421875, "learning_rate": 0.00015298881878107422, "loss": 2.016, "step": 273855 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.0001529872512960707, "loss": 2.0802, "step": 273860 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015298568379296584, "loss": 2.0825, "step": 273865 }, { "epoch": 0.64, "grad_norm": 2.25, "learning_rate": 0.00015298411627176, "loss": 1.9898, "step": 273870 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015298254873245392, "loss": 2.0317, "step": 273875 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.000152980981175048, "loss": 2.0266, "step": 273880 }, { "epoch": 0.64, "grad_norm": 2.3125, "learning_rate": 0.00015297941359954283, "loss": 2.1928, "step": 273885 }, { "epoch": 0.64, "grad_norm": 1.9375, "learning_rate": 0.00015297784600593894, "loss": 1.9808, "step": 273890 }, { "epoch": 0.64, "grad_norm": 1.9765625, "learning_rate": 0.00015297627839423682, "loss": 2.1247, "step": 273895 }, { "epoch": 0.64, "grad_norm": 1.96875, "learning_rate": 0.00015297471076443706, "loss": 2.081, "step": 273900 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015297314311654017, "loss": 1.9932, "step": 273905 }, { "epoch": 0.64, "grad_norm": 2.34375, "learning_rate": 0.00015297157545054673, "loss": 2.0787, "step": 273910 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015297000776645723, "loss": 1.9449, "step": 273915 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015296844006427218, "loss": 1.9997, "step": 273920 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015296687234399217, "loss": 1.9184, "step": 273925 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015296530460561774, "loss": 2.0084, "step": 273930 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015296373684914934, "loss": 1.7387, "step": 273935 }, { "epoch": 0.64, "grad_norm": 2.484375, "learning_rate": 0.0001529621690745876, "loss": 2.1553, "step": 273940 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015296060128193304, "loss": 2.0837, "step": 273945 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 0.00015295903347118617, "loss": 1.9806, "step": 273950 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.00015295746564234755, "loss": 2.2032, "step": 273955 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015295589779541766, "loss": 1.9079, "step": 273960 }, { "epoch": 0.64, "grad_norm": 2.125, "learning_rate": 0.0001529543299303971, "loss": 2.2473, "step": 273965 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015295276204728637, "loss": 1.9657, "step": 273970 }, { "epoch": 0.64, "grad_norm": 2.296875, "learning_rate": 0.00015295119414608603, "loss": 2.1828, "step": 273975 }, { "epoch": 0.64, "grad_norm": 2.03125, "learning_rate": 0.00015294962622679656, "loss": 2.1698, "step": 273980 }, { "epoch": 0.64, "grad_norm": 2.265625, "learning_rate": 0.0001529480582894186, "loss": 2.0367, "step": 273985 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015294649033395258, "loss": 2.0893, "step": 273990 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015294492236039907, "loss": 1.9964, "step": 273995 }, { "epoch": 0.64, "grad_norm": 2.40625, "learning_rate": 0.00015294335436875865, "loss": 2.1663, "step": 274000 }, { "epoch": 0.64, "grad_norm": 2.375, "learning_rate": 0.0001529417863590318, "loss": 2.151, "step": 274005 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015294021833121908, "loss": 2.1425, "step": 274010 }, { "epoch": 0.64, "grad_norm": 1.921875, "learning_rate": 0.000152938650285321, "loss": 2.122, "step": 274015 }, { "epoch": 0.64, "grad_norm": 1.984375, "learning_rate": 0.00015293708222133815, "loss": 2.1916, "step": 274020 }, { "epoch": 0.64, "grad_norm": 2.109375, "learning_rate": 0.00015293551413927105, "loss": 2.1567, "step": 274025 }, { "epoch": 0.64, "grad_norm": 2.359375, "learning_rate": 0.00015293394603912018, "loss": 1.9458, "step": 274030 }, { "epoch": 0.64, "grad_norm": 1.8203125, "learning_rate": 0.00015293237792088612, "loss": 2.2294, "step": 274035 }, { "epoch": 0.64, "grad_norm": 2.625, "learning_rate": 0.00015293080978456938, "loss": 1.9904, "step": 274040 }, { "epoch": 0.64, "grad_norm": 2.1875, "learning_rate": 0.00015292924163017057, "loss": 1.9406, "step": 274045 }, { "epoch": 0.64, "grad_norm": 2.046875, "learning_rate": 0.00015292767345769014, "loss": 2.1477, "step": 274050 }, { "epoch": 0.64, "grad_norm": 2.4375, "learning_rate": 0.00015292610526712865, "loss": 2.1343, "step": 274055 }, { "epoch": 0.64, "grad_norm": 1.65625, "learning_rate": 0.00015292453705848663, "loss": 2.0153, "step": 274060 }, { "epoch": 0.64, "grad_norm": 2.078125, "learning_rate": 0.00015292296883176465, "loss": 2.2419, "step": 274065 }, { "epoch": 0.64, "grad_norm": 2.609375, "learning_rate": 0.00015292140058696324, "loss": 2.0613, "step": 274070 }, { "epoch": 0.64, "grad_norm": 2.15625, "learning_rate": 0.0001529198323240829, "loss": 2.0917, "step": 274075 }, { "epoch": 0.64, "grad_norm": 2.75, "learning_rate": 0.00015291826404312419, "loss": 2.1337, "step": 274080 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.0001529166957440876, "loss": 2.0127, "step": 274085 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015291512742697378, "loss": 2.2806, "step": 274090 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015291355909178314, "loss": 2.1957, "step": 274095 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.0001529119907385163, "loss": 2.0927, "step": 274100 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015291042236717373, "loss": 2.2612, "step": 274105 }, { "epoch": 0.65, "grad_norm": 2.515625, "learning_rate": 0.00015290885397775602, "loss": 2.292, "step": 274110 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001529072855702637, "loss": 2.12, "step": 274115 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001529057171446973, "loss": 2.041, "step": 274120 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.0001529041487010573, "loss": 2.1627, "step": 274125 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015290258023934432, "loss": 2.1651, "step": 274130 }, { "epoch": 0.65, "grad_norm": 1.9296875, "learning_rate": 0.00015290101175955884, "loss": 2.1893, "step": 274135 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015289944326170142, "loss": 2.0825, "step": 274140 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001528978747457726, "loss": 2.0747, "step": 274145 }, { "epoch": 0.65, "grad_norm": 1.7421875, "learning_rate": 0.00015289630621177292, "loss": 1.8688, "step": 274150 }, { "epoch": 0.65, "grad_norm": 2.625, "learning_rate": 0.00015289473765970287, "loss": 2.1347, "step": 274155 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015289316908956307, "loss": 2.1394, "step": 274160 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.0001528916005013539, "loss": 1.9857, "step": 274165 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015289003189507608, "loss": 2.1032, "step": 274170 }, { "epoch": 0.65, "grad_norm": 1.796875, "learning_rate": 0.00015288846327073005, "loss": 2.1221, "step": 274175 }, { "epoch": 0.65, "grad_norm": 1.84375, "learning_rate": 0.0001528868946283164, "loss": 1.9563, "step": 274180 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.0001528853259678356, "loss": 2.0618, "step": 274185 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015288375728928817, "loss": 2.051, "step": 274190 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015288218859267473, "loss": 2.1102, "step": 274195 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.0001528806198779958, "loss": 2.2329, "step": 274200 }, { "epoch": 0.65, "grad_norm": 1.7265625, "learning_rate": 0.00015287905114525185, "loss": 2.1473, "step": 274205 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015287748239444345, "loss": 2.0863, "step": 274210 }, { "epoch": 0.65, "grad_norm": 1.8828125, "learning_rate": 0.00015287591362557116, "loss": 2.114, "step": 274215 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.00015287434483863548, "loss": 2.0699, "step": 274220 }, { "epoch": 0.65, "grad_norm": 1.65625, "learning_rate": 0.000152872776033637, "loss": 2.1126, "step": 274225 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.0001528712072105762, "loss": 2.1015, "step": 274230 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015286963836945368, "loss": 2.2643, "step": 274235 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015286806951026987, "loss": 1.873, "step": 274240 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.0001528665006330254, "loss": 1.9612, "step": 274245 }, { "epoch": 0.65, "grad_norm": 1.8515625, "learning_rate": 0.00015286493173772076, "loss": 2.2318, "step": 274250 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.0001528633628243565, "loss": 2.098, "step": 274255 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015286179389293318, "loss": 2.0774, "step": 274260 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.0001528602249434513, "loss": 1.9582, "step": 274265 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.0001528586559759114, "loss": 2.0096, "step": 274270 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015285708699031402, "loss": 2.1645, "step": 274275 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015285551798665972, "loss": 2.2376, "step": 274280 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015285394896494902, "loss": 2.0143, "step": 274285 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001528523799251824, "loss": 1.9623, "step": 274290 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015285081086736054, "loss": 2.0683, "step": 274295 }, { "epoch": 0.65, "grad_norm": 2.890625, "learning_rate": 0.00015284924179148382, "loss": 2.2344, "step": 274300 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015284767269755284, "loss": 1.9528, "step": 274305 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015284610358556815, "loss": 2.1377, "step": 274310 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015284453445553028, "loss": 1.8866, "step": 274315 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.0001528429653074398, "loss": 1.9994, "step": 274320 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.00015284139614129712, "loss": 2.101, "step": 274325 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015283982695710288, "loss": 2.0996, "step": 274330 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.0001528382577548576, "loss": 2.13, "step": 274335 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.00015283668853456186, "loss": 1.9684, "step": 274340 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015283511929621612, "loss": 2.0106, "step": 274345 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.0001528335500398209, "loss": 2.1627, "step": 274350 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015283198076537682, "loss": 2.1145, "step": 274355 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.0001528304114728844, "loss": 2.236, "step": 274360 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015282884216234416, "loss": 2.0972, "step": 274365 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015282727283375658, "loss": 2.0696, "step": 274370 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015282570348712228, "loss": 2.0688, "step": 274375 }, { "epoch": 0.65, "grad_norm": 1.7890625, "learning_rate": 0.00015282413412244172, "loss": 2.0451, "step": 274380 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015282256473971552, "loss": 2.1581, "step": 274385 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015282099533894415, "loss": 2.2365, "step": 274390 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.0001528194259201282, "loss": 2.1732, "step": 274395 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015281785648326818, "loss": 2.12, "step": 274400 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015281628702836457, "loss": 1.9528, "step": 274405 }, { "epoch": 0.65, "grad_norm": 2.5, "learning_rate": 0.00015281471755541797, "loss": 1.9743, "step": 274410 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015281314806442895, "loss": 1.9971, "step": 274415 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015281157855539796, "loss": 2.1367, "step": 274420 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015281000902832558, "loss": 2.2029, "step": 274425 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015280843948321237, "loss": 2.0107, "step": 274430 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015280686992005883, "loss": 2.0429, "step": 274435 }, { "epoch": 0.65, "grad_norm": 2.625, "learning_rate": 0.00015280530033886546, "loss": 2.0387, "step": 274440 }, { "epoch": 0.65, "grad_norm": 2.65625, "learning_rate": 0.0001528037307396329, "loss": 1.8888, "step": 274445 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.0001528021611223616, "loss": 2.0421, "step": 274450 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.00015280059148705215, "loss": 2.1903, "step": 274455 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015279902183370502, "loss": 2.1538, "step": 274460 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015279745216232083, "loss": 1.9381, "step": 274465 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015279588247290001, "loss": 1.9874, "step": 274470 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015279431276544322, "loss": 2.1526, "step": 274475 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.00015279274303995095, "loss": 2.0715, "step": 274480 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015279117329642365, "loss": 1.9691, "step": 274485 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015278960353486197, "loss": 2.0017, "step": 274490 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.0001527880337552664, "loss": 2.0951, "step": 274495 }, { "epoch": 0.65, "grad_norm": 2.515625, "learning_rate": 0.0001527864639576375, "loss": 2.0838, "step": 274500 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015278489414197576, "loss": 2.1219, "step": 274505 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015278332430828173, "loss": 1.9804, "step": 274510 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015278175445655599, "loss": 2.0984, "step": 274515 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015278018458679906, "loss": 2.0635, "step": 274520 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015277861469901143, "loss": 2.1013, "step": 274525 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015277704479319367, "loss": 2.07, "step": 274530 }, { "epoch": 0.65, "grad_norm": 2.59375, "learning_rate": 0.0001527754748693463, "loss": 2.1554, "step": 274535 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.00015277390492746988, "loss": 1.9851, "step": 274540 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015277233496756496, "loss": 2.059, "step": 274545 }, { "epoch": 0.65, "grad_norm": 2.875, "learning_rate": 0.00015277076498963204, "loss": 2.2684, "step": 274550 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.00015276919499367165, "loss": 2.0545, "step": 274555 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015276762497968437, "loss": 1.943, "step": 274560 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015276605494767072, "loss": 2.0884, "step": 274565 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.0001527644848976312, "loss": 2.1537, "step": 274570 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.0001527629148295664, "loss": 2.062, "step": 274575 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001527613447434768, "loss": 1.8999, "step": 274580 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015275977463936302, "loss": 2.0711, "step": 274585 }, { "epoch": 0.65, "grad_norm": 2.46875, "learning_rate": 0.00015275820451722552, "loss": 2.0405, "step": 274590 }, { "epoch": 0.65, "grad_norm": 1.875, "learning_rate": 0.00015275663437706483, "loss": 1.9153, "step": 274595 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015275506421888154, "loss": 2.0199, "step": 274600 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015275349404267615, "loss": 1.8864, "step": 274605 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015275192384844925, "loss": 2.0563, "step": 274610 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015275035363620132, "loss": 2.0938, "step": 274615 }, { "epoch": 0.65, "grad_norm": 1.75, "learning_rate": 0.0001527487834059329, "loss": 1.9541, "step": 274620 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015274721315764454, "loss": 2.161, "step": 274625 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015274564289133674, "loss": 2.0657, "step": 274630 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015274407260701013, "loss": 2.0521, "step": 274635 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015274250230466518, "loss": 2.1839, "step": 274640 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015274093198430243, "loss": 2.0797, "step": 274645 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.00015273936164592237, "loss": 1.99, "step": 274650 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015273779128952564, "loss": 1.9271, "step": 274655 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015273622091511272, "loss": 2.0353, "step": 274660 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015273465052268416, "loss": 1.9755, "step": 274665 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.00015273308011224047, "loss": 2.1376, "step": 274670 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015273150968378218, "loss": 2.0769, "step": 274675 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.0001527299392373099, "loss": 1.9524, "step": 274680 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015272836877282408, "loss": 1.9315, "step": 274685 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.0001527267982903253, "loss": 2.0795, "step": 274690 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.0001527252277898141, "loss": 2.1542, "step": 274695 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.000152723657271291, "loss": 2.0722, "step": 274700 }, { "epoch": 0.65, "grad_norm": 1.484375, "learning_rate": 0.00015272208673475658, "loss": 2.1595, "step": 274705 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.00015272051618021125, "loss": 2.1792, "step": 274710 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.0001527189456076557, "loss": 1.9804, "step": 274715 }, { "epoch": 0.65, "grad_norm": 1.8515625, "learning_rate": 0.0001527173750170904, "loss": 2.0574, "step": 274720 }, { "epoch": 0.65, "grad_norm": 2.46875, "learning_rate": 0.00015271580440851586, "loss": 2.1066, "step": 274725 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015271423378193267, "loss": 2.0664, "step": 274730 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015271266313734134, "loss": 2.0814, "step": 274735 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.0001527110924747424, "loss": 2.0292, "step": 274740 }, { "epoch": 0.65, "grad_norm": 1.8984375, "learning_rate": 0.0001527095217941364, "loss": 2.1647, "step": 274745 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015270795109552387, "loss": 1.9651, "step": 274750 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015270638037890533, "loss": 2.2002, "step": 274755 }, { "epoch": 0.65, "grad_norm": 1.9296875, "learning_rate": 0.00015270480964428137, "loss": 2.2234, "step": 274760 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015270323889165246, "loss": 2.0046, "step": 274765 }, { "epoch": 0.65, "grad_norm": 1.8671875, "learning_rate": 0.0001527016681210192, "loss": 1.9242, "step": 274770 }, { "epoch": 0.65, "grad_norm": 1.8203125, "learning_rate": 0.0001527000973323821, "loss": 2.0836, "step": 274775 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015269852652574165, "loss": 2.1326, "step": 274780 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015269695570109844, "loss": 2.1303, "step": 274785 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015269538485845297, "loss": 2.015, "step": 274790 }, { "epoch": 0.65, "grad_norm": 1.78125, "learning_rate": 0.00015269381399780584, "loss": 2.0948, "step": 274795 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015269224311915754, "loss": 1.8871, "step": 274800 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.0001526906722225086, "loss": 2.2513, "step": 274805 }, { "epoch": 0.65, "grad_norm": 1.796875, "learning_rate": 0.00015268910130785958, "loss": 2.1892, "step": 274810 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015268753037521102, "loss": 2.0117, "step": 274815 }, { "epoch": 0.65, "grad_norm": 2.5, "learning_rate": 0.00015268595942456346, "loss": 2.2075, "step": 274820 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015268438845591738, "loss": 2.0402, "step": 274825 }, { "epoch": 0.65, "grad_norm": 1.8828125, "learning_rate": 0.00015268281746927338, "loss": 2.0463, "step": 274830 }, { "epoch": 0.65, "grad_norm": 1.8828125, "learning_rate": 0.00015268124646463193, "loss": 1.7823, "step": 274835 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015267967544199367, "loss": 2.1625, "step": 274840 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015267810440135904, "loss": 2.1396, "step": 274845 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015267653334272862, "loss": 2.171, "step": 274850 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015267496226610295, "loss": 2.0757, "step": 274855 }, { "epoch": 0.65, "grad_norm": 2.734375, "learning_rate": 0.00015267339117148256, "loss": 2.2283, "step": 274860 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015267182005886798, "loss": 2.2226, "step": 274865 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015267024892825975, "loss": 1.8105, "step": 274870 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015266867777965842, "loss": 2.1871, "step": 274875 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015266710661306448, "loss": 2.1349, "step": 274880 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015266553542847853, "loss": 2.0729, "step": 274885 }, { "epoch": 0.65, "grad_norm": 2.625, "learning_rate": 0.00015266396422590107, "loss": 2.0151, "step": 274890 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015266239300533265, "loss": 1.9333, "step": 274895 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.00015266082176677382, "loss": 2.0331, "step": 274900 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015265925051022506, "loss": 2.2205, "step": 274905 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015265767923568698, "loss": 1.9182, "step": 274910 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015265610794316006, "loss": 1.9351, "step": 274915 }, { "epoch": 0.65, "grad_norm": 1.84375, "learning_rate": 0.00015265453663264485, "loss": 2.0866, "step": 274920 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.0001526529653041419, "loss": 2.2224, "step": 274925 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015265139395765175, "loss": 2.1758, "step": 274930 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015264982259317495, "loss": 2.21, "step": 274935 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015264825121071195, "loss": 2.0027, "step": 274940 }, { "epoch": 0.65, "grad_norm": 1.8671875, "learning_rate": 0.0001526466798102634, "loss": 2.1383, "step": 274945 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001526451083918298, "loss": 1.9898, "step": 274950 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.0001526435369554117, "loss": 2.3037, "step": 274955 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.00015264196550100956, "loss": 2.2006, "step": 274960 }, { "epoch": 0.65, "grad_norm": 1.8515625, "learning_rate": 0.00015264039402862397, "loss": 2.0108, "step": 274965 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015263882253825547, "loss": 2.0131, "step": 274970 }, { "epoch": 0.65, "grad_norm": 2.5, "learning_rate": 0.00015263725102990464, "loss": 2.1245, "step": 274975 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015263567950357193, "loss": 1.961, "step": 274980 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015263410795925792, "loss": 1.9601, "step": 274985 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015263253639696313, "loss": 1.9002, "step": 274990 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.0001526309648166881, "loss": 2.2387, "step": 274995 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015262939321843345, "loss": 2.1436, "step": 275000 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.0001526278216021996, "loss": 2.2425, "step": 275005 }, { "epoch": 0.65, "grad_norm": 1.8125, "learning_rate": 0.00015262624996798714, "loss": 2.1674, "step": 275010 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015262467831579657, "loss": 2.0087, "step": 275015 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.00015262310664562848, "loss": 2.1152, "step": 275020 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015262153495748337, "loss": 2.0849, "step": 275025 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001526199632513618, "loss": 2.105, "step": 275030 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.0001526183915272643, "loss": 2.0451, "step": 275035 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.0001526168197851914, "loss": 2.1771, "step": 275040 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015261524802514365, "loss": 2.2507, "step": 275045 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015261367624712154, "loss": 2.0525, "step": 275050 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015261210445112567, "loss": 2.0406, "step": 275055 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015261053263715657, "loss": 2.1031, "step": 275060 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015260896080521473, "loss": 2.018, "step": 275065 }, { "epoch": 0.65, "grad_norm": 1.859375, "learning_rate": 0.00015260738895530075, "loss": 2.3191, "step": 275070 }, { "epoch": 0.65, "grad_norm": 1.8671875, "learning_rate": 0.00015260581708741508, "loss": 2.0669, "step": 275075 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.0001526042452015583, "loss": 2.1154, "step": 275080 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015260267329773105, "loss": 2.0507, "step": 275085 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015260110137593367, "loss": 2.16, "step": 275090 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015259952943616687, "loss": 1.9459, "step": 275095 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001525979574784311, "loss": 2.0114, "step": 275100 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015259638550272687, "loss": 2.0926, "step": 275105 }, { "epoch": 0.65, "grad_norm": 2.703125, "learning_rate": 0.00015259481350905483, "loss": 2.0677, "step": 275110 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.00015259324149741538, "loss": 2.1801, "step": 275115 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015259166946780917, "loss": 2.1294, "step": 275120 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.0001525900974202367, "loss": 2.0925, "step": 275125 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015258852535469843, "loss": 1.9802, "step": 275130 }, { "epoch": 0.65, "grad_norm": 1.921875, "learning_rate": 0.00015258695327119502, "loss": 1.972, "step": 275135 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015258538116972697, "loss": 2.1114, "step": 275140 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015258380905029478, "loss": 1.9866, "step": 275145 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015258223691289898, "loss": 2.0126, "step": 275150 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015258066475754017, "loss": 2.116, "step": 275155 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015257909258421882, "loss": 1.9068, "step": 275160 }, { "epoch": 0.65, "grad_norm": 3.09375, "learning_rate": 0.00015257752039293554, "loss": 2.095, "step": 275165 }, { "epoch": 0.65, "grad_norm": 1.703125, "learning_rate": 0.0001525759481836908, "loss": 1.9561, "step": 275170 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015257437595648515, "loss": 1.9519, "step": 275175 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015257280371131917, "loss": 1.9605, "step": 275180 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015257123144819335, "loss": 2.0132, "step": 275185 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015256965916710824, "loss": 2.1103, "step": 275190 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.0001525680868680644, "loss": 2.0115, "step": 275195 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015256651455106232, "loss": 2.1666, "step": 275200 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.0001525649422161026, "loss": 2.1477, "step": 275205 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.0001525633698631857, "loss": 1.9488, "step": 275210 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.00015256179749231224, "loss": 1.988, "step": 275215 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.0001525602251034827, "loss": 2.1755, "step": 275220 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015255865269669765, "loss": 2.143, "step": 275225 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001525570802719576, "loss": 2.0835, "step": 275230 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.00015255550782926304, "loss": 1.9868, "step": 275235 }, { "epoch": 0.65, "grad_norm": 1.9296875, "learning_rate": 0.00015255393536861464, "loss": 2.142, "step": 275240 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015255236289001282, "loss": 2.0829, "step": 275245 }, { "epoch": 0.65, "grad_norm": 1.8828125, "learning_rate": 0.0001525507903934582, "loss": 1.8827, "step": 275250 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015254921787895124, "loss": 2.0269, "step": 275255 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.00015254764534649256, "loss": 2.1809, "step": 275260 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.0001525460727960826, "loss": 2.2752, "step": 275265 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015254450022772196, "loss": 2.0023, "step": 275270 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.0001525429276414112, "loss": 2.1756, "step": 275275 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.0001525413550371508, "loss": 2.0003, "step": 275280 }, { "epoch": 0.65, "grad_norm": 1.65625, "learning_rate": 0.0001525397824149413, "loss": 1.9319, "step": 275285 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015253820977478326, "loss": 1.9854, "step": 275290 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015253663711667725, "loss": 2.1015, "step": 275295 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015253506444062377, "loss": 2.1869, "step": 275300 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015253349174662332, "loss": 2.1382, "step": 275305 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.0001525319190346765, "loss": 2.1029, "step": 275310 }, { "epoch": 0.65, "grad_norm": 2.921875, "learning_rate": 0.00015253034630478385, "loss": 2.0516, "step": 275315 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015252877355694585, "loss": 2.2496, "step": 275320 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015252720079116306, "loss": 2.0506, "step": 275325 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015252562800743605, "loss": 2.1809, "step": 275330 }, { "epoch": 0.65, "grad_norm": 2.5625, "learning_rate": 0.00015252405520576533, "loss": 2.227, "step": 275335 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015252248238615144, "loss": 2.0786, "step": 275340 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015252090954859487, "loss": 1.9816, "step": 275345 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015251933669309625, "loss": 2.0605, "step": 275350 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.0001525177638196561, "loss": 2.1306, "step": 275355 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015251619092827485, "loss": 2.0403, "step": 275360 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001525146180189532, "loss": 2.0938, "step": 275365 }, { "epoch": 0.65, "grad_norm": 3.203125, "learning_rate": 0.00015251304509169154, "loss": 1.9358, "step": 275370 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.0001525114721464905, "loss": 1.9412, "step": 275375 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015250989918335058, "loss": 2.0466, "step": 275380 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015250832620227234, "loss": 1.91, "step": 275385 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015250675320325628, "loss": 2.0686, "step": 275390 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.000152505180186303, "loss": 2.0723, "step": 275395 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015250360715141295, "loss": 1.9609, "step": 275400 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015250203409858677, "loss": 2.1728, "step": 275405 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001525004610278249, "loss": 2.0825, "step": 275410 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015249888793912795, "loss": 1.949, "step": 275415 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015249731483249642, "loss": 1.9329, "step": 275420 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.00015249574170793083, "loss": 2.1603, "step": 275425 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015249416856543175, "loss": 1.9189, "step": 275430 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015249259540499975, "loss": 2.0581, "step": 275435 }, { "epoch": 0.65, "grad_norm": 1.8359375, "learning_rate": 0.00015249102222663526, "loss": 2.1699, "step": 275440 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.00015248944903033894, "loss": 2.0946, "step": 275445 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015248787581611124, "loss": 2.1411, "step": 275450 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015248630258395273, "loss": 2.2716, "step": 275455 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015248472933386399, "loss": 2.3103, "step": 275460 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.00015248315606584547, "loss": 1.9397, "step": 275465 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015248158277989776, "loss": 1.9327, "step": 275470 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.0001524800094760214, "loss": 1.8614, "step": 275475 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.0001524784361542169, "loss": 2.239, "step": 275480 }, { "epoch": 0.65, "grad_norm": 2.515625, "learning_rate": 0.00015247686281448487, "loss": 2.0934, "step": 275485 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015247528945682573, "loss": 2.1734, "step": 275490 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015247371608124007, "loss": 2.0766, "step": 275495 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.0001524721426877285, "loss": 2.0775, "step": 275500 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015247056927629145, "loss": 1.9191, "step": 275505 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.0001524689958469295, "loss": 2.02, "step": 275510 }, { "epoch": 0.65, "grad_norm": 1.8203125, "learning_rate": 0.00015246742239964322, "loss": 2.0638, "step": 275515 }, { "epoch": 0.65, "grad_norm": 2.59375, "learning_rate": 0.0001524658489344331, "loss": 1.9102, "step": 275520 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.0001524642754512997, "loss": 1.8946, "step": 275525 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015246270195024353, "loss": 1.9195, "step": 275530 }, { "epoch": 0.65, "grad_norm": 2.78125, "learning_rate": 0.00015246112843126516, "loss": 1.9084, "step": 275535 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015245955489436513, "loss": 1.9694, "step": 275540 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015245798133954396, "loss": 2.0469, "step": 275545 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.0001524564077668022, "loss": 2.2055, "step": 275550 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015245483417614035, "loss": 1.9086, "step": 275555 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.000152453260567559, "loss": 2.226, "step": 275560 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015245168694105868, "loss": 2.0533, "step": 275565 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015245011329663988, "loss": 2.0852, "step": 275570 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001524485396343032, "loss": 1.9979, "step": 275575 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015244696595404913, "loss": 1.9275, "step": 275580 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.0001524453922558782, "loss": 2.0924, "step": 275585 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.000152443818539791, "loss": 2.0145, "step": 275590 }, { "epoch": 0.65, "grad_norm": 1.515625, "learning_rate": 0.00015244224480578806, "loss": 2.0223, "step": 275595 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015244067105386989, "loss": 1.8695, "step": 275600 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.000152439097284037, "loss": 2.1406, "step": 275605 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015243752349629, "loss": 2.2742, "step": 275610 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.00015243594969062936, "loss": 2.091, "step": 275615 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015243437586705567, "loss": 1.9441, "step": 275620 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015243280202556945, "loss": 1.9609, "step": 275625 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015243122816617122, "loss": 2.0332, "step": 275630 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015242965428886155, "loss": 1.9565, "step": 275635 }, { "epoch": 0.65, "grad_norm": 2.625, "learning_rate": 0.00015242808039364093, "loss": 2.068, "step": 275640 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015242650648050995, "loss": 1.8462, "step": 275645 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001524249325494691, "loss": 2.2745, "step": 275650 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.000152423358600519, "loss": 1.9405, "step": 275655 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.0001524217846336601, "loss": 2.1737, "step": 275660 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015242021064889292, "loss": 2.0667, "step": 275665 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.0001524186366462181, "loss": 2.2359, "step": 275670 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015241706262563608, "loss": 1.9873, "step": 275675 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.0001524154885871475, "loss": 2.0376, "step": 275680 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.0001524139145307528, "loss": 2.1565, "step": 275685 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015241234045645254, "loss": 2.112, "step": 275690 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001524107663642473, "loss": 2.0902, "step": 275695 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.0001524091922541376, "loss": 2.1289, "step": 275700 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015240761812612398, "loss": 2.0904, "step": 275705 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015240604398020696, "loss": 1.828, "step": 275710 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015240446981638705, "loss": 2.0861, "step": 275715 }, { "epoch": 0.65, "grad_norm": 1.53125, "learning_rate": 0.00015240289563466482, "loss": 1.8641, "step": 275720 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015240132143504086, "loss": 2.0687, "step": 275725 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015239974721751562, "loss": 2.0717, "step": 275730 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015239817298208968, "loss": 2.0556, "step": 275735 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.0001523965987287636, "loss": 2.0989, "step": 275740 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.00015239502445753785, "loss": 2.142, "step": 275745 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015239345016841303, "loss": 1.8225, "step": 275750 }, { "epoch": 0.65, "grad_norm": 1.75, "learning_rate": 0.00015239187586138967, "loss": 2.1206, "step": 275755 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001523903015364683, "loss": 1.8945, "step": 275760 }, { "epoch": 0.65, "grad_norm": 3.3125, "learning_rate": 0.00015238872719364942, "loss": 2.0598, "step": 275765 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015238715283293362, "loss": 1.8512, "step": 275770 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015238557845432142, "loss": 2.3142, "step": 275775 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015238400405781337, "loss": 2.0327, "step": 275780 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015238242964340997, "loss": 1.9087, "step": 275785 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015238085521111175, "loss": 2.1427, "step": 275790 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015237928076091935, "loss": 2.109, "step": 275795 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.0001523777062928332, "loss": 1.862, "step": 275800 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.0001523761318068539, "loss": 2.0248, "step": 275805 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.0001523745573029819, "loss": 2.0106, "step": 275810 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015237298278121788, "loss": 1.8509, "step": 275815 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.0001523714082415623, "loss": 2.2638, "step": 275820 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015236983368401564, "loss": 2.048, "step": 275825 }, { "epoch": 0.65, "grad_norm": 1.6953125, "learning_rate": 0.0001523682591085785, "loss": 1.9757, "step": 275830 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015236668451525142, "loss": 2.0811, "step": 275835 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.000152365109904035, "loss": 1.97, "step": 275840 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.0001523635352749296, "loss": 2.01, "step": 275845 }, { "epoch": 0.65, "grad_norm": 1.828125, "learning_rate": 0.00015236196062793592, "loss": 2.1862, "step": 275850 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015236038596305444, "loss": 2.0159, "step": 275855 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015235881128028574, "loss": 2.025, "step": 275860 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015235723657963028, "loss": 2.1668, "step": 275865 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015235566186108863, "loss": 2.1726, "step": 275870 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015235408712466138, "loss": 1.9961, "step": 275875 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015235251237034894, "loss": 2.1903, "step": 275880 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.000152350937598152, "loss": 2.0606, "step": 275885 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015234936280807105, "loss": 2.1176, "step": 275890 }, { "epoch": 0.65, "grad_norm": 1.65625, "learning_rate": 0.00015234778800010657, "loss": 2.0915, "step": 275895 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015234621317425912, "loss": 1.8293, "step": 275900 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015234463833052928, "loss": 2.2003, "step": 275905 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015234306346891756, "loss": 2.0659, "step": 275910 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.0001523414885894245, "loss": 2.0712, "step": 275915 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015233991369205066, "loss": 2.0935, "step": 275920 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.0001523383387767965, "loss": 1.9878, "step": 275925 }, { "epoch": 0.65, "grad_norm": 1.734375, "learning_rate": 0.00015233676384366268, "loss": 2.0205, "step": 275930 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015233518889264962, "loss": 2.1542, "step": 275935 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015233361392375793, "loss": 2.1579, "step": 275940 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015233203893698813, "loss": 1.8501, "step": 275945 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015233046393234073, "loss": 2.1447, "step": 275950 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015232888890981636, "loss": 2.1059, "step": 275955 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.0001523273138694154, "loss": 2.0937, "step": 275960 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015232573881113855, "loss": 2.0567, "step": 275965 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015232416373498628, "loss": 2.1833, "step": 275970 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015232258864095912, "loss": 2.0676, "step": 275975 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.0001523210135290576, "loss": 2.0726, "step": 275980 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015231943839928225, "loss": 2.2121, "step": 275985 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015231786325163365, "loss": 2.055, "step": 275990 }, { "epoch": 0.65, "grad_norm": 1.828125, "learning_rate": 0.00015231628808611234, "loss": 2.1216, "step": 275995 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015231471290271884, "loss": 2.1138, "step": 276000 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015231313770145365, "loss": 2.2417, "step": 276005 }, { "epoch": 0.65, "grad_norm": 2.59375, "learning_rate": 0.00015231156248231734, "loss": 2.109, "step": 276010 }, { "epoch": 0.65, "grad_norm": 2.515625, "learning_rate": 0.00015230998724531048, "loss": 2.2974, "step": 276015 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015230841199043358, "loss": 2.1724, "step": 276020 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015230683671768716, "loss": 2.2058, "step": 276025 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.0001523052614270718, "loss": 2.2404, "step": 276030 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.000152303686118588, "loss": 2.0561, "step": 276035 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.0001523021107922363, "loss": 1.8928, "step": 276040 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015230053544801727, "loss": 2.1178, "step": 276045 }, { "epoch": 0.65, "grad_norm": 1.7265625, "learning_rate": 0.00015229896008593142, "loss": 1.9151, "step": 276050 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.0001522973847059793, "loss": 2.1537, "step": 276055 }, { "epoch": 0.65, "grad_norm": 2.5625, "learning_rate": 0.00015229580930816144, "loss": 1.9578, "step": 276060 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015229423389247839, "loss": 2.0696, "step": 276065 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.0001522926584589307, "loss": 2.0813, "step": 276070 }, { "epoch": 0.65, "grad_norm": 2.46875, "learning_rate": 0.00015229108300751884, "loss": 1.9117, "step": 276075 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015228950753824343, "loss": 2.1013, "step": 276080 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.000152287932051105, "loss": 2.0365, "step": 276085 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015228635654610404, "loss": 1.9479, "step": 276090 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015228478102324107, "loss": 2.1037, "step": 276095 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.0001522832054825167, "loss": 2.2358, "step": 276100 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.00015228162992393144, "loss": 2.1713, "step": 276105 }, { "epoch": 0.65, "grad_norm": 2.65625, "learning_rate": 0.00015228005434748587, "loss": 2.2231, "step": 276110 }, { "epoch": 0.65, "grad_norm": 1.8984375, "learning_rate": 0.00015227847875318043, "loss": 2.0957, "step": 276115 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.0001522769031410157, "loss": 2.0557, "step": 276120 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015227532751099229, "loss": 2.0913, "step": 276125 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015227375186311067, "loss": 2.0318, "step": 276130 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015227217619737137, "loss": 2.0115, "step": 276135 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015227060051377494, "loss": 1.9684, "step": 276140 }, { "epoch": 0.65, "grad_norm": 1.9296875, "learning_rate": 0.00015226902481232192, "loss": 2.0418, "step": 276145 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015226744909301287, "loss": 2.0989, "step": 276150 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015226587335584831, "loss": 2.1621, "step": 276155 }, { "epoch": 0.65, "grad_norm": 2.71875, "learning_rate": 0.0001522642976008288, "loss": 2.082, "step": 276160 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015226272182795482, "loss": 2.2616, "step": 276165 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.00015226114603722696, "loss": 2.1832, "step": 276170 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015225957022864574, "loss": 2.1212, "step": 276175 }, { "epoch": 0.65, "grad_norm": 2.734375, "learning_rate": 0.00015225799440221174, "loss": 2.0102, "step": 276180 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015225641855792543, "loss": 1.9295, "step": 276185 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015225484269578736, "loss": 2.1213, "step": 276190 }, { "epoch": 0.65, "grad_norm": 1.8125, "learning_rate": 0.00015225326681579811, "loss": 1.9651, "step": 276195 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.0001522516909179582, "loss": 2.0371, "step": 276200 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015225011500226817, "loss": 2.0295, "step": 276205 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015224853906872852, "loss": 2.149, "step": 276210 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015224696311733985, "loss": 2.0967, "step": 276215 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015224538714810268, "loss": 1.9318, "step": 276220 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015224381116101752, "loss": 1.9634, "step": 276225 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015224223515608492, "loss": 1.8199, "step": 276230 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015224065913330546, "loss": 2.0327, "step": 276235 }, { "epoch": 0.65, "grad_norm": 1.921875, "learning_rate": 0.0001522390830926796, "loss": 1.995, "step": 276240 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015223750703420797, "loss": 2.1992, "step": 276245 }, { "epoch": 0.65, "grad_norm": 1.828125, "learning_rate": 0.00015223593095789104, "loss": 2.1302, "step": 276250 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001522343548637293, "loss": 1.8678, "step": 276255 }, { "epoch": 0.65, "grad_norm": 2.609375, "learning_rate": 0.00015223277875172343, "loss": 2.0494, "step": 276260 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.00015223120262187388, "loss": 2.0512, "step": 276265 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015222962647418124, "loss": 2.159, "step": 276270 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.00015222805030864597, "loss": 1.9483, "step": 276275 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015222647412526863, "loss": 1.8763, "step": 276280 }, { "epoch": 0.65, "grad_norm": 1.9765625, "learning_rate": 0.00015222489792404982, "loss": 1.9282, "step": 276285 }, { "epoch": 0.65, "grad_norm": 1.8671875, "learning_rate": 0.00015222332170499004, "loss": 2.0814, "step": 276290 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015222174546808984, "loss": 2.0988, "step": 276295 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001522201692133497, "loss": 2.0825, "step": 276300 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015221859294077019, "loss": 2.0128, "step": 276305 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.0001522170166503519, "loss": 1.9043, "step": 276310 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001522154403420953, "loss": 2.1482, "step": 276315 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.000152213864016001, "loss": 2.1194, "step": 276320 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015221228767206947, "loss": 1.9112, "step": 276325 }, { "epoch": 0.65, "grad_norm": 1.84375, "learning_rate": 0.0001522107113103013, "loss": 2.1647, "step": 276330 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015220913493069698, "loss": 2.1015, "step": 276335 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.0001522075585332571, "loss": 2.1302, "step": 276340 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015220598211798214, "loss": 1.8756, "step": 276345 }, { "epoch": 0.65, "grad_norm": 1.921875, "learning_rate": 0.00015220440568487268, "loss": 2.1639, "step": 276350 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015220282923392926, "loss": 2.2334, "step": 276355 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015220125276515237, "loss": 1.9982, "step": 276360 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.0001521996762785426, "loss": 2.1791, "step": 276365 }, { "epoch": 0.65, "grad_norm": 1.8125, "learning_rate": 0.0001521980997741005, "loss": 2.0946, "step": 276370 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015219652325182656, "loss": 2.0815, "step": 276375 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015219494671172136, "loss": 2.2524, "step": 276380 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015219337015378543, "loss": 2.1769, "step": 276385 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015219179357801925, "loss": 2.0174, "step": 276390 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015219021698442342, "loss": 2.0666, "step": 276395 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015218864037299848, "loss": 2.1083, "step": 276400 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015218706374374495, "loss": 1.9499, "step": 276405 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.0001521854870966634, "loss": 2.1413, "step": 276410 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.0001521839104317543, "loss": 2.1293, "step": 276415 }, { "epoch": 0.65, "grad_norm": 1.9296875, "learning_rate": 0.00015218233374901825, "loss": 2.0787, "step": 276420 }, { "epoch": 0.65, "grad_norm": 1.8984375, "learning_rate": 0.00015218075704845577, "loss": 2.0632, "step": 276425 }, { "epoch": 0.65, "grad_norm": 1.828125, "learning_rate": 0.0001521791803300674, "loss": 2.0322, "step": 276430 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015217760359385367, "loss": 2.1809, "step": 276435 }, { "epoch": 0.65, "grad_norm": 2.5625, "learning_rate": 0.00015217602683981508, "loss": 2.1097, "step": 276440 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001521744500679523, "loss": 2.1877, "step": 276445 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015217287327826573, "loss": 2.0899, "step": 276450 }, { "epoch": 0.65, "grad_norm": 2.515625, "learning_rate": 0.00015217129647075598, "loss": 2.0667, "step": 276455 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015216971964542353, "loss": 1.9623, "step": 276460 }, { "epoch": 0.65, "grad_norm": 1.921875, "learning_rate": 0.00015216814280226898, "loss": 2.0487, "step": 276465 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015216656594129285, "loss": 2.0211, "step": 276470 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.00015216498906249566, "loss": 2.0157, "step": 276475 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015216341216587798, "loss": 1.9078, "step": 276480 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015216183525144033, "loss": 2.0526, "step": 276485 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015216025831918327, "loss": 1.9542, "step": 276490 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.0001521586813691073, "loss": 2.0468, "step": 276495 }, { "epoch": 0.65, "grad_norm": 2.953125, "learning_rate": 0.00015215710440121298, "loss": 2.2013, "step": 276500 }, { "epoch": 0.65, "grad_norm": 2.671875, "learning_rate": 0.00015215552741550082, "loss": 2.0978, "step": 276505 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015215395041197143, "loss": 1.9726, "step": 276510 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.0001521523733906253, "loss": 2.0745, "step": 276515 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015215079635146295, "loss": 2.0499, "step": 276520 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015214921929448494, "loss": 2.2263, "step": 276525 }, { "epoch": 0.65, "grad_norm": 1.8359375, "learning_rate": 0.00015214764221969183, "loss": 1.9577, "step": 276530 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015214606512708412, "loss": 2.0217, "step": 276535 }, { "epoch": 0.65, "grad_norm": 1.8984375, "learning_rate": 0.0001521444880166624, "loss": 1.9794, "step": 276540 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015214291088842715, "loss": 2.0316, "step": 276545 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015214133374237893, "loss": 2.17, "step": 276550 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015213975657851832, "loss": 2.0186, "step": 276555 }, { "epoch": 0.65, "grad_norm": 2.78125, "learning_rate": 0.0001521381793968458, "loss": 2.0532, "step": 276560 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015213660219736194, "loss": 2.0795, "step": 276565 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015213502498006727, "loss": 1.8957, "step": 276570 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.0001521334477449623, "loss": 2.1298, "step": 276575 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015213187049204762, "loss": 1.9398, "step": 276580 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015213029322132376, "loss": 2.0171, "step": 276585 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015212871593279129, "loss": 2.2962, "step": 276590 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015212713862645063, "loss": 1.9914, "step": 276595 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.0001521255613023024, "loss": 2.0792, "step": 276600 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015212398396034714, "loss": 2.1571, "step": 276605 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015212240660058543, "loss": 2.1361, "step": 276610 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015212082922301773, "loss": 2.0914, "step": 276615 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.0001521192518276446, "loss": 2.0388, "step": 276620 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015211767441446655, "loss": 2.1813, "step": 276625 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015211609698348422, "loss": 2.056, "step": 276630 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015211451953469807, "loss": 2.131, "step": 276635 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015211294206810865, "loss": 2.0674, "step": 276640 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015211136458371648, "loss": 2.0651, "step": 276645 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015210978708152215, "loss": 1.92, "step": 276650 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015210820956152617, "loss": 2.0798, "step": 276655 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015210663202372908, "loss": 2.1046, "step": 276660 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.0001521050544681314, "loss": 2.0872, "step": 276665 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015210347689473372, "loss": 2.0225, "step": 276670 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015210189930353653, "loss": 2.0486, "step": 276675 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.0001521003216945404, "loss": 2.1709, "step": 276680 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015209874406774585, "loss": 2.1641, "step": 276685 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.0001520971664231534, "loss": 2.1148, "step": 276690 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015209558876076365, "loss": 2.0169, "step": 276695 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.00015209401108057707, "loss": 2.2597, "step": 276700 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.0001520924333825943, "loss": 2.1414, "step": 276705 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.0001520908556668157, "loss": 1.9987, "step": 276710 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.000152089277933242, "loss": 2.0397, "step": 276715 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015208770018187363, "loss": 1.9229, "step": 276720 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015208612241271116, "loss": 2.1137, "step": 276725 }, { "epoch": 0.65, "grad_norm": 1.828125, "learning_rate": 0.00015208454462575514, "loss": 2.0341, "step": 276730 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015208296682100607, "loss": 2.0354, "step": 276735 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015208138899846453, "loss": 1.9819, "step": 276740 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015207981115813104, "loss": 2.1236, "step": 276745 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015207823330000615, "loss": 2.1055, "step": 276750 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015207665542409037, "loss": 2.1915, "step": 276755 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.00015207507753038426, "loss": 2.0989, "step": 276760 }, { "epoch": 0.65, "grad_norm": 2.40625, "learning_rate": 0.00015207349961888835, "loss": 2.1631, "step": 276765 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015207192168960324, "loss": 2.015, "step": 276770 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015207034374252937, "loss": 2.1221, "step": 276775 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015206876577766735, "loss": 2.115, "step": 276780 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015206718779501766, "loss": 2.243, "step": 276785 }, { "epoch": 0.65, "grad_norm": 2.6875, "learning_rate": 0.0001520656097945809, "loss": 1.9264, "step": 276790 }, { "epoch": 0.65, "grad_norm": 1.8984375, "learning_rate": 0.00015206403177635763, "loss": 1.9804, "step": 276795 }, { "epoch": 0.65, "grad_norm": 1.859375, "learning_rate": 0.00015206245374034828, "loss": 2.2562, "step": 276800 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015206087568655344, "loss": 2.1903, "step": 276805 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015205929761497372, "loss": 2.0726, "step": 276810 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015205771952560957, "loss": 2.2546, "step": 276815 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.00015205614141846155, "loss": 2.1512, "step": 276820 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015205456329353022, "loss": 2.2456, "step": 276825 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.0001520529851508161, "loss": 2.0053, "step": 276830 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015205140699031975, "loss": 1.9632, "step": 276835 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.0001520498288120417, "loss": 2.0632, "step": 276840 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015204825061598243, "loss": 1.986, "step": 276845 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.0001520466724021426, "loss": 2.0762, "step": 276850 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015204509417052263, "loss": 1.9217, "step": 276855 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015204351592112316, "loss": 2.1611, "step": 276860 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015204193765394464, "loss": 2.4422, "step": 276865 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015204035936898768, "loss": 1.9299, "step": 276870 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015203878106625278, "loss": 2.0499, "step": 276875 }, { "epoch": 0.65, "grad_norm": 3.296875, "learning_rate": 0.00015203720274574047, "loss": 2.0528, "step": 276880 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015203562440745135, "loss": 2.0867, "step": 276885 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015203404605138588, "loss": 2.1223, "step": 276890 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.00015203246767754464, "loss": 2.1591, "step": 276895 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015203088928592817, "loss": 1.9631, "step": 276900 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015202931087653703, "loss": 2.0283, "step": 276905 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.0001520277324493717, "loss": 1.889, "step": 276910 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015202615400443275, "loss": 2.1596, "step": 276915 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.00015202457554172076, "loss": 2.125, "step": 276920 }, { "epoch": 0.65, "grad_norm": 1.8828125, "learning_rate": 0.00015202299706123623, "loss": 1.8072, "step": 276925 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015202141856297967, "loss": 2.0803, "step": 276930 }, { "epoch": 0.65, "grad_norm": 1.921875, "learning_rate": 0.00015201984004695163, "loss": 2.0046, "step": 276935 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001520182615131527, "loss": 2.1132, "step": 276940 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.0001520166829615834, "loss": 2.0502, "step": 276945 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015201510439224425, "loss": 2.0297, "step": 276950 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015201352580513578, "loss": 2.1004, "step": 276955 }, { "epoch": 0.65, "grad_norm": 1.9296875, "learning_rate": 0.00015201194720025854, "loss": 2.1336, "step": 276960 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001520103685776131, "loss": 2.0761, "step": 276965 }, { "epoch": 0.65, "grad_norm": 2.703125, "learning_rate": 0.0001520087899372, "loss": 2.1022, "step": 276970 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001520072112790197, "loss": 2.0524, "step": 276975 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015200563260307282, "loss": 2.1686, "step": 276980 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015200405390935985, "loss": 2.0527, "step": 276985 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015200247519788138, "loss": 2.1413, "step": 276990 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001520008964686379, "loss": 1.8015, "step": 276995 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015199931772163, "loss": 2.0816, "step": 277000 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015199773895685818, "loss": 2.0319, "step": 277005 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015199616017432297, "loss": 2.0086, "step": 277010 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015199458137402493, "loss": 2.1276, "step": 277015 }, { "epoch": 0.65, "grad_norm": 1.84375, "learning_rate": 0.00015199300255596464, "loss": 2.0412, "step": 277020 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015199142372014258, "loss": 2.0809, "step": 277025 }, { "epoch": 0.65, "grad_norm": 2.84375, "learning_rate": 0.00015198984486655925, "loss": 2.0464, "step": 277030 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015198826599521528, "loss": 2.2925, "step": 277035 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015198668710611122, "loss": 1.7814, "step": 277040 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.0001519851081992475, "loss": 2.1928, "step": 277045 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001519835292746248, "loss": 2.0972, "step": 277050 }, { "epoch": 0.65, "grad_norm": 2.765625, "learning_rate": 0.0001519819503322435, "loss": 2.0047, "step": 277055 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015198037137210423, "loss": 2.1054, "step": 277060 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015197879239420758, "loss": 1.9934, "step": 277065 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.000151977213398554, "loss": 2.2228, "step": 277070 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015197563438514407, "loss": 2.0399, "step": 277075 }, { "epoch": 0.65, "grad_norm": 1.921875, "learning_rate": 0.00015197405535397832, "loss": 2.1001, "step": 277080 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001519724763050573, "loss": 2.0485, "step": 277085 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.00015197089723838152, "loss": 2.1086, "step": 277090 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015196931815395154, "loss": 2.0457, "step": 277095 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.00015196773905176792, "loss": 1.9847, "step": 277100 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015196615993183114, "loss": 2.0691, "step": 277105 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.0001519645807941418, "loss": 1.9294, "step": 277110 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.0001519630016387004, "loss": 2.1909, "step": 277115 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015196142246550753, "loss": 2.1462, "step": 277120 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015195984327456366, "loss": 2.1099, "step": 277125 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.0001519582640658694, "loss": 2.0569, "step": 277130 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015195668483942523, "loss": 2.0965, "step": 277135 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015195510559523173, "loss": 1.9472, "step": 277140 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.0001519535263332894, "loss": 2.1036, "step": 277145 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015195194705359884, "loss": 2.0399, "step": 277150 }, { "epoch": 0.65, "grad_norm": 2.6875, "learning_rate": 0.00015195036775616055, "loss": 2.0035, "step": 277155 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015194878844097503, "loss": 2.0751, "step": 277160 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015194720910804288, "loss": 2.0097, "step": 277165 }, { "epoch": 0.65, "grad_norm": 2.609375, "learning_rate": 0.00015194562975736462, "loss": 2.1117, "step": 277170 }, { "epoch": 0.65, "grad_norm": 2.515625, "learning_rate": 0.0001519440503889408, "loss": 2.1548, "step": 277175 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015194247100277196, "loss": 2.0573, "step": 277180 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015194089159885864, "loss": 2.0946, "step": 277185 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.0001519393121772013, "loss": 2.1274, "step": 277190 }, { "epoch": 0.65, "grad_norm": 1.7734375, "learning_rate": 0.0001519377327378006, "loss": 2.1917, "step": 277195 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.00015193615328065705, "loss": 2.2505, "step": 277200 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015193457380577115, "loss": 2.1817, "step": 277205 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015193299431314343, "loss": 2.0288, "step": 277210 }, { "epoch": 0.65, "grad_norm": 2.703125, "learning_rate": 0.0001519314148027745, "loss": 2.1741, "step": 277215 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.0001519298352746648, "loss": 2.0045, "step": 277220 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.00015192825572881498, "loss": 2.123, "step": 277225 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.0001519266761652255, "loss": 2.0776, "step": 277230 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015192509658389693, "loss": 2.0569, "step": 277235 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.0001519235169848298, "loss": 1.9758, "step": 277240 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.00015192193736802466, "loss": 2.2056, "step": 277245 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015192035773348205, "loss": 1.9975, "step": 277250 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001519187780812025, "loss": 2.0541, "step": 277255 }, { "epoch": 0.65, "grad_norm": 1.8671875, "learning_rate": 0.00015191719841118654, "loss": 1.9048, "step": 277260 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015191561872343474, "loss": 2.0993, "step": 277265 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015191403901794763, "loss": 2.078, "step": 277270 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015191245929472574, "loss": 2.0453, "step": 277275 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015191087955376958, "loss": 1.7257, "step": 277280 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015190929979507978, "loss": 2.0989, "step": 277285 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.0001519077200186568, "loss": 2.0484, "step": 277290 }, { "epoch": 0.65, "grad_norm": 1.8515625, "learning_rate": 0.0001519061402245012, "loss": 1.8643, "step": 277295 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015190456041261347, "loss": 2.129, "step": 277300 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015190298058299426, "loss": 1.8421, "step": 277305 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015190140073564402, "loss": 2.0617, "step": 277310 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015189982087056335, "loss": 2.1012, "step": 277315 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015189824098775276, "loss": 2.1054, "step": 277320 }, { "epoch": 0.65, "grad_norm": 2.75, "learning_rate": 0.00015189666108721275, "loss": 2.1916, "step": 277325 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015189508116894392, "loss": 1.9377, "step": 277330 }, { "epoch": 0.65, "grad_norm": 1.6953125, "learning_rate": 0.0001518935012329468, "loss": 1.9112, "step": 277335 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.00015189192127922194, "loss": 2.1168, "step": 277340 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015189034130776982, "loss": 2.0846, "step": 277345 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.000151888761318591, "loss": 2.1314, "step": 277350 }, { "epoch": 0.65, "grad_norm": 1.953125, "learning_rate": 0.00015188718131168606, "loss": 2.1181, "step": 277355 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015188560128705556, "loss": 2.2409, "step": 277360 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015188402124469995, "loss": 2.1027, "step": 277365 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015188244118461986, "loss": 2.1227, "step": 277370 }, { "epoch": 0.65, "grad_norm": 1.8984375, "learning_rate": 0.00015188086110681575, "loss": 2.1066, "step": 277375 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015187928101128816, "loss": 2.1725, "step": 277380 }, { "epoch": 0.65, "grad_norm": 3.625, "learning_rate": 0.00015187770089803774, "loss": 2.0517, "step": 277385 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.00015187612076706492, "loss": 2.0795, "step": 277390 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015187454061837028, "loss": 2.0828, "step": 277395 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015187296045195435, "loss": 1.9607, "step": 277400 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015187138026781768, "loss": 2.1704, "step": 277405 }, { "epoch": 0.65, "grad_norm": 1.84375, "learning_rate": 0.0001518698000659608, "loss": 1.986, "step": 277410 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.00015186821984638428, "loss": 2.1595, "step": 277415 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015186663960908863, "loss": 2.1162, "step": 277420 }, { "epoch": 0.65, "grad_norm": 1.5859375, "learning_rate": 0.0001518650593540744, "loss": 2.0645, "step": 277425 }, { "epoch": 0.65, "grad_norm": 1.71875, "learning_rate": 0.0001518634790813421, "loss": 2.0744, "step": 277430 }, { "epoch": 0.65, "grad_norm": 1.8671875, "learning_rate": 0.0001518618987908923, "loss": 2.0007, "step": 277435 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015186031848272554, "loss": 2.1101, "step": 277440 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015185873815684237, "loss": 2.1697, "step": 277445 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015185715781324333, "loss": 2.1611, "step": 277450 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.0001518555774519289, "loss": 2.1759, "step": 277455 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015185399707289964, "loss": 2.094, "step": 277460 }, { "epoch": 0.65, "grad_norm": 1.984375, "learning_rate": 0.00015185241667615618, "loss": 2.1604, "step": 277465 }, { "epoch": 0.65, "grad_norm": 1.8515625, "learning_rate": 0.00015185083626169897, "loss": 2.1859, "step": 277470 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.00015184925582952858, "loss": 1.9737, "step": 277475 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015184767537964553, "loss": 1.9271, "step": 277480 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.00015184609491205038, "loss": 2.0175, "step": 277485 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015184451442674365, "loss": 2.1405, "step": 277490 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.0001518429339237259, "loss": 2.1009, "step": 277495 }, { "epoch": 0.65, "grad_norm": 1.875, "learning_rate": 0.0001518413534029977, "loss": 2.0, "step": 277500 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015183977286455954, "loss": 2.1242, "step": 277505 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015183819230841193, "loss": 2.1987, "step": 277510 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015183661173455548, "loss": 2.1223, "step": 277515 }, { "epoch": 0.65, "grad_norm": 2.578125, "learning_rate": 0.0001518350311429907, "loss": 1.9222, "step": 277520 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015183345053371814, "loss": 2.0404, "step": 277525 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015183186990673834, "loss": 2.2276, "step": 277530 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015183028926205183, "loss": 2.3563, "step": 277535 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015182870859965913, "loss": 2.1862, "step": 277540 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015182712791956082, "loss": 2.1382, "step": 277545 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.00015182554722175743, "loss": 2.0686, "step": 277550 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.00015182396650624948, "loss": 2.2443, "step": 277555 }, { "epoch": 0.65, "grad_norm": 1.890625, "learning_rate": 0.00015182238577303754, "loss": 2.2119, "step": 277560 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001518208050221221, "loss": 1.8551, "step": 277565 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.00015181922425350376, "loss": 2.1758, "step": 277570 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015181764346718305, "loss": 2.0443, "step": 277575 }, { "epoch": 0.65, "grad_norm": 1.8515625, "learning_rate": 0.00015181606266316045, "loss": 2.1935, "step": 277580 }, { "epoch": 0.65, "grad_norm": 1.875, "learning_rate": 0.00015181448184143655, "loss": 2.0345, "step": 277585 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015181290100201196, "loss": 1.9034, "step": 277590 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015181132014488707, "loss": 2.0509, "step": 277595 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015180973927006247, "loss": 1.9841, "step": 277600 }, { "epoch": 0.65, "grad_norm": 2.796875, "learning_rate": 0.00015180815837753878, "loss": 2.067, "step": 277605 }, { "epoch": 0.65, "grad_norm": 2.296875, "learning_rate": 0.00015180657746731647, "loss": 2.0735, "step": 277610 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015180499653939607, "loss": 1.9398, "step": 277615 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015180341559377817, "loss": 2.081, "step": 277620 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015180183463046326, "loss": 2.1254, "step": 277625 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015180025364945195, "loss": 2.1257, "step": 277630 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015179867265074467, "loss": 2.253, "step": 277635 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015179709163434208, "loss": 2.0048, "step": 277640 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015179551060024463, "loss": 1.9939, "step": 277645 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.0001517939295484529, "loss": 2.0948, "step": 277650 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015179234847896743, "loss": 2.1473, "step": 277655 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015179076739178874, "loss": 2.1219, "step": 277660 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015178918628691742, "loss": 1.9671, "step": 277665 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015178760516435395, "loss": 2.2923, "step": 277670 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015178602402409887, "loss": 2.1292, "step": 277675 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.0001517844428661528, "loss": 2.1795, "step": 277680 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.0001517828616905162, "loss": 2.134, "step": 277685 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.0001517812804971896, "loss": 2.1909, "step": 277690 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015177969928617362, "loss": 1.9901, "step": 277695 }, { "epoch": 0.65, "grad_norm": 2.140625, "learning_rate": 0.00015177811805746876, "loss": 2.1805, "step": 277700 }, { "epoch": 0.65, "grad_norm": 2.609375, "learning_rate": 0.00015177653681107552, "loss": 2.2892, "step": 277705 }, { "epoch": 0.65, "grad_norm": 2.390625, "learning_rate": 0.0001517749555469945, "loss": 1.9704, "step": 277710 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.00015177337426522622, "loss": 2.3436, "step": 277715 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.0001517717929657712, "loss": 2.1504, "step": 277720 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.00015177021164863, "loss": 1.9679, "step": 277725 }, { "epoch": 0.65, "grad_norm": 2.734375, "learning_rate": 0.00015176863031380315, "loss": 2.1515, "step": 277730 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.0001517670489612912, "loss": 2.1029, "step": 277735 }, { "epoch": 0.65, "grad_norm": 2.71875, "learning_rate": 0.00015176546759109472, "loss": 2.1225, "step": 277740 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.0001517638862032142, "loss": 2.2523, "step": 277745 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.0001517623047976502, "loss": 2.0655, "step": 277750 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.0001517607233744032, "loss": 2.0509, "step": 277755 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015175914193347388, "loss": 2.1004, "step": 277760 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015175756047486268, "loss": 2.0871, "step": 277765 }, { "epoch": 0.65, "grad_norm": 2.84375, "learning_rate": 0.00015175597899857012, "loss": 2.079, "step": 277770 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015175439750459682, "loss": 1.986, "step": 277775 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015175281599294325, "loss": 2.1882, "step": 277780 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015175123446360998, "loss": 2.0513, "step": 277785 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.0001517496529165976, "loss": 2.1436, "step": 277790 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015174807135190653, "loss": 2.1683, "step": 277795 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015174648976953742, "loss": 2.0369, "step": 277800 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015174490816949074, "loss": 2.0563, "step": 277805 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.0001517433265517671, "loss": 1.9162, "step": 277810 }, { "epoch": 0.65, "grad_norm": 2.4375, "learning_rate": 0.000151741744916367, "loss": 2.0922, "step": 277815 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015174016326329097, "loss": 1.9348, "step": 277820 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015173858159253954, "loss": 2.1282, "step": 277825 }, { "epoch": 0.65, "grad_norm": 2.5625, "learning_rate": 0.0001517369999041133, "loss": 2.0123, "step": 277830 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015173541819801277, "loss": 2.0898, "step": 277835 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015173383647423843, "loss": 2.0611, "step": 277840 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015173225473279092, "loss": 2.0447, "step": 277845 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015173067297367073, "loss": 1.7669, "step": 277850 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015172909119687836, "loss": 2.1605, "step": 277855 }, { "epoch": 0.65, "grad_norm": 2.75, "learning_rate": 0.00015172750940241445, "loss": 1.9956, "step": 277860 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015172592759027945, "loss": 2.3048, "step": 277865 }, { "epoch": 0.65, "grad_norm": 2.59375, "learning_rate": 0.00015172434576047396, "loss": 2.0106, "step": 277870 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001517227639129985, "loss": 1.9401, "step": 277875 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.0001517211820478536, "loss": 1.933, "step": 277880 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.0001517196001650398, "loss": 2.2263, "step": 277885 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.00015171801826455763, "loss": 2.0162, "step": 277890 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015171643634640764, "loss": 2.125, "step": 277895 }, { "epoch": 0.65, "grad_norm": 2.3125, "learning_rate": 0.0001517148544105904, "loss": 2.1035, "step": 277900 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015171327245710645, "loss": 2.1496, "step": 277905 }, { "epoch": 0.65, "grad_norm": 1.96875, "learning_rate": 0.0001517116904859563, "loss": 2.1464, "step": 277910 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.00015171010849714044, "loss": 2.2354, "step": 277915 }, { "epoch": 0.65, "grad_norm": 1.859375, "learning_rate": 0.00015170852649065952, "loss": 2.2528, "step": 277920 }, { "epoch": 0.65, "grad_norm": 2.109375, "learning_rate": 0.00015170694446651404, "loss": 2.0821, "step": 277925 }, { "epoch": 0.65, "grad_norm": 2.359375, "learning_rate": 0.0001517053624247045, "loss": 1.9758, "step": 277930 }, { "epoch": 0.65, "grad_norm": 1.90625, "learning_rate": 0.0001517037803652315, "loss": 2.104, "step": 277935 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.0001517021982880955, "loss": 2.0189, "step": 277940 }, { "epoch": 0.65, "grad_norm": 2.0625, "learning_rate": 0.00015170061619329715, "loss": 2.1742, "step": 277945 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.0001516990340808369, "loss": 1.9241, "step": 277950 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015169745195071533, "loss": 2.1176, "step": 277955 }, { "epoch": 0.65, "grad_norm": 1.875, "learning_rate": 0.00015169586980293297, "loss": 2.0853, "step": 277960 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015169428763749037, "loss": 2.2531, "step": 277965 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015169270545438806, "loss": 2.1022, "step": 277970 }, { "epoch": 0.65, "grad_norm": 1.9453125, "learning_rate": 0.00015169112325362657, "loss": 2.253, "step": 277975 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015168954103520648, "loss": 2.1822, "step": 277980 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.0001516879587991283, "loss": 2.0715, "step": 277985 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015168637654539255, "loss": 1.9929, "step": 277990 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015168479427399984, "loss": 2.004, "step": 277995 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015168321198495065, "loss": 2.1874, "step": 278000 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.0001516816296782455, "loss": 1.9252, "step": 278005 }, { "epoch": 0.65, "grad_norm": 2.203125, "learning_rate": 0.00015168004735388498, "loss": 1.991, "step": 278010 }, { "epoch": 0.65, "grad_norm": 2.375, "learning_rate": 0.00015167846501186967, "loss": 1.9951, "step": 278015 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.0001516768826522, "loss": 2.0144, "step": 278020 }, { "epoch": 0.65, "grad_norm": 2.1875, "learning_rate": 0.00015167530027487662, "loss": 2.068, "step": 278025 }, { "epoch": 0.65, "grad_norm": 1.9609375, "learning_rate": 0.00015167371787989998, "loss": 2.0564, "step": 278030 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015167213546727067, "loss": 2.1362, "step": 278035 }, { "epoch": 0.65, "grad_norm": 2.46875, "learning_rate": 0.00015167055303698924, "loss": 2.0925, "step": 278040 }, { "epoch": 0.65, "grad_norm": 1.9375, "learning_rate": 0.00015166897058905621, "loss": 2.0156, "step": 278045 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.0001516673881234721, "loss": 1.9817, "step": 278050 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015166580564023748, "loss": 2.2014, "step": 278055 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.0001516642231393529, "loss": 2.1613, "step": 278060 }, { "epoch": 0.65, "grad_norm": 1.765625, "learning_rate": 0.0001516626406208189, "loss": 2.2499, "step": 278065 }, { "epoch": 0.65, "grad_norm": 2.34375, "learning_rate": 0.00015166105808463596, "loss": 2.1864, "step": 278070 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015165947553080468, "loss": 2.0859, "step": 278075 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015165789295932557, "loss": 1.9064, "step": 278080 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015165631037019925, "loss": 2.1678, "step": 278085 }, { "epoch": 0.65, "grad_norm": 2.0, "learning_rate": 0.00015165472776342616, "loss": 2.0375, "step": 278090 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015165314513900687, "loss": 1.9646, "step": 278095 }, { "epoch": 0.65, "grad_norm": 2.875, "learning_rate": 0.0001516515624969419, "loss": 2.1269, "step": 278100 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015164997983723187, "loss": 2.0644, "step": 278105 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.00015164839715987725, "loss": 2.0215, "step": 278110 }, { "epoch": 0.65, "grad_norm": 2.453125, "learning_rate": 0.00015164681446487861, "loss": 2.1823, "step": 278115 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.00015164523175223646, "loss": 1.9441, "step": 278120 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.00015164364902195136, "loss": 2.0717, "step": 278125 }, { "epoch": 0.65, "grad_norm": 2.09375, "learning_rate": 0.00015164206627402388, "loss": 1.97, "step": 278130 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015164048350845452, "loss": 2.0659, "step": 278135 }, { "epoch": 0.65, "grad_norm": 3.359375, "learning_rate": 0.00015163890072524385, "loss": 2.0776, "step": 278140 }, { "epoch": 0.65, "grad_norm": 2.6875, "learning_rate": 0.00015163731792439236, "loss": 1.9971, "step": 278145 }, { "epoch": 0.65, "grad_norm": 2.625, "learning_rate": 0.00015163573510590066, "loss": 2.2495, "step": 278150 }, { "epoch": 0.65, "grad_norm": 1.703125, "learning_rate": 0.00015163415226976924, "loss": 2.0721, "step": 278155 }, { "epoch": 0.65, "grad_norm": 2.546875, "learning_rate": 0.00015163256941599866, "loss": 2.1252, "step": 278160 }, { "epoch": 0.65, "grad_norm": 2.5, "learning_rate": 0.00015163098654458947, "loss": 2.169, "step": 278165 }, { "epoch": 0.65, "grad_norm": 1.7421875, "learning_rate": 0.00015162940365554217, "loss": 2.1533, "step": 278170 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015162782074885738, "loss": 2.0202, "step": 278175 }, { "epoch": 0.65, "grad_norm": 2.171875, "learning_rate": 0.00015162623782453554, "loss": 2.0608, "step": 278180 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015162465488257725, "loss": 2.218, "step": 278185 }, { "epoch": 0.65, "grad_norm": 2.015625, "learning_rate": 0.00015162307192298307, "loss": 1.9274, "step": 278190 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015162148894575346, "loss": 1.991, "step": 278195 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015161990595088907, "loss": 2.1081, "step": 278200 }, { "epoch": 0.65, "grad_norm": 2.234375, "learning_rate": 0.00015161832293839037, "loss": 2.1309, "step": 278205 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015161673990825788, "loss": 2.2124, "step": 278210 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015161515686049222, "loss": 2.0981, "step": 278215 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.00015161357379509386, "loss": 2.0317, "step": 278220 }, { "epoch": 0.65, "grad_norm": 2.96875, "learning_rate": 0.00015161199071206337, "loss": 2.0353, "step": 278225 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015161040761140128, "loss": 2.1486, "step": 278230 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.00015160882449310816, "loss": 2.0169, "step": 278235 }, { "epoch": 0.65, "grad_norm": 2.484375, "learning_rate": 0.0001516072413571845, "loss": 2.2864, "step": 278240 }, { "epoch": 0.65, "grad_norm": 2.328125, "learning_rate": 0.00015160565820363092, "loss": 2.0467, "step": 278245 }, { "epoch": 0.65, "grad_norm": 2.28125, "learning_rate": 0.0001516040750324479, "loss": 2.0181, "step": 278250 }, { "epoch": 0.65, "grad_norm": 1.84375, "learning_rate": 0.00015160249184363592, "loss": 2.2078, "step": 278255 }, { "epoch": 0.65, "grad_norm": 2.53125, "learning_rate": 0.00015160090863719568, "loss": 1.9722, "step": 278260 }, { "epoch": 0.65, "grad_norm": 2.421875, "learning_rate": 0.0001515993254131276, "loss": 2.1311, "step": 278265 }, { "epoch": 0.65, "grad_norm": 2.078125, "learning_rate": 0.00015159774217143225, "loss": 1.8855, "step": 278270 }, { "epoch": 0.65, "grad_norm": 1.765625, "learning_rate": 0.00015159615891211016, "loss": 2.0777, "step": 278275 }, { "epoch": 0.65, "grad_norm": 1.9921875, "learning_rate": 0.00015159457563516192, "loss": 1.9872, "step": 278280 }, { "epoch": 0.65, "grad_norm": 2.265625, "learning_rate": 0.00015159299234058802, "loss": 2.0361, "step": 278285 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015159140902838903, "loss": 2.0511, "step": 278290 }, { "epoch": 0.65, "grad_norm": 2.984375, "learning_rate": 0.00015158982569856544, "loss": 1.9724, "step": 278295 }, { "epoch": 0.65, "grad_norm": 2.21875, "learning_rate": 0.00015158824235111788, "loss": 1.9398, "step": 278300 }, { "epoch": 0.65, "grad_norm": 2.125, "learning_rate": 0.00015158665898604682, "loss": 2.0153, "step": 278305 }, { "epoch": 0.65, "grad_norm": 1.9140625, "learning_rate": 0.00015158507560335282, "loss": 2.0954, "step": 278310 }, { "epoch": 0.65, "grad_norm": 2.046875, "learning_rate": 0.00015158349220303644, "loss": 1.96, "step": 278315 }, { "epoch": 0.65, "grad_norm": 2.15625, "learning_rate": 0.00015158190878509817, "loss": 2.1304, "step": 278320 }, { "epoch": 0.65, "grad_norm": 2.25, "learning_rate": 0.0001515803253495386, "loss": 2.0528, "step": 278325 }, { "epoch": 0.65, "grad_norm": 2.03125, "learning_rate": 0.00015157874189635826, "loss": 2.0532, "step": 278330 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015157715842555774, "loss": 2.2158, "step": 278335 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015157557493713743, "loss": 2.1898, "step": 278340 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.000151573991431098, "loss": 2.153, "step": 278345 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015157240790744, "loss": 1.9486, "step": 278350 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.0001515708243661639, "loss": 2.1575, "step": 278355 }, { "epoch": 0.66, "grad_norm": 1.7734375, "learning_rate": 0.00015156924080727025, "loss": 2.0667, "step": 278360 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015156765723075966, "loss": 2.0492, "step": 278365 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015156607363663258, "loss": 1.8874, "step": 278370 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.0001515644900248896, "loss": 2.0442, "step": 278375 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015156290639553127, "loss": 1.9767, "step": 278380 }, { "epoch": 0.66, "grad_norm": 1.8671875, "learning_rate": 0.0001515613227485581, "loss": 2.0712, "step": 278385 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015155973908397068, "loss": 1.9865, "step": 278390 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015155815540176948, "loss": 1.9637, "step": 278395 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001515565717019551, "loss": 2.1947, "step": 278400 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015155498798452807, "loss": 2.1287, "step": 278405 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.0001515534042494889, "loss": 1.974, "step": 278410 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015155182049683816, "loss": 2.0918, "step": 278415 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015155023672657638, "loss": 2.1426, "step": 278420 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.0001515486529387041, "loss": 2.0611, "step": 278425 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015154706913322188, "loss": 2.047, "step": 278430 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015154548531013024, "loss": 2.0525, "step": 278435 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.00015154390146942973, "loss": 1.9371, "step": 278440 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015154231761112087, "loss": 2.1613, "step": 278445 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015154073373520424, "loss": 2.1343, "step": 278450 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015153914984168036, "loss": 2.1025, "step": 278455 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015153756593054975, "loss": 2.2083, "step": 278460 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015153598200181301, "loss": 1.9944, "step": 278465 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001515343980554706, "loss": 2.0165, "step": 278470 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015153281409152313, "loss": 2.1744, "step": 278475 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001515312301099711, "loss": 2.0568, "step": 278480 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001515296461108151, "loss": 1.7919, "step": 278485 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015152806209405562, "loss": 1.9578, "step": 278490 }, { "epoch": 0.66, "grad_norm": 1.8984375, "learning_rate": 0.00015152647805969323, "loss": 1.9154, "step": 278495 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015152489400772846, "loss": 1.9994, "step": 278500 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001515233099381618, "loss": 2.1138, "step": 278505 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015152172585099392, "loss": 1.9912, "step": 278510 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015152014174622523, "loss": 1.9527, "step": 278515 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015151855762385636, "loss": 1.8812, "step": 278520 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001515169734838878, "loss": 2.1094, "step": 278525 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015151538932632009, "loss": 1.8166, "step": 278530 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015151380515115382, "loss": 1.8836, "step": 278535 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.0001515122209583895, "loss": 1.8861, "step": 278540 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015151063674802764, "loss": 2.0534, "step": 278545 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.0001515090525200688, "loss": 2.0276, "step": 278550 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015150746827451356, "loss": 2.1488, "step": 278555 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015150588401136248, "loss": 2.0651, "step": 278560 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015150429973061598, "loss": 2.1913, "step": 278565 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015150271543227476, "loss": 1.8607, "step": 278570 }, { "epoch": 0.66, "grad_norm": 1.84375, "learning_rate": 0.00015150113111633918, "loss": 2.1047, "step": 278575 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015149954678280993, "loss": 2.132, "step": 278580 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.0001514979624316875, "loss": 2.0214, "step": 278585 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015149637806297244, "loss": 2.1416, "step": 278590 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015149479367666528, "loss": 1.9654, "step": 278595 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015149320927276656, "loss": 2.1833, "step": 278600 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.0001514916248512768, "loss": 2.0843, "step": 278605 }, { "epoch": 0.66, "grad_norm": 1.921875, "learning_rate": 0.00015149004041219662, "loss": 1.8981, "step": 278610 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015148845595552648, "loss": 2.0124, "step": 278615 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015148687148126694, "loss": 2.2497, "step": 278620 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015148528698941856, "loss": 2.0369, "step": 278625 }, { "epoch": 0.66, "grad_norm": 2.546875, "learning_rate": 0.0001514837024799819, "loss": 2.1533, "step": 278630 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015148211795295744, "loss": 2.2164, "step": 278635 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015148053340834574, "loss": 2.0765, "step": 278640 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015147894884614738, "loss": 2.0912, "step": 278645 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015147736426636288, "loss": 1.9863, "step": 278650 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.0001514757796689928, "loss": 2.311, "step": 278655 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001514741950540376, "loss": 2.0787, "step": 278660 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015147261042149792, "loss": 1.9267, "step": 278665 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015147102577137424, "loss": 2.1601, "step": 278670 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015146944110366718, "loss": 1.8748, "step": 278675 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.00015146785641837718, "loss": 2.135, "step": 278680 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015146627171550484, "loss": 2.117, "step": 278685 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015146468699505066, "loss": 2.0628, "step": 278690 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015146310225701526, "loss": 2.2179, "step": 278695 }, { "epoch": 0.66, "grad_norm": 2.65625, "learning_rate": 0.0001514615175013991, "loss": 1.8264, "step": 278700 }, { "epoch": 0.66, "grad_norm": 2.890625, "learning_rate": 0.00015145993272820273, "loss": 2.1473, "step": 278705 }, { "epoch": 0.66, "grad_norm": 1.828125, "learning_rate": 0.00015145834793742674, "loss": 2.0548, "step": 278710 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015145676312907166, "loss": 2.0588, "step": 278715 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.000151455178303138, "loss": 1.9602, "step": 278720 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015145359345962632, "loss": 2.038, "step": 278725 }, { "epoch": 0.66, "grad_norm": 1.9140625, "learning_rate": 0.00015145200859853714, "loss": 2.1575, "step": 278730 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015145042371987106, "loss": 2.1271, "step": 278735 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015144883882362855, "loss": 2.1181, "step": 278740 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015144725390981017, "loss": 2.0942, "step": 278745 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015144566897841652, "loss": 2.1082, "step": 278750 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015144408402944807, "loss": 2.1213, "step": 278755 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.0001514424990629054, "loss": 2.1055, "step": 278760 }, { "epoch": 0.66, "grad_norm": 1.796875, "learning_rate": 0.00015144091407878903, "loss": 1.9072, "step": 278765 }, { "epoch": 0.66, "grad_norm": 1.8984375, "learning_rate": 0.0001514393290770995, "loss": 2.0698, "step": 278770 }, { "epoch": 0.66, "grad_norm": 2.75, "learning_rate": 0.0001514377440578374, "loss": 2.0331, "step": 278775 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001514361590210032, "loss": 2.089, "step": 278780 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.0001514345739665975, "loss": 2.0014, "step": 278785 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001514329888946208, "loss": 2.1553, "step": 278790 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015143140380507366, "loss": 2.1141, "step": 278795 }, { "epoch": 0.66, "grad_norm": 1.765625, "learning_rate": 0.00015142981869795664, "loss": 2.1606, "step": 278800 }, { "epoch": 0.66, "grad_norm": 1.9453125, "learning_rate": 0.00015142823357327024, "loss": 1.9037, "step": 278805 }, { "epoch": 0.66, "grad_norm": 1.8203125, "learning_rate": 0.00015142664843101501, "loss": 2.1459, "step": 278810 }, { "epoch": 0.66, "grad_norm": 2.859375, "learning_rate": 0.00015142506327119152, "loss": 2.04, "step": 278815 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015142347809380028, "loss": 1.9481, "step": 278820 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015142189289884186, "loss": 2.0393, "step": 278825 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015142030768631678, "loss": 1.9496, "step": 278830 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015141872245622563, "loss": 2.1523, "step": 278835 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.00015141713720856888, "loss": 2.1367, "step": 278840 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.0001514155519433471, "loss": 2.0978, "step": 278845 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015141396666056082, "loss": 2.043, "step": 278850 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015141238136021063, "loss": 2.0978, "step": 278855 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.000151410796042297, "loss": 2.0879, "step": 278860 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015140921070682056, "loss": 2.2489, "step": 278865 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.0001514076253537818, "loss": 2.0905, "step": 278870 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015140603998318124, "loss": 2.1103, "step": 278875 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015140445459501943, "loss": 2.157, "step": 278880 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015140286918929692, "loss": 2.1865, "step": 278885 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015140128376601428, "loss": 2.2374, "step": 278890 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015139969832517203, "loss": 2.1157, "step": 278895 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015139811286677071, "loss": 2.0958, "step": 278900 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015139652739081086, "loss": 2.0665, "step": 278905 }, { "epoch": 0.66, "grad_norm": 1.8515625, "learning_rate": 0.00015139494189729305, "loss": 2.2528, "step": 278910 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015139335638621776, "loss": 2.1382, "step": 278915 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015139177085758557, "loss": 2.0213, "step": 278920 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015139018531139703, "loss": 2.2045, "step": 278925 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015138859974765266, "loss": 1.8991, "step": 278930 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015138701416635302, "loss": 2.1788, "step": 278935 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015138542856749863, "loss": 1.9534, "step": 278940 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015138384295109008, "loss": 2.0668, "step": 278945 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015138225731712787, "loss": 2.0175, "step": 278950 }, { "epoch": 0.66, "grad_norm": 1.8984375, "learning_rate": 0.00015138067166561254, "loss": 2.1675, "step": 278955 }, { "epoch": 0.66, "grad_norm": 1.9375, "learning_rate": 0.0001513790859965446, "loss": 1.9149, "step": 278960 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015137750030992466, "loss": 2.1058, "step": 278965 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015137591460575325, "loss": 2.0654, "step": 278970 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015137432888403092, "loss": 2.0777, "step": 278975 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015137274314475815, "loss": 2.1426, "step": 278980 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.0001513711573879355, "loss": 2.1675, "step": 278985 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015136957161356358, "loss": 2.1925, "step": 278990 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015136798582164284, "loss": 1.9291, "step": 278995 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001513664000121739, "loss": 2.2082, "step": 279000 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015136481418515725, "loss": 2.301, "step": 279005 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015136322834059346, "loss": 2.1112, "step": 279010 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.00015136164247848304, "loss": 2.0164, "step": 279015 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015136005659882655, "loss": 2.2421, "step": 279020 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015135847070162455, "loss": 2.0244, "step": 279025 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015135688478687756, "loss": 2.0056, "step": 279030 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015135529885458611, "loss": 2.1171, "step": 279035 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.0001513537129047508, "loss": 2.1717, "step": 279040 }, { "epoch": 0.66, "grad_norm": 1.9375, "learning_rate": 0.00015135212693737207, "loss": 2.175, "step": 279045 }, { "epoch": 0.66, "grad_norm": 1.9140625, "learning_rate": 0.00015135054095245052, "loss": 1.9412, "step": 279050 }, { "epoch": 0.66, "grad_norm": 1.875, "learning_rate": 0.00015134895494998675, "loss": 2.0855, "step": 279055 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015134736892998124, "loss": 2.0172, "step": 279060 }, { "epoch": 0.66, "grad_norm": 2.828125, "learning_rate": 0.0001513457828924345, "loss": 2.1815, "step": 279065 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015134419683734716, "loss": 1.7729, "step": 279070 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.00015134261076471964, "loss": 2.1312, "step": 279075 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015134102467455262, "loss": 2.2101, "step": 279080 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015133943856684656, "loss": 2.1648, "step": 279085 }, { "epoch": 0.66, "grad_norm": 1.96875, "learning_rate": 0.000151337852441602, "loss": 2.1464, "step": 279090 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.0001513362662988195, "loss": 1.975, "step": 279095 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.0001513346801384996, "loss": 2.0141, "step": 279100 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015133309396064286, "loss": 1.842, "step": 279105 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015133150776524981, "loss": 2.0262, "step": 279110 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015132992155232096, "loss": 2.2324, "step": 279115 }, { "epoch": 0.66, "grad_norm": 2.75, "learning_rate": 0.00015132833532185687, "loss": 2.2336, "step": 279120 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015132674907385808, "loss": 2.1742, "step": 279125 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001513251628083252, "loss": 1.9535, "step": 279130 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015132357652525871, "loss": 2.0687, "step": 279135 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001513219902246591, "loss": 2.1036, "step": 279140 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015132040390652697, "loss": 2.0646, "step": 279145 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001513188175708629, "loss": 1.7562, "step": 279150 }, { "epoch": 0.66, "grad_norm": 1.9296875, "learning_rate": 0.0001513172312176674, "loss": 1.9907, "step": 279155 }, { "epoch": 0.66, "grad_norm": 1.9453125, "learning_rate": 0.00015131564484694097, "loss": 1.9552, "step": 279160 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015131405845868418, "loss": 2.0565, "step": 279165 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.0001513124720528976, "loss": 2.2389, "step": 279170 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015131088562958173, "loss": 1.9077, "step": 279175 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015130929918873712, "loss": 2.1468, "step": 279180 }, { "epoch": 0.66, "grad_norm": 1.859375, "learning_rate": 0.00015130771273036436, "loss": 2.1846, "step": 279185 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015130612625446395, "loss": 2.2275, "step": 279190 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.0001513045397610364, "loss": 2.1993, "step": 279195 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001513029532500823, "loss": 2.019, "step": 279200 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001513013667216022, "loss": 2.1201, "step": 279205 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015129978017559663, "loss": 2.0326, "step": 279210 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001512981936120661, "loss": 1.8774, "step": 279215 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015129660703101121, "loss": 2.1024, "step": 279220 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.00015129502043243243, "loss": 2.0716, "step": 279225 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015129343381633037, "loss": 2.1036, "step": 279230 }, { "epoch": 0.66, "grad_norm": 1.9296875, "learning_rate": 0.00015129184718270554, "loss": 1.9206, "step": 279235 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015129026053155843, "loss": 2.0993, "step": 279240 }, { "epoch": 0.66, "grad_norm": 1.953125, "learning_rate": 0.00015128867386288972, "loss": 2.1855, "step": 279245 }, { "epoch": 0.66, "grad_norm": 2.90625, "learning_rate": 0.0001512870871766998, "loss": 2.2613, "step": 279250 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001512855004729893, "loss": 1.9908, "step": 279255 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015128391375175877, "loss": 1.9227, "step": 279260 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015128232701300872, "loss": 2.0496, "step": 279265 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.0001512807402567397, "loss": 2.0613, "step": 279270 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015127915348295222, "loss": 1.9688, "step": 279275 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015127756669164685, "loss": 2.0107, "step": 279280 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0001512759798828242, "loss": 2.0665, "step": 279285 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015127439305648465, "loss": 2.0867, "step": 279290 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.0001512728062126289, "loss": 2.1343, "step": 279295 }, { "epoch": 0.66, "grad_norm": 1.7734375, "learning_rate": 0.00015127121935125742, "loss": 2.1532, "step": 279300 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015126963247237075, "loss": 2.2071, "step": 279305 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015126804557596946, "loss": 2.217, "step": 279310 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015126645866205403, "loss": 1.9515, "step": 279315 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.0001512648717306251, "loss": 2.0341, "step": 279320 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015126328478168316, "loss": 2.1269, "step": 279325 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001512616978152287, "loss": 1.9368, "step": 279330 }, { "epoch": 0.66, "grad_norm": 1.78125, "learning_rate": 0.00015126011083126234, "loss": 2.0175, "step": 279335 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.0001512585238297846, "loss": 1.9762, "step": 279340 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015125693681079601, "loss": 2.0756, "step": 279345 }, { "epoch": 0.66, "grad_norm": 1.9296875, "learning_rate": 0.00015125534977429716, "loss": 2.1649, "step": 279350 }, { "epoch": 0.66, "grad_norm": 2.65625, "learning_rate": 0.00015125376272028852, "loss": 2.0927, "step": 279355 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015125217564877067, "loss": 2.1987, "step": 279360 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015125058855974417, "loss": 2.1091, "step": 279365 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.0001512490014532095, "loss": 2.1139, "step": 279370 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015124741432916725, "loss": 2.2736, "step": 279375 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015124582718761798, "loss": 2.2853, "step": 279380 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015124424002856222, "loss": 1.9248, "step": 279385 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015124265285200043, "loss": 2.0837, "step": 279390 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015124106565793326, "loss": 2.2657, "step": 279395 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015123947844636123, "loss": 2.0746, "step": 279400 }, { "epoch": 0.66, "grad_norm": 1.953125, "learning_rate": 0.00015123789121728484, "loss": 1.9987, "step": 279405 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015123630397070469, "loss": 2.0845, "step": 279410 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015123471670662123, "loss": 2.1967, "step": 279415 }, { "epoch": 0.66, "grad_norm": 1.9140625, "learning_rate": 0.00015123312942503508, "loss": 2.2084, "step": 279420 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.0001512315421259468, "loss": 2.0644, "step": 279425 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015122995480935686, "loss": 2.0321, "step": 279430 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015122836747526584, "loss": 1.8781, "step": 279435 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015122678012367428, "loss": 2.087, "step": 279440 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015122519275458278, "loss": 2.054, "step": 279445 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015122360536799176, "loss": 2.1881, "step": 279450 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015122201796390186, "loss": 2.1157, "step": 279455 }, { "epoch": 0.66, "grad_norm": 1.9296875, "learning_rate": 0.00015122043054231358, "loss": 2.0315, "step": 279460 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015121884310322745, "loss": 1.9869, "step": 279465 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015121725564664406, "loss": 2.0892, "step": 279470 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015121566817256392, "loss": 2.0175, "step": 279475 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001512140806809876, "loss": 2.0751, "step": 279480 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015121249317191557, "loss": 1.9965, "step": 279485 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015121090564534847, "loss": 2.1551, "step": 279490 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.0001512093181012868, "loss": 1.922, "step": 279495 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015120773053973105, "loss": 2.1125, "step": 279500 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015120614296068184, "loss": 2.0739, "step": 279505 }, { "epoch": 0.66, "grad_norm": 2.890625, "learning_rate": 0.00015120455536413968, "loss": 2.0949, "step": 279510 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015120296775010514, "loss": 2.0411, "step": 279515 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001512013801185787, "loss": 2.2261, "step": 279520 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015119979246956095, "loss": 2.1653, "step": 279525 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015119820480305243, "loss": 2.0855, "step": 279530 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015119661711905367, "loss": 2.1138, "step": 279535 }, { "epoch": 0.66, "grad_norm": 1.890625, "learning_rate": 0.00015119502941756524, "loss": 2.088, "step": 279540 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015119344169858763, "loss": 2.0581, "step": 279545 }, { "epoch": 0.66, "grad_norm": 2.703125, "learning_rate": 0.0001511918539621214, "loss": 2.059, "step": 279550 }, { "epoch": 0.66, "grad_norm": 1.8671875, "learning_rate": 0.00015119026620816713, "loss": 2.0104, "step": 279555 }, { "epoch": 0.66, "grad_norm": 1.9453125, "learning_rate": 0.00015118867843672535, "loss": 1.9884, "step": 279560 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015118709064779656, "loss": 1.9918, "step": 279565 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.00015118550284138132, "loss": 1.9807, "step": 279570 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.0001511839150174802, "loss": 2.0596, "step": 279575 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015118232717609376, "loss": 2.0067, "step": 279580 }, { "epoch": 0.66, "grad_norm": 1.8359375, "learning_rate": 0.00015118073931722249, "loss": 1.9328, "step": 279585 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015117915144086694, "loss": 2.1713, "step": 279590 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015117756354702765, "loss": 1.99, "step": 279595 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015117597563570517, "loss": 1.9788, "step": 279600 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.0001511743877069001, "loss": 2.0777, "step": 279605 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001511727997606129, "loss": 2.1742, "step": 279610 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.00015117121179684413, "loss": 2.0427, "step": 279615 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015116962381559436, "loss": 2.145, "step": 279620 }, { "epoch": 0.66, "grad_norm": 2.6875, "learning_rate": 0.00015116803581686415, "loss": 2.071, "step": 279625 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015116644780065398, "loss": 2.004, "step": 279630 }, { "epoch": 0.66, "grad_norm": 2.6875, "learning_rate": 0.00015116485976696443, "loss": 2.0482, "step": 279635 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.000151163271715796, "loss": 1.9753, "step": 279640 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001511616836471493, "loss": 1.9553, "step": 279645 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015116009556102484, "loss": 2.0743, "step": 279650 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015115850745742317, "loss": 2.1255, "step": 279655 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015115691933634482, "loss": 1.8672, "step": 279660 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015115533119779032, "loss": 1.8849, "step": 279665 }, { "epoch": 0.66, "grad_norm": 2.8125, "learning_rate": 0.00015115374304176026, "loss": 1.8779, "step": 279670 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015115215486825515, "loss": 2.0814, "step": 279675 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015115056667727554, "loss": 1.9823, "step": 279680 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015114897846882195, "loss": 2.1167, "step": 279685 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015114739024289496, "loss": 2.0921, "step": 279690 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001511458019994951, "loss": 2.0025, "step": 279695 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015114421373862286, "loss": 2.0718, "step": 279700 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015114262546027886, "loss": 2.2334, "step": 279705 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001511410371644636, "loss": 2.0838, "step": 279710 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015113944885117764, "loss": 1.9171, "step": 279715 }, { "epoch": 0.66, "grad_norm": 2.65625, "learning_rate": 0.00015113786052042155, "loss": 2.2149, "step": 279720 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.0001511362721721958, "loss": 1.8883, "step": 279725 }, { "epoch": 0.66, "grad_norm": 1.828125, "learning_rate": 0.00015113468380650099, "loss": 2.1753, "step": 279730 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001511330954233376, "loss": 2.0662, "step": 279735 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015113150702270628, "loss": 1.8561, "step": 279740 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015112991860460748, "loss": 1.9543, "step": 279745 }, { "epoch": 0.66, "grad_norm": 1.90625, "learning_rate": 0.00015112833016904179, "loss": 1.9901, "step": 279750 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.0001511267417160097, "loss": 2.1552, "step": 279755 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015112515324551182, "loss": 2.2352, "step": 279760 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015112356475754864, "loss": 2.1452, "step": 279765 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015112197625212072, "loss": 1.9732, "step": 279770 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.00015112038772922866, "loss": 2.1432, "step": 279775 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015111879918887287, "loss": 2.1414, "step": 279780 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015111721063105403, "loss": 2.0032, "step": 279785 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001511156220557726, "loss": 2.0603, "step": 279790 }, { "epoch": 0.66, "grad_norm": 2.96875, "learning_rate": 0.00015111403346302913, "loss": 2.0597, "step": 279795 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015111244485282421, "loss": 2.2274, "step": 279800 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015111085622515833, "loss": 2.1682, "step": 279805 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015110926758003207, "loss": 1.8763, "step": 279810 }, { "epoch": 0.66, "grad_norm": 1.9296875, "learning_rate": 0.00015110767891744596, "loss": 2.0655, "step": 279815 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015110609023740053, "loss": 2.199, "step": 279820 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015110450153989633, "loss": 2.053, "step": 279825 }, { "epoch": 0.66, "grad_norm": 2.953125, "learning_rate": 0.0001511029128249339, "loss": 2.0228, "step": 279830 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.00015110132409251384, "loss": 1.9898, "step": 279835 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015109973534263658, "loss": 2.1194, "step": 279840 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001510981465753027, "loss": 2.0479, "step": 279845 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015109655779051284, "loss": 2.1333, "step": 279850 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015109496898826745, "loss": 2.1561, "step": 279855 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015109338016856708, "loss": 2.0786, "step": 279860 }, { "epoch": 0.66, "grad_norm": 1.921875, "learning_rate": 0.00015109179133141227, "loss": 1.9783, "step": 279865 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001510902024768036, "loss": 2.0628, "step": 279870 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015108861360474158, "loss": 2.1823, "step": 279875 }, { "epoch": 0.66, "grad_norm": 1.953125, "learning_rate": 0.00015108702471522677, "loss": 1.993, "step": 279880 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001510854358082597, "loss": 2.2977, "step": 279885 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015108384688384093, "loss": 1.9839, "step": 279890 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015108225794197095, "loss": 1.9797, "step": 279895 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015108066898265041, "loss": 2.0321, "step": 279900 }, { "epoch": 0.66, "grad_norm": 1.875, "learning_rate": 0.00015107908000587972, "loss": 2.1513, "step": 279905 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015107749101165953, "loss": 2.0133, "step": 279910 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015107590199999032, "loss": 1.9245, "step": 279915 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.0001510743129708727, "loss": 2.0943, "step": 279920 }, { "epoch": 0.66, "grad_norm": 2.71875, "learning_rate": 0.00015107272392430714, "loss": 2.1469, "step": 279925 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015107113486029417, "loss": 2.1876, "step": 279930 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015106954577883444, "loss": 1.9908, "step": 279935 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.0001510679566799284, "loss": 2.0294, "step": 279940 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015106636756357663, "loss": 2.1468, "step": 279945 }, { "epoch": 0.66, "grad_norm": 1.890625, "learning_rate": 0.00015106477842977966, "loss": 2.0676, "step": 279950 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015106318927853802, "loss": 2.1481, "step": 279955 }, { "epoch": 0.66, "grad_norm": 2.625, "learning_rate": 0.0001510616001098523, "loss": 2.1247, "step": 279960 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015106001092372298, "loss": 2.1088, "step": 279965 }, { "epoch": 0.66, "grad_norm": 1.8203125, "learning_rate": 0.00015105842172015064, "loss": 2.1384, "step": 279970 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015105683249913582, "loss": 2.1676, "step": 279975 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.00015105524326067905, "loss": 2.1594, "step": 279980 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.0001510536540047809, "loss": 2.0673, "step": 279985 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.0001510520647314419, "loss": 2.2745, "step": 279990 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015105047544066257, "loss": 2.1319, "step": 279995 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001510488861324435, "loss": 2.1052, "step": 280000 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015104729680678514, "loss": 1.9988, "step": 280005 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015104570746368815, "loss": 2.1499, "step": 280010 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015104411810315304, "loss": 2.0863, "step": 280015 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.0001510425287251803, "loss": 2.0665, "step": 280020 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015104093932977053, "loss": 2.0485, "step": 280025 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015103934991692421, "loss": 2.1913, "step": 280030 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015103776048664195, "loss": 2.1339, "step": 280035 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015103617103892427, "loss": 2.1047, "step": 280040 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001510345815737717, "loss": 2.107, "step": 280045 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015103299209118478, "loss": 2.1135, "step": 280050 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015103140259116406, "loss": 2.0994, "step": 280055 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015102981307371012, "loss": 1.9768, "step": 280060 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015102822353882346, "loss": 2.112, "step": 280065 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.0001510266339865046, "loss": 2.1066, "step": 280070 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015102504441675415, "loss": 2.0017, "step": 280075 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015102345482957263, "loss": 2.2347, "step": 280080 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015102186522496054, "loss": 1.9701, "step": 280085 }, { "epoch": 0.66, "grad_norm": 2.6875, "learning_rate": 0.00015102027560291847, "loss": 2.1143, "step": 280090 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015101868596344696, "loss": 2.12, "step": 280095 }, { "epoch": 0.66, "grad_norm": 2.609375, "learning_rate": 0.0001510170963065465, "loss": 1.9431, "step": 280100 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015101550663221774, "loss": 2.028, "step": 280105 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015101391694046109, "loss": 2.1246, "step": 280110 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015101232723127721, "loss": 1.9259, "step": 280115 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015101073750466656, "loss": 2.1089, "step": 280120 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015100914776062973, "loss": 2.0315, "step": 280125 }, { "epoch": 0.66, "grad_norm": 1.7578125, "learning_rate": 0.00015100755799916728, "loss": 2.1158, "step": 280130 }, { "epoch": 0.66, "grad_norm": 1.875, "learning_rate": 0.0001510059682202797, "loss": 2.1149, "step": 280135 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.00015100437842396753, "loss": 2.0997, "step": 280140 }, { "epoch": 0.66, "grad_norm": 1.9140625, "learning_rate": 0.00015100278861023136, "loss": 2.1572, "step": 280145 }, { "epoch": 0.66, "grad_norm": 1.84375, "learning_rate": 0.00015100119877907173, "loss": 2.1977, "step": 280150 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015099960893048917, "loss": 2.1168, "step": 280155 }, { "epoch": 0.66, "grad_norm": 1.8515625, "learning_rate": 0.00015099801906448418, "loss": 1.9788, "step": 280160 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015099642918105737, "loss": 1.8175, "step": 280165 }, { "epoch": 0.66, "grad_norm": 1.84375, "learning_rate": 0.00015099483928020924, "loss": 2.2267, "step": 280170 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015099324936194037, "loss": 2.0846, "step": 280175 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015099165942625127, "loss": 1.9378, "step": 280180 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.00015099006947314248, "loss": 1.9812, "step": 280185 }, { "epoch": 0.66, "grad_norm": 1.875, "learning_rate": 0.00015098847950261455, "loss": 2.0296, "step": 280190 }, { "epoch": 0.66, "grad_norm": 2.609375, "learning_rate": 0.00015098688951466804, "loss": 2.0057, "step": 280195 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.0001509852995093035, "loss": 2.0586, "step": 280200 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015098370948652144, "loss": 2.1135, "step": 280205 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015098211944632243, "loss": 2.0429, "step": 280210 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.000150980529388707, "loss": 1.975, "step": 280215 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.0001509789393136757, "loss": 1.9172, "step": 280220 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015097734922122906, "loss": 2.2361, "step": 280225 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015097575911136765, "loss": 1.9149, "step": 280230 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.000150974168984092, "loss": 2.0895, "step": 280235 }, { "epoch": 0.66, "grad_norm": 1.90625, "learning_rate": 0.00015097257883940263, "loss": 2.0964, "step": 280240 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001509709886773001, "loss": 1.9495, "step": 280245 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015096939849778497, "loss": 2.2756, "step": 280250 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.00015096780830085776, "loss": 1.9771, "step": 280255 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015096621808651902, "loss": 2.0282, "step": 280260 }, { "epoch": 0.66, "grad_norm": 1.7734375, "learning_rate": 0.00015096462785476936, "loss": 1.927, "step": 280265 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015096303760560916, "loss": 2.1296, "step": 280270 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.0001509614473390391, "loss": 2.0056, "step": 280275 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.0001509598570550597, "loss": 2.0621, "step": 280280 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015095826675367148, "loss": 2.0747, "step": 280285 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.000150956676434875, "loss": 2.2144, "step": 280290 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.00015095508609867076, "loss": 1.9731, "step": 280295 }, { "epoch": 0.66, "grad_norm": 2.9375, "learning_rate": 0.00015095349574505935, "loss": 1.9891, "step": 280300 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001509519053740413, "loss": 2.1127, "step": 280305 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015095031498561722, "loss": 1.9864, "step": 280310 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015094872457978753, "loss": 2.2069, "step": 280315 }, { "epoch": 0.66, "grad_norm": 1.96875, "learning_rate": 0.00015094713415655283, "loss": 2.213, "step": 280320 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015094554371591364, "loss": 1.9479, "step": 280325 }, { "epoch": 0.66, "grad_norm": 1.90625, "learning_rate": 0.0001509439532578706, "loss": 2.1665, "step": 280330 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015094236278242416, "loss": 2.0, "step": 280335 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015094077228957486, "loss": 2.0782, "step": 280340 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.00015093918177932326, "loss": 2.2282, "step": 280345 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015093759125166996, "loss": 2.3543, "step": 280350 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.0001509360007066154, "loss": 2.0192, "step": 280355 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001509344101441602, "loss": 2.0617, "step": 280360 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001509328195643049, "loss": 2.051, "step": 280365 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015093122896705, "loss": 2.085, "step": 280370 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.0001509296383523961, "loss": 2.0486, "step": 280375 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015092804772034367, "loss": 2.1733, "step": 280380 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.0001509264570708933, "loss": 2.2519, "step": 280385 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015092486640404555, "loss": 1.8923, "step": 280390 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015092327571980096, "loss": 2.2902, "step": 280395 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015092168501816002, "loss": 2.2061, "step": 280400 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0001509200942991233, "loss": 2.1187, "step": 280405 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001509185035626914, "loss": 1.9346, "step": 280410 }, { "epoch": 0.66, "grad_norm": 1.890625, "learning_rate": 0.00015091691280886478, "loss": 2.0256, "step": 280415 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015091532203764403, "loss": 1.9466, "step": 280420 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015091373124902967, "loss": 1.9766, "step": 280425 }, { "epoch": 0.66, "grad_norm": 2.546875, "learning_rate": 0.0001509121404430223, "loss": 1.9732, "step": 280430 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015091054961962236, "loss": 2.2366, "step": 280435 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015090895877883047, "loss": 1.9482, "step": 280440 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.0001509073679206472, "loss": 2.2519, "step": 280445 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.000150905777045073, "loss": 2.1356, "step": 280450 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015090418615210848, "loss": 1.9514, "step": 280455 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015090259524175415, "loss": 2.1443, "step": 280460 }, { "epoch": 0.66, "grad_norm": 2.6875, "learning_rate": 0.0001509010043140106, "loss": 2.0992, "step": 280465 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015089941336887832, "loss": 2.1656, "step": 280470 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015089782240635787, "loss": 2.0396, "step": 280475 }, { "epoch": 0.66, "grad_norm": 2.78125, "learning_rate": 0.00015089623142644984, "loss": 2.0296, "step": 280480 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001508946404291547, "loss": 2.127, "step": 280485 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015089304941447304, "loss": 2.0985, "step": 280490 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015089145838240539, "loss": 2.1184, "step": 280495 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.0001508898673329523, "loss": 1.9901, "step": 280500 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.0001508882762661143, "loss": 2.1551, "step": 280505 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015088668518189197, "loss": 2.0511, "step": 280510 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015088509408028576, "loss": 2.0613, "step": 280515 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015088350296129638, "loss": 2.2777, "step": 280520 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.0001508819118249242, "loss": 2.0636, "step": 280525 }, { "epoch": 0.66, "grad_norm": 1.703125, "learning_rate": 0.00015088032067116984, "loss": 1.9337, "step": 280530 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015087872950003383, "loss": 2.1256, "step": 280535 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.00015087713831151675, "loss": 2.1734, "step": 280540 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.0001508755471056191, "loss": 2.0976, "step": 280545 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015087395588234145, "loss": 2.0961, "step": 280550 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015087236464168436, "loss": 2.0851, "step": 280555 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015087077338364833, "loss": 2.0534, "step": 280560 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015086918210823388, "loss": 2.1071, "step": 280565 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015086759081544163, "loss": 2.1015, "step": 280570 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015086599950527209, "loss": 2.1385, "step": 280575 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015086440817772582, "loss": 2.2376, "step": 280580 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015086281683280333, "loss": 1.9725, "step": 280585 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015086122547050516, "loss": 2.0724, "step": 280590 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015085963409083186, "loss": 2.057, "step": 280595 }, { "epoch": 0.66, "grad_norm": 3.921875, "learning_rate": 0.000150858042693784, "loss": 2.103, "step": 280600 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015085645127936215, "loss": 2.0559, "step": 280605 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001508548598475668, "loss": 1.991, "step": 280610 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015085326839839848, "loss": 2.1343, "step": 280615 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015085167693185776, "loss": 2.0279, "step": 280620 }, { "epoch": 0.66, "grad_norm": 1.9453125, "learning_rate": 0.00015085008544794522, "loss": 2.2106, "step": 280625 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015084849394666133, "loss": 2.1701, "step": 280630 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015084690242800672, "loss": 1.8385, "step": 280635 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015084531089198185, "loss": 2.1329, "step": 280640 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015084371933858726, "loss": 2.1389, "step": 280645 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015084212776782363, "loss": 2.0442, "step": 280650 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015084053617969137, "loss": 1.9913, "step": 280655 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015083894457419103, "loss": 2.1503, "step": 280660 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.0001508373529513232, "loss": 2.1614, "step": 280665 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.0001508357613110884, "loss": 1.9971, "step": 280670 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.0001508341696534872, "loss": 2.0319, "step": 280675 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.0001508325779785201, "loss": 2.1737, "step": 280680 }, { "epoch": 0.66, "grad_norm": 1.8125, "learning_rate": 0.0001508309862861877, "loss": 2.0415, "step": 280685 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001508293945764905, "loss": 2.1731, "step": 280690 }, { "epoch": 0.66, "grad_norm": 1.78125, "learning_rate": 0.00015082780284942908, "loss": 2.2296, "step": 280695 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015082621110500393, "loss": 2.1623, "step": 280700 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015082461934321563, "loss": 2.0791, "step": 280705 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.0001508230275640647, "loss": 1.9342, "step": 280710 }, { "epoch": 0.66, "grad_norm": 1.9140625, "learning_rate": 0.00015082143576755175, "loss": 2.151, "step": 280715 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015081984395367724, "loss": 1.9352, "step": 280720 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015081825212244176, "loss": 1.9909, "step": 280725 }, { "epoch": 0.66, "grad_norm": 1.859375, "learning_rate": 0.00015081666027384584, "loss": 1.9739, "step": 280730 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015081506840789003, "loss": 2.0141, "step": 280735 }, { "epoch": 0.66, "grad_norm": 1.6796875, "learning_rate": 0.00015081347652457487, "loss": 2.17, "step": 280740 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015081188462390092, "loss": 1.994, "step": 280745 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015081029270586869, "loss": 1.9177, "step": 280750 }, { "epoch": 0.66, "grad_norm": 1.6171875, "learning_rate": 0.00015080870077047872, "loss": 1.944, "step": 280755 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015080710881773163, "loss": 2.2111, "step": 280760 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001508055168476279, "loss": 2.0841, "step": 280765 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015080392486016802, "loss": 1.9479, "step": 280770 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015080233285535267, "loss": 1.8282, "step": 280775 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015080074083318227, "loss": 1.9559, "step": 280780 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015079914879365748, "loss": 2.1023, "step": 280785 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015079755673677873, "loss": 1.9695, "step": 280790 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015079596466254663, "loss": 2.0561, "step": 280795 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.0001507943725709617, "loss": 2.0795, "step": 280800 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.0001507927804620245, "loss": 1.9127, "step": 280805 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015079118833573554, "loss": 2.26, "step": 280810 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015078959619209542, "loss": 1.9745, "step": 280815 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015078800403110464, "loss": 1.9876, "step": 280820 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015078641185276376, "loss": 2.0494, "step": 280825 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.0001507848196570733, "loss": 2.0936, "step": 280830 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015078322744403386, "loss": 2.1693, "step": 280835 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015078163521364593, "loss": 1.9775, "step": 280840 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.0001507800429659101, "loss": 2.1967, "step": 280845 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015077845070082682, "loss": 2.0717, "step": 280850 }, { "epoch": 0.66, "grad_norm": 1.828125, "learning_rate": 0.00015077685841839678, "loss": 2.1101, "step": 280855 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015077526611862035, "loss": 2.0538, "step": 280860 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015077367380149825, "loss": 1.991, "step": 280865 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.0001507720814670309, "loss": 2.1092, "step": 280870 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015077048911521894, "loss": 1.9701, "step": 280875 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001507688967460628, "loss": 2.0693, "step": 280880 }, { "epoch": 0.66, "grad_norm": 2.75, "learning_rate": 0.00015076730435956313, "loss": 2.0389, "step": 280885 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015076571195572037, "loss": 2.1125, "step": 280890 }, { "epoch": 0.66, "grad_norm": 1.8671875, "learning_rate": 0.00015076411953453517, "loss": 1.8861, "step": 280895 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.000150762527096008, "loss": 2.1972, "step": 280900 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015076093464013948, "loss": 2.1661, "step": 280905 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015075934216693003, "loss": 2.0685, "step": 280910 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.0001507577496763803, "loss": 2.1006, "step": 280915 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015075615716849083, "loss": 2.0974, "step": 280920 }, { "epoch": 0.66, "grad_norm": 1.9453125, "learning_rate": 0.00015075456464326212, "loss": 2.0954, "step": 280925 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.0001507529721006947, "loss": 2.0975, "step": 280930 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015075137954078917, "loss": 1.885, "step": 280935 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015074978696354602, "loss": 2.2031, "step": 280940 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015074819436896588, "loss": 2.1341, "step": 280945 }, { "epoch": 0.66, "grad_norm": 2.625, "learning_rate": 0.0001507466017570492, "loss": 2.0353, "step": 280950 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.00015074500912779656, "loss": 2.0577, "step": 280955 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015074341648120852, "loss": 2.0409, "step": 280960 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.0001507418238172856, "loss": 2.0336, "step": 280965 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015074023113602832, "loss": 1.995, "step": 280970 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001507386384374373, "loss": 2.0327, "step": 280975 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015073704572151305, "loss": 1.8432, "step": 280980 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015073545298825608, "loss": 2.0309, "step": 280985 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015073386023766697, "loss": 2.0344, "step": 280990 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015073226746974621, "loss": 1.965, "step": 280995 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015073067468449443, "loss": 2.1389, "step": 281000 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015072908188191214, "loss": 2.1483, "step": 281005 }, { "epoch": 0.66, "grad_norm": 1.8671875, "learning_rate": 0.00015072748906199985, "loss": 1.9117, "step": 281010 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015072589622475813, "loss": 2.1697, "step": 281015 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015072430337018753, "loss": 2.167, "step": 281020 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.0001507227104982886, "loss": 1.999, "step": 281025 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015072111760906188, "loss": 2.0139, "step": 281030 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.0001507195247025079, "loss": 2.0337, "step": 281035 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015071793177862718, "loss": 2.0934, "step": 281040 }, { "epoch": 0.66, "grad_norm": 1.90625, "learning_rate": 0.00015071633883742035, "loss": 1.95, "step": 281045 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015071474587888782, "loss": 2.0502, "step": 281050 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001507131529030303, "loss": 2.1712, "step": 281055 }, { "epoch": 0.66, "grad_norm": 2.84375, "learning_rate": 0.00015071155990984818, "loss": 1.9204, "step": 281060 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.0001507099668993421, "loss": 1.9803, "step": 281065 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015070837387151257, "loss": 2.0891, "step": 281070 }, { "epoch": 0.66, "grad_norm": 2.546875, "learning_rate": 0.00015070678082636013, "loss": 2.1909, "step": 281075 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015070518776388536, "loss": 2.016, "step": 281080 }, { "epoch": 0.66, "grad_norm": 1.9296875, "learning_rate": 0.00015070359468408875, "loss": 2.3092, "step": 281085 }, { "epoch": 0.66, "grad_norm": 1.9453125, "learning_rate": 0.0001507020015869709, "loss": 1.9802, "step": 281090 }, { "epoch": 0.66, "grad_norm": 1.8671875, "learning_rate": 0.0001507004084725323, "loss": 2.0433, "step": 281095 }, { "epoch": 0.66, "grad_norm": 2.78125, "learning_rate": 0.00015069881534077355, "loss": 1.9137, "step": 281100 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015069722219169512, "loss": 2.0053, "step": 281105 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015069562902529765, "loss": 2.1689, "step": 281110 }, { "epoch": 0.66, "grad_norm": 2.75, "learning_rate": 0.00015069403584158162, "loss": 2.0873, "step": 281115 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015069244264054755, "loss": 1.9273, "step": 281120 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015069084942219608, "loss": 2.046, "step": 281125 }, { "epoch": 0.66, "grad_norm": 1.984375, "learning_rate": 0.00015068925618652766, "loss": 2.0476, "step": 281130 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015068766293354288, "loss": 1.9741, "step": 281135 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015068606966324227, "loss": 1.9969, "step": 281140 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.0001506844763756264, "loss": 2.0414, "step": 281145 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001506828830706958, "loss": 2.0162, "step": 281150 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015068128974845096, "loss": 2.2992, "step": 281155 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015067969640889247, "loss": 2.072, "step": 281160 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015067810305202092, "loss": 2.2015, "step": 281165 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.0001506765096778368, "loss": 1.9867, "step": 281170 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0001506749162863407, "loss": 1.9318, "step": 281175 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015067332287753307, "loss": 2.0486, "step": 281180 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015067172945141454, "loss": 2.0511, "step": 281185 }, { "epoch": 0.66, "grad_norm": 2.578125, "learning_rate": 0.0001506701360079856, "loss": 1.9652, "step": 281190 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015066854254724686, "loss": 2.1673, "step": 281195 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.0001506669490691988, "loss": 2.1183, "step": 281200 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015066535557384204, "loss": 1.9187, "step": 281205 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.000150663762061177, "loss": 2.0745, "step": 281210 }, { "epoch": 0.66, "grad_norm": 1.875, "learning_rate": 0.00015066216853120437, "loss": 1.9705, "step": 281215 }, { "epoch": 0.66, "grad_norm": 1.796875, "learning_rate": 0.0001506605749839246, "loss": 2.1761, "step": 281220 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015065898141933823, "loss": 2.0641, "step": 281225 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015065738783744584, "loss": 2.0497, "step": 281230 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015065579423824798, "loss": 2.0331, "step": 281235 }, { "epoch": 0.66, "grad_norm": 2.90625, "learning_rate": 0.0001506542006217452, "loss": 2.0064, "step": 281240 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015065260698793803, "loss": 2.0987, "step": 281245 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.000150651013336827, "loss": 2.0199, "step": 281250 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015064941966841264, "loss": 2.0933, "step": 281255 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015064782598269553, "loss": 2.0345, "step": 281260 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015064623227967623, "loss": 1.9712, "step": 281265 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015064463855935523, "loss": 2.1037, "step": 281270 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.0001506430448217331, "loss": 1.9872, "step": 281275 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001506414510668104, "loss": 2.2342, "step": 281280 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.0001506398572945877, "loss": 2.0468, "step": 281285 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015063826350506543, "loss": 2.0494, "step": 281290 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015063666969824426, "loss": 2.0953, "step": 281295 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015063507587412467, "loss": 2.1474, "step": 281300 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015063348203270724, "loss": 1.9296, "step": 281305 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.0001506318881739925, "loss": 2.3524, "step": 281310 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015063029429798094, "loss": 2.0854, "step": 281315 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015062870040467318, "loss": 2.1716, "step": 281320 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.0001506271064940697, "loss": 2.2238, "step": 281325 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015062551256617116, "loss": 2.1375, "step": 281330 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.000150623918620978, "loss": 1.9742, "step": 281335 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.0001506223246584908, "loss": 2.0194, "step": 281340 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015062073067871002, "loss": 1.9578, "step": 281345 }, { "epoch": 0.66, "grad_norm": 1.921875, "learning_rate": 0.00015061913668163633, "loss": 2.0269, "step": 281350 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015061754266727025, "loss": 2.1332, "step": 281355 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015061594863561228, "loss": 2.1693, "step": 281360 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.000150614354586663, "loss": 2.2332, "step": 281365 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015061276052042287, "loss": 2.0042, "step": 281370 }, { "epoch": 0.66, "grad_norm": 2.828125, "learning_rate": 0.0001506111664368926, "loss": 1.9944, "step": 281375 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015060957233607259, "loss": 2.103, "step": 281380 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015060797821796342, "loss": 2.0514, "step": 281385 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015060638408256568, "loss": 2.1691, "step": 281390 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015060478992987985, "loss": 2.0235, "step": 281395 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015060319575990652, "loss": 2.1324, "step": 281400 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015060160157264623, "loss": 1.761, "step": 281405 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015060000736809949, "loss": 2.2398, "step": 281410 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.0001505984131462669, "loss": 2.1367, "step": 281415 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015059681890714895, "loss": 2.1085, "step": 281420 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015059522465074622, "loss": 1.8977, "step": 281425 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015059363037705925, "loss": 1.9386, "step": 281430 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015059203608608856, "loss": 2.0119, "step": 281435 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015059044177783473, "loss": 2.0176, "step": 281440 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015058884745229828, "loss": 2.1649, "step": 281445 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015058725310947976, "loss": 2.0268, "step": 281450 }, { "epoch": 0.66, "grad_norm": 1.890625, "learning_rate": 0.0001505856587493797, "loss": 1.9633, "step": 281455 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.0001505840643719987, "loss": 1.9397, "step": 281460 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015058246997733728, "loss": 2.1091, "step": 281465 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.00015058087556539591, "loss": 2.0924, "step": 281470 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001505792811361752, "loss": 2.0805, "step": 281475 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001505776866896757, "loss": 2.1114, "step": 281480 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015057609222589798, "loss": 2.193, "step": 281485 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015057449774484255, "loss": 2.2144, "step": 281490 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015057290324650992, "loss": 1.9594, "step": 281495 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015057130873090069, "loss": 1.9758, "step": 281500 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015056971419801535, "loss": 1.9887, "step": 281505 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015056811964785454, "loss": 2.1143, "step": 281510 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015056652508041871, "loss": 2.2196, "step": 281515 }, { "epoch": 0.66, "grad_norm": 1.8046875, "learning_rate": 0.00015056493049570843, "loss": 1.9996, "step": 281520 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015056333589372425, "loss": 2.0603, "step": 281525 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.0001505617412744667, "loss": 2.1191, "step": 281530 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001505601466379364, "loss": 1.9475, "step": 281535 }, { "epoch": 0.66, "grad_norm": 1.7578125, "learning_rate": 0.0001505585519841338, "loss": 2.0374, "step": 281540 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.0001505569573130595, "loss": 2.1045, "step": 281545 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.000150555362624714, "loss": 2.0231, "step": 281550 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.0001505537679190979, "loss": 2.0027, "step": 281555 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.0001505521731962117, "loss": 2.0418, "step": 281560 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015055057845605595, "loss": 1.8496, "step": 281565 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015054898369863123, "loss": 2.1291, "step": 281570 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015054738892393804, "loss": 1.9476, "step": 281575 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015054579413197695, "loss": 2.0028, "step": 281580 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.0001505441993227485, "loss": 2.0401, "step": 281585 }, { "epoch": 0.66, "grad_norm": 2.359375, "learning_rate": 0.00015054260449625325, "loss": 2.1156, "step": 281590 }, { "epoch": 0.66, "grad_norm": 2.921875, "learning_rate": 0.0001505410096524917, "loss": 2.1383, "step": 281595 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015053941479146446, "loss": 2.0436, "step": 281600 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.000150537819913172, "loss": 2.0245, "step": 281605 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015053622501761492, "loss": 2.0791, "step": 281610 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015053463010479377, "loss": 2.0327, "step": 281615 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015053303517470904, "loss": 1.9347, "step": 281620 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.00015053144022736135, "loss": 1.9448, "step": 281625 }, { "epoch": 0.66, "grad_norm": 1.9765625, "learning_rate": 0.00015052984526275117, "loss": 2.0687, "step": 281630 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001505282502808791, "loss": 2.1464, "step": 281635 }, { "epoch": 0.66, "grad_norm": 3.109375, "learning_rate": 0.00015052665528174564, "loss": 2.0364, "step": 281640 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015052506026535137, "loss": 2.0888, "step": 281645 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015052346523169682, "loss": 2.0637, "step": 281650 }, { "epoch": 0.66, "grad_norm": 2.65625, "learning_rate": 0.00015052187018078255, "loss": 2.2682, "step": 281655 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.0001505202751126091, "loss": 2.0296, "step": 281660 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015051868002717694, "loss": 2.1171, "step": 281665 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015051708492448676, "loss": 2.1062, "step": 281670 }, { "epoch": 0.66, "grad_norm": 1.7890625, "learning_rate": 0.00015051548980453901, "loss": 1.9663, "step": 281675 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015051389466733426, "loss": 2.1431, "step": 281680 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015051229951287302, "loss": 2.1463, "step": 281685 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015051070434115587, "loss": 1.9957, "step": 281690 }, { "epoch": 0.66, "grad_norm": 3.203125, "learning_rate": 0.00015050910915218334, "loss": 2.1155, "step": 281695 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015050751394595603, "loss": 2.0093, "step": 281700 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0001505059187224744, "loss": 2.2871, "step": 281705 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015050432348173902, "loss": 1.9559, "step": 281710 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015050272822375046, "loss": 2.0486, "step": 281715 }, { "epoch": 0.66, "grad_norm": 1.8359375, "learning_rate": 0.00015050113294850926, "loss": 1.9092, "step": 281720 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.0001504995376560159, "loss": 2.0627, "step": 281725 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.0001504979423462711, "loss": 2.1536, "step": 281730 }, { "epoch": 0.66, "grad_norm": 2.859375, "learning_rate": 0.0001504963470192752, "loss": 1.9627, "step": 281735 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015049475167502885, "loss": 2.158, "step": 281740 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.00015049315631353256, "loss": 1.9753, "step": 281745 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015049156093478692, "loss": 1.8833, "step": 281750 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015048996553879243, "loss": 2.0888, "step": 281755 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.00015048837012554968, "loss": 1.9075, "step": 281760 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015048677469505916, "loss": 1.9358, "step": 281765 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015048517924732145, "loss": 2.0388, "step": 281770 }, { "epoch": 0.66, "grad_norm": 2.53125, "learning_rate": 0.00015048358378233706, "loss": 2.2378, "step": 281775 }, { "epoch": 0.66, "grad_norm": 1.921875, "learning_rate": 0.0001504819883001066, "loss": 2.221, "step": 281780 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015048039280063057, "loss": 2.1294, "step": 281785 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015047879728390955, "loss": 2.0229, "step": 281790 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015047720174994402, "loss": 1.9843, "step": 281795 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015047560619873453, "loss": 2.1782, "step": 281800 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015047401063028173, "loss": 2.0704, "step": 281805 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015047241504458604, "loss": 2.0447, "step": 281810 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015047081944164807, "loss": 2.1753, "step": 281815 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.00015046922382146836, "loss": 2.2313, "step": 281820 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015046762818404742, "loss": 1.9811, "step": 281825 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.0001504660325293859, "loss": 1.8547, "step": 281830 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.0001504644368574842, "loss": 2.084, "step": 281835 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.00015046284116834296, "loss": 2.1705, "step": 281840 }, { "epoch": 0.66, "grad_norm": 3.546875, "learning_rate": 0.00015046124546196267, "loss": 2.0263, "step": 281845 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015045964973834391, "loss": 2.0224, "step": 281850 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015045805399748724, "loss": 2.2329, "step": 281855 }, { "epoch": 0.66, "grad_norm": 2.109375, "learning_rate": 0.00015045645823939316, "loss": 1.9047, "step": 281860 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015045486246406225, "loss": 2.0395, "step": 281865 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015045326667149507, "loss": 1.9806, "step": 281870 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015045167086169214, "loss": 2.229, "step": 281875 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015045007503465394, "loss": 2.0338, "step": 281880 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001504484791903811, "loss": 2.11, "step": 281885 }, { "epoch": 0.66, "grad_norm": 2.890625, "learning_rate": 0.00015044688332887417, "loss": 1.9366, "step": 281890 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015044528745013368, "loss": 2.0464, "step": 281895 }, { "epoch": 0.66, "grad_norm": 2.859375, "learning_rate": 0.00015044369155416018, "loss": 2.12, "step": 281900 }, { "epoch": 0.66, "grad_norm": 2.65625, "learning_rate": 0.00015044209564095415, "loss": 2.0592, "step": 281905 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.0001504404997105162, "loss": 2.0561, "step": 281910 }, { "epoch": 0.66, "grad_norm": 1.890625, "learning_rate": 0.00015043890376284687, "loss": 2.1079, "step": 281915 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.0001504373077979467, "loss": 2.1147, "step": 281920 }, { "epoch": 0.66, "grad_norm": 1.8125, "learning_rate": 0.00015043571181581623, "loss": 2.0815, "step": 281925 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.000150434115816456, "loss": 2.208, "step": 281930 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015043251979986652, "loss": 1.9852, "step": 281935 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015043092376604842, "loss": 2.0403, "step": 281940 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.0001504293277150022, "loss": 1.7871, "step": 281945 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001504277316467284, "loss": 2.2782, "step": 281950 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015042613556122758, "loss": 2.1437, "step": 281955 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015042453945850025, "loss": 2.0393, "step": 281960 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015042294333854702, "loss": 1.9463, "step": 281965 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.0001504213472013684, "loss": 2.1368, "step": 281970 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015041975104696493, "loss": 1.9031, "step": 281975 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015041815487533712, "loss": 2.008, "step": 281980 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.0001504165586864856, "loss": 2.1866, "step": 281985 }, { "epoch": 0.66, "grad_norm": 2.921875, "learning_rate": 0.00015041496248041086, "loss": 2.1119, "step": 281990 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015041336625711344, "loss": 1.9706, "step": 281995 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001504117700165939, "loss": 2.0391, "step": 282000 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 0.0001504101737588528, "loss": 1.9842, "step": 282005 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015040857748389064, "loss": 2.0332, "step": 282010 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015040698119170802, "loss": 1.9035, "step": 282015 }, { "epoch": 0.66, "grad_norm": 2.609375, "learning_rate": 0.00015040538488230544, "loss": 2.0202, "step": 282020 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.0001504037885556835, "loss": 2.1448, "step": 282025 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001504021922118427, "loss": 2.027, "step": 282030 }, { "epoch": 0.66, "grad_norm": 2.375, "learning_rate": 0.0001504005958507836, "loss": 1.9568, "step": 282035 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.0001503989994725067, "loss": 2.0867, "step": 282040 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015039740307701263, "loss": 2.141, "step": 282045 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0001503958066643019, "loss": 2.0438, "step": 282050 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015039421023437503, "loss": 2.1556, "step": 282055 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.0001503926137872326, "loss": 2.1521, "step": 282060 }, { "epoch": 0.66, "grad_norm": 1.765625, "learning_rate": 0.0001503910173228751, "loss": 1.863, "step": 282065 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015038942084130317, "loss": 2.0311, "step": 282070 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.00015038782434251724, "loss": 2.0547, "step": 282075 }, { "epoch": 0.66, "grad_norm": 2.59375, "learning_rate": 0.00015038622782651798, "loss": 2.1886, "step": 282080 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015038463129330584, "loss": 1.9982, "step": 282085 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015038303474288137, "loss": 1.9882, "step": 282090 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015038143817524516, "loss": 2.1045, "step": 282095 }, { "epoch": 0.66, "grad_norm": 2.328125, "learning_rate": 0.00015037984159039779, "loss": 2.1834, "step": 282100 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.00015037824498833968, "loss": 2.1751, "step": 282105 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015037664836907146, "loss": 1.9776, "step": 282110 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015037505173259367, "loss": 2.0438, "step": 282115 }, { "epoch": 0.66, "grad_norm": 2.203125, "learning_rate": 0.00015037345507890683, "loss": 1.8402, "step": 282120 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015037185840801156, "loss": 2.0695, "step": 282125 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015037026171990832, "loss": 1.8788, "step": 282130 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.00015036866501459768, "loss": 2.1142, "step": 282135 }, { "epoch": 0.66, "grad_norm": 2.484375, "learning_rate": 0.00015036706829208018, "loss": 2.1177, "step": 282140 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001503654715523564, "loss": 2.1305, "step": 282145 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015036387479542684, "loss": 2.0481, "step": 282150 }, { "epoch": 0.66, "grad_norm": 2.1875, "learning_rate": 0.0001503622780212921, "loss": 2.0329, "step": 282155 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015036068122995265, "loss": 2.0531, "step": 282160 }, { "epoch": 0.66, "grad_norm": 2.015625, "learning_rate": 0.0001503590844214091, "loss": 1.9704, "step": 282165 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.000150357487595662, "loss": 2.1182, "step": 282170 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.00015035589075271184, "loss": 2.0976, "step": 282175 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015035429389255916, "loss": 2.1044, "step": 282180 }, { "epoch": 0.66, "grad_norm": 1.8046875, "learning_rate": 0.0001503526970152046, "loss": 1.9968, "step": 282185 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015035110012064863, "loss": 2.0532, "step": 282190 }, { "epoch": 0.66, "grad_norm": 2.609375, "learning_rate": 0.0001503495032088918, "loss": 2.1174, "step": 282195 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015034790627993465, "loss": 2.181, "step": 282200 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015034630933377776, "loss": 2.1013, "step": 282205 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.00015034471237042167, "loss": 1.9908, "step": 282210 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001503431153898669, "loss": 2.2246, "step": 282215 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.000150341518392114, "loss": 2.159, "step": 282220 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015033992137716351, "loss": 1.9949, "step": 282225 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015033832434501604, "loss": 2.0452, "step": 282230 }, { "epoch": 0.66, "grad_norm": 2.546875, "learning_rate": 0.00015033672729567206, "loss": 2.1578, "step": 282235 }, { "epoch": 0.66, "grad_norm": 1.8125, "learning_rate": 0.00015033513022913217, "loss": 2.1684, "step": 282240 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015033353314539683, "loss": 2.1607, "step": 282245 }, { "epoch": 0.66, "grad_norm": 2.03125, "learning_rate": 0.0001503319360444667, "loss": 2.057, "step": 282250 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015033033892634218, "loss": 2.0383, "step": 282255 }, { "epoch": 0.66, "grad_norm": 1.8515625, "learning_rate": 0.000150328741791024, "loss": 2.0724, "step": 282260 }, { "epoch": 0.66, "grad_norm": 2.40625, "learning_rate": 0.00015032714463851255, "loss": 2.1594, "step": 282265 }, { "epoch": 0.66, "grad_norm": 1.9375, "learning_rate": 0.00015032554746880847, "loss": 2.0362, "step": 282270 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015032395028191227, "loss": 1.9325, "step": 282275 }, { "epoch": 0.66, "grad_norm": 2.3125, "learning_rate": 0.00015032235307782448, "loss": 2.0092, "step": 282280 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015032075585654563, "loss": 2.102, "step": 282285 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015031915861807635, "loss": 2.2028, "step": 282290 }, { "epoch": 0.66, "grad_norm": 2.0, "learning_rate": 0.00015031756136241713, "loss": 2.0015, "step": 282295 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015031596408956847, "loss": 1.9713, "step": 282300 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015031436679953099, "loss": 2.0638, "step": 282305 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.0001503127694923052, "loss": 2.1819, "step": 282310 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.0001503111721678917, "loss": 1.8671, "step": 282315 }, { "epoch": 0.66, "grad_norm": 2.234375, "learning_rate": 0.00015030957482629095, "loss": 1.9631, "step": 282320 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015030797746750355, "loss": 2.0789, "step": 282325 }, { "epoch": 0.66, "grad_norm": 1.8671875, "learning_rate": 0.00015030638009153006, "loss": 2.1037, "step": 282330 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.00015030478269837096, "loss": 2.0837, "step": 282335 }, { "epoch": 0.66, "grad_norm": 2.4375, "learning_rate": 0.00015030318528802683, "loss": 1.9947, "step": 282340 }, { "epoch": 0.66, "grad_norm": 2.5625, "learning_rate": 0.00015030158786049825, "loss": 1.9252, "step": 282345 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.0001502999904157857, "loss": 1.9092, "step": 282350 }, { "epoch": 0.66, "grad_norm": 2.265625, "learning_rate": 0.0001502983929538898, "loss": 2.0285, "step": 282355 }, { "epoch": 0.66, "grad_norm": 2.34375, "learning_rate": 0.000150296795474811, "loss": 2.047, "step": 282360 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015029519797854995, "loss": 1.959, "step": 282365 }, { "epoch": 0.66, "grad_norm": 2.09375, "learning_rate": 0.00015029360046510715, "loss": 2.2173, "step": 282370 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015029200293448315, "loss": 2.1548, "step": 282375 }, { "epoch": 0.66, "grad_norm": 2.6875, "learning_rate": 0.00015029040538667844, "loss": 2.1157, "step": 282380 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015028880782169365, "loss": 1.9291, "step": 282385 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015028721023952927, "loss": 2.0599, "step": 282390 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015028561264018593, "loss": 2.1243, "step": 282395 }, { "epoch": 0.66, "grad_norm": 1.90625, "learning_rate": 0.00015028401502366405, "loss": 1.9465, "step": 282400 }, { "epoch": 0.66, "grad_norm": 2.5, "learning_rate": 0.00015028241738996423, "loss": 2.0756, "step": 282405 }, { "epoch": 0.66, "grad_norm": 2.46875, "learning_rate": 0.00015028081973908708, "loss": 2.1605, "step": 282410 }, { "epoch": 0.66, "grad_norm": 2.078125, "learning_rate": 0.00015027922207103305, "loss": 1.9433, "step": 282415 }, { "epoch": 0.66, "grad_norm": 2.640625, "learning_rate": 0.00015027762438580276, "loss": 2.1721, "step": 282420 }, { "epoch": 0.66, "grad_norm": 2.453125, "learning_rate": 0.0001502760266833967, "loss": 1.9653, "step": 282425 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015027442896381541, "loss": 1.8863, "step": 282430 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015027283122705947, "loss": 2.2313, "step": 282435 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015027123347312945, "loss": 2.0559, "step": 282440 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.00015026963570202584, "loss": 2.0609, "step": 282445 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015026803791374924, "loss": 2.1571, "step": 282450 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015026644010830012, "loss": 2.1227, "step": 282455 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001502648422856791, "loss": 2.0119, "step": 282460 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.00015026324444588672, "loss": 2.0847, "step": 282465 }, { "epoch": 0.66, "grad_norm": 1.9609375, "learning_rate": 0.0001502616465889235, "loss": 1.9806, "step": 282470 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015026004871478995, "loss": 2.1275, "step": 282475 }, { "epoch": 0.66, "grad_norm": 1.9921875, "learning_rate": 0.00015025845082348668, "loss": 2.087, "step": 282480 }, { "epoch": 0.66, "grad_norm": 2.21875, "learning_rate": 0.0001502568529150142, "loss": 2.1793, "step": 282485 }, { "epoch": 0.66, "grad_norm": 2.25, "learning_rate": 0.0001502552549893731, "loss": 2.1336, "step": 282490 }, { "epoch": 0.66, "grad_norm": 2.15625, "learning_rate": 0.00015025365704656387, "loss": 1.6673, "step": 282495 }, { "epoch": 0.66, "grad_norm": 2.296875, "learning_rate": 0.00015025205908658707, "loss": 2.0694, "step": 282500 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015025046110944326, "loss": 2.0524, "step": 282505 }, { "epoch": 0.66, "grad_norm": 2.515625, "learning_rate": 0.000150248863115133, "loss": 2.2037, "step": 282510 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015024726510365683, "loss": 2.0237, "step": 282515 }, { "epoch": 0.66, "grad_norm": 2.75, "learning_rate": 0.00015024566707501523, "loss": 2.1862, "step": 282520 }, { "epoch": 0.66, "grad_norm": 2.609375, "learning_rate": 0.00015024406902920884, "loss": 2.2127, "step": 282525 }, { "epoch": 0.66, "grad_norm": 2.125, "learning_rate": 0.00015024247096623812, "loss": 1.9989, "step": 282530 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015024087288610373, "loss": 2.1396, "step": 282535 }, { "epoch": 0.66, "grad_norm": 2.28125, "learning_rate": 0.00015023927478880614, "loss": 2.1179, "step": 282540 }, { "epoch": 0.66, "grad_norm": 2.046875, "learning_rate": 0.00015023767667434585, "loss": 2.2315, "step": 282545 }, { "epoch": 0.66, "grad_norm": 2.140625, "learning_rate": 0.00015023607854272345, "loss": 2.0714, "step": 282550 }, { "epoch": 0.66, "grad_norm": 2.421875, "learning_rate": 0.00015023448039393954, "loss": 2.1516, "step": 282555 }, { "epoch": 0.66, "grad_norm": 2.171875, "learning_rate": 0.00015023288222799463, "loss": 2.1989, "step": 282560 }, { "epoch": 0.66, "grad_norm": 1.6328125, "learning_rate": 0.00015023128404488922, "loss": 1.935, "step": 282565 }, { "epoch": 0.66, "grad_norm": 2.0625, "learning_rate": 0.0001502296858446239, "loss": 1.9654, "step": 282570 }, { "epoch": 0.66, "grad_norm": 1.8828125, "learning_rate": 0.00015022808762719922, "loss": 1.9279, "step": 282575 }, { "epoch": 0.67, "grad_norm": 2.6875, "learning_rate": 0.0001502264893926157, "loss": 2.0306, "step": 282580 }, { "epoch": 0.67, "grad_norm": 1.78125, "learning_rate": 0.0001502248911408739, "loss": 1.946, "step": 282585 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00015022329287197437, "loss": 2.0327, "step": 282590 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00015022169458591767, "loss": 2.1597, "step": 282595 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.0001502200962827043, "loss": 2.3629, "step": 282600 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.00015021849796233484, "loss": 2.1481, "step": 282605 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015021689962480984, "loss": 2.0651, "step": 282610 }, { "epoch": 0.67, "grad_norm": 1.7890625, "learning_rate": 0.00015021530127012987, "loss": 1.8853, "step": 282615 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00015021370289829537, "loss": 2.1362, "step": 282620 }, { "epoch": 0.67, "grad_norm": 2.5625, "learning_rate": 0.00015021210450930702, "loss": 2.1583, "step": 282625 }, { "epoch": 0.67, "grad_norm": 2.65625, "learning_rate": 0.00015021050610316528, "loss": 2.1588, "step": 282630 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00015020890767987073, "loss": 2.0294, "step": 282635 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015020730923942388, "loss": 1.992, "step": 282640 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.0001502057107818253, "loss": 1.9025, "step": 282645 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.0001502041123070756, "loss": 2.0011, "step": 282650 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00015020251381517522, "loss": 2.11, "step": 282655 }, { "epoch": 0.67, "grad_norm": 1.890625, "learning_rate": 0.00015020091530612475, "loss": 2.0483, "step": 282660 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00015019931677992472, "loss": 2.0881, "step": 282665 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00015019771823657574, "loss": 1.9694, "step": 282670 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00015019611967607828, "loss": 2.297, "step": 282675 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00015019452109843294, "loss": 2.0965, "step": 282680 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00015019292250364016, "loss": 2.1769, "step": 282685 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00015019132389170066, "loss": 2.0017, "step": 282690 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00015018972526261487, "loss": 2.0542, "step": 282695 }, { "epoch": 0.67, "grad_norm": 2.671875, "learning_rate": 0.00015018812661638333, "loss": 2.0384, "step": 282700 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015018652795300666, "loss": 2.0548, "step": 282705 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.0001501849292724853, "loss": 1.9457, "step": 282710 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00015018333057481993, "loss": 2.1457, "step": 282715 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00015018173186001098, "loss": 2.1142, "step": 282720 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00015018013312805906, "loss": 2.1208, "step": 282725 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001501785343789647, "loss": 1.9113, "step": 282730 }, { "epoch": 0.67, "grad_norm": 2.640625, "learning_rate": 0.0001501769356127284, "loss": 2.2905, "step": 282735 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00015017533682935077, "loss": 2.058, "step": 282740 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015017373802883236, "loss": 2.0544, "step": 282745 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001501721392111737, "loss": 1.9467, "step": 282750 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00015017054037637525, "loss": 2.033, "step": 282755 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.0001501689415244377, "loss": 2.3167, "step": 282760 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.0001501673426553615, "loss": 1.9267, "step": 282765 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015016574376914727, "loss": 1.8744, "step": 282770 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00015016414486579546, "loss": 2.0631, "step": 282775 }, { "epoch": 0.67, "grad_norm": 1.640625, "learning_rate": 0.0001501625459453067, "loss": 2.041, "step": 282780 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00015016094700768151, "loss": 2.173, "step": 282785 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.0001501593480529204, "loss": 2.024, "step": 282790 }, { "epoch": 0.67, "grad_norm": 2.5625, "learning_rate": 0.00015015774908102395, "loss": 2.1099, "step": 282795 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00015015615009199274, "loss": 1.829, "step": 282800 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00015015455108582728, "loss": 2.0364, "step": 282805 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001501529520625281, "loss": 2.0347, "step": 282810 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.00015015135302209572, "loss": 2.208, "step": 282815 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00015014975396453076, "loss": 1.9431, "step": 282820 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015014815488983372, "loss": 1.9515, "step": 282825 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001501465557980052, "loss": 2.1838, "step": 282830 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.0001501449566890457, "loss": 2.1702, "step": 282835 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015014335756295574, "loss": 2.0736, "step": 282840 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.0001501417584197359, "loss": 2.0132, "step": 282845 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00015014015925938676, "loss": 2.0914, "step": 282850 }, { "epoch": 0.67, "grad_norm": 1.984375, "learning_rate": 0.00015013856008190882, "loss": 1.9765, "step": 282855 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00015013696088730263, "loss": 2.1018, "step": 282860 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.00015013536167556875, "loss": 2.0076, "step": 282865 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001501337624467077, "loss": 1.9992, "step": 282870 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.0001501321632007201, "loss": 2.0243, "step": 282875 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001501305639376064, "loss": 2.2273, "step": 282880 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.0001501289646573672, "loss": 1.9689, "step": 282885 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00015012736536000306, "loss": 1.9333, "step": 282890 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00015012576604551446, "loss": 2.0639, "step": 282895 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.000150124166713902, "loss": 2.0561, "step": 282900 }, { "epoch": 0.67, "grad_norm": 1.90625, "learning_rate": 0.00015012256736516623, "loss": 1.9417, "step": 282905 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00015012096799930767, "loss": 2.0101, "step": 282910 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.0001501193686163269, "loss": 2.0507, "step": 282915 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00015011776921622445, "loss": 2.0746, "step": 282920 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00015011616979900084, "loss": 1.9378, "step": 282925 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00015011457036465662, "loss": 2.1924, "step": 282930 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00015011297091319236, "loss": 2.1116, "step": 282935 }, { "epoch": 0.67, "grad_norm": 1.5625, "learning_rate": 0.00015011137144460863, "loss": 1.9912, "step": 282940 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.00015010977195890597, "loss": 2.0725, "step": 282945 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00015010817245608485, "loss": 1.8939, "step": 282950 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00015010657293614585, "loss": 1.9718, "step": 282955 }, { "epoch": 0.67, "grad_norm": 2.609375, "learning_rate": 0.00015010497339908957, "loss": 2.0274, "step": 282960 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00015010337384491654, "loss": 1.9619, "step": 282965 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00015010177427362726, "loss": 2.1048, "step": 282970 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00015010017468522233, "loss": 2.0161, "step": 282975 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00015009857507970224, "loss": 1.9194, "step": 282980 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001500969754570676, "loss": 2.044, "step": 282985 }, { "epoch": 0.67, "grad_norm": 2.640625, "learning_rate": 0.0001500953758173189, "loss": 1.9214, "step": 282990 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00015009377616045675, "loss": 2.1057, "step": 282995 }, { "epoch": 0.67, "grad_norm": 2.828125, "learning_rate": 0.00015009217648648162, "loss": 2.1803, "step": 283000 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.0001500905767953941, "loss": 2.0629, "step": 283005 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.00015008897708719473, "loss": 2.0778, "step": 283010 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00015008737736188404, "loss": 2.0699, "step": 283015 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00015008577761946263, "loss": 2.2923, "step": 283020 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00015008417785993097, "loss": 2.0917, "step": 283025 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00015008257808328968, "loss": 2.1562, "step": 283030 }, { "epoch": 0.67, "grad_norm": 2.515625, "learning_rate": 0.00015008097828953928, "loss": 2.2408, "step": 283035 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001500793784786803, "loss": 2.0086, "step": 283040 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00015007777865071328, "loss": 1.908, "step": 283045 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.0001500761788056388, "loss": 2.1503, "step": 283050 }, { "epoch": 0.67, "grad_norm": 1.84375, "learning_rate": 0.00015007457894345737, "loss": 2.1166, "step": 283055 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015007297906416956, "loss": 2.1915, "step": 283060 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00015007137916777592, "loss": 2.1373, "step": 283065 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00015006977925427698, "loss": 2.1239, "step": 283070 }, { "epoch": 0.67, "grad_norm": 1.7890625, "learning_rate": 0.00015006817932367332, "loss": 2.0731, "step": 283075 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00015006657937596547, "loss": 2.0068, "step": 283080 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00015006497941115392, "loss": 2.1029, "step": 283085 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.0001500633794292393, "loss": 2.1842, "step": 283090 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.0001500617794302221, "loss": 2.0183, "step": 283095 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00015006017941410291, "loss": 2.095, "step": 283100 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00015005857938088222, "loss": 2.1721, "step": 283105 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00015005697933056063, "loss": 2.1653, "step": 283110 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00015005537926313868, "loss": 2.0928, "step": 283115 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.0001500537791786169, "loss": 1.9898, "step": 283120 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00015005217907699585, "loss": 2.2207, "step": 283125 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00015005057895827606, "loss": 2.0031, "step": 283130 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00015004897882245807, "loss": 2.1355, "step": 283135 }, { "epoch": 0.67, "grad_norm": 1.921875, "learning_rate": 0.00015004737866954244, "loss": 2.1333, "step": 283140 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00015004577849952978, "loss": 2.0772, "step": 283145 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.0001500441783124205, "loss": 1.8625, "step": 283150 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00015004257810821521, "loss": 2.0791, "step": 283155 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.00015004097788691453, "loss": 1.9828, "step": 283160 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.0001500393776485189, "loss": 2.1906, "step": 283165 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00015003777739302897, "loss": 2.1346, "step": 283170 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00015003617712044517, "loss": 1.9198, "step": 283175 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.0001500345768307681, "loss": 1.9517, "step": 283180 }, { "epoch": 0.67, "grad_norm": 1.875, "learning_rate": 0.00015003297652399836, "loss": 1.9264, "step": 283185 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.0001500313762001364, "loss": 2.0968, "step": 283190 }, { "epoch": 0.67, "grad_norm": 1.7578125, "learning_rate": 0.00015002977585918282, "loss": 1.9218, "step": 283195 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.0001500281755011382, "loss": 2.1003, "step": 283200 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00015002657512600302, "loss": 2.2077, "step": 283205 }, { "epoch": 0.67, "grad_norm": 1.859375, "learning_rate": 0.00015002497473377786, "loss": 1.9425, "step": 283210 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00015002337432446327, "loss": 2.0597, "step": 283215 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00015002177389805975, "loss": 2.057, "step": 283220 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.0001500201734545679, "loss": 1.9236, "step": 283225 }, { "epoch": 0.67, "grad_norm": 1.984375, "learning_rate": 0.0001500185729939883, "loss": 2.1076, "step": 283230 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001500169725163214, "loss": 2.0507, "step": 283235 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00015001537202156783, "loss": 2.011, "step": 283240 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00015001377150972807, "loss": 2.1903, "step": 283245 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001500121709808027, "loss": 1.9467, "step": 283250 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00015001057043479227, "loss": 2.0779, "step": 283255 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00015000896987169734, "loss": 2.2855, "step": 283260 }, { "epoch": 0.67, "grad_norm": 2.78125, "learning_rate": 0.00015000736929151841, "loss": 2.044, "step": 283265 }, { "epoch": 0.67, "grad_norm": 3.109375, "learning_rate": 0.00015000576869425607, "loss": 1.8615, "step": 283270 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00015000416807991085, "loss": 1.9316, "step": 283275 }, { "epoch": 0.67, "grad_norm": 1.75, "learning_rate": 0.00015000256744848332, "loss": 2.0519, "step": 283280 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.000150000966799974, "loss": 2.1039, "step": 283285 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014999936613438342, "loss": 2.0245, "step": 283290 }, { "epoch": 0.67, "grad_norm": 2.515625, "learning_rate": 0.00014999776545171215, "loss": 2.1619, "step": 283295 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014999616475196072, "loss": 1.9897, "step": 283300 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014999456403512975, "loss": 2.1671, "step": 283305 }, { "epoch": 0.67, "grad_norm": 1.8671875, "learning_rate": 0.0001499929633012197, "loss": 1.9049, "step": 283310 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014999136255023114, "loss": 2.027, "step": 283315 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014998976178216464, "loss": 1.9525, "step": 283320 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014998816099702072, "loss": 2.0413, "step": 283325 }, { "epoch": 0.67, "grad_norm": 2.8125, "learning_rate": 0.00014998656019479993, "loss": 1.8537, "step": 283330 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014998495937550285, "loss": 1.9691, "step": 283335 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014998335853912996, "loss": 2.1501, "step": 283340 }, { "epoch": 0.67, "grad_norm": 1.828125, "learning_rate": 0.0001499817576856819, "loss": 2.0322, "step": 283345 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014998015681515914, "loss": 2.1209, "step": 283350 }, { "epoch": 0.67, "grad_norm": 1.796875, "learning_rate": 0.00014997855592756225, "loss": 1.9343, "step": 283355 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014997695502289178, "loss": 1.8597, "step": 283360 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014997535410114828, "loss": 2.132, "step": 283365 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014997375316233227, "loss": 2.1974, "step": 283370 }, { "epoch": 0.67, "grad_norm": 4.0625, "learning_rate": 0.00014997215220644437, "loss": 1.9936, "step": 283375 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014997055123348502, "loss": 1.9493, "step": 283380 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014996895024345486, "loss": 2.0803, "step": 283385 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.0001499673492363544, "loss": 2.1553, "step": 283390 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014996574821218416, "loss": 2.1593, "step": 283395 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014996414717094475, "loss": 2.1169, "step": 283400 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014996254611263662, "loss": 2.0219, "step": 283405 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014996094503726042, "loss": 2.2482, "step": 283410 }, { "epoch": 0.67, "grad_norm": 1.8671875, "learning_rate": 0.00014995934394481668, "loss": 2.1566, "step": 283415 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.0001499577428353059, "loss": 2.144, "step": 283420 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014995614170872865, "loss": 2.0828, "step": 283425 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.00014995454056508547, "loss": 2.036, "step": 283430 }, { "epoch": 0.67, "grad_norm": 1.8203125, "learning_rate": 0.0001499529394043769, "loss": 1.9529, "step": 283435 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014995133822660352, "loss": 1.9679, "step": 283440 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014994973703176586, "loss": 1.9731, "step": 283445 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014994813581986445, "loss": 2.109, "step": 283450 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014994653459089987, "loss": 2.0608, "step": 283455 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014994493334487263, "loss": 2.1019, "step": 283460 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014994333208178328, "loss": 1.8931, "step": 283465 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.0001499417308016324, "loss": 1.9933, "step": 283470 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00014994012950442052, "loss": 2.0572, "step": 283475 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001499385281901482, "loss": 1.9835, "step": 283480 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014993692685881594, "loss": 2.0896, "step": 283485 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014993532551042435, "loss": 1.9922, "step": 283490 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014993372414497395, "loss": 2.0771, "step": 283495 }, { "epoch": 0.67, "grad_norm": 1.765625, "learning_rate": 0.00014993212276246526, "loss": 2.1818, "step": 283500 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014993052136289887, "loss": 2.0127, "step": 283505 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014992891994627532, "loss": 2.049, "step": 283510 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.0001499273185125951, "loss": 1.9394, "step": 283515 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014992571706185884, "loss": 2.149, "step": 283520 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014992411559406702, "loss": 1.9651, "step": 283525 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014992251410922023, "loss": 1.9981, "step": 283530 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014992091260731903, "loss": 1.929, "step": 283535 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.0001499193110883639, "loss": 2.2989, "step": 283540 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014991770955235544, "loss": 1.9478, "step": 283545 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001499161079992942, "loss": 1.9709, "step": 283550 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.00014991450642918072, "loss": 2.1096, "step": 283555 }, { "epoch": 0.67, "grad_norm": 1.8203125, "learning_rate": 0.00014991290484201553, "loss": 1.9763, "step": 283560 }, { "epoch": 0.67, "grad_norm": 1.8828125, "learning_rate": 0.00014991130323779915, "loss": 1.9771, "step": 283565 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014990970161653222, "loss": 2.0463, "step": 283570 }, { "epoch": 0.67, "grad_norm": 1.953125, "learning_rate": 0.0001499080999782152, "loss": 2.1998, "step": 283575 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.0001499064983228487, "loss": 1.9798, "step": 283580 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.0001499048966504332, "loss": 2.1203, "step": 283585 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014990329496096928, "loss": 1.8935, "step": 283590 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014990169325445748, "loss": 2.0797, "step": 283595 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014990009153089838, "loss": 2.0573, "step": 283600 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.0001498984897902925, "loss": 1.8536, "step": 283605 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014989688803264043, "loss": 2.214, "step": 283610 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014989528625794263, "loss": 1.9664, "step": 283615 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014989368446619968, "loss": 2.0879, "step": 283620 }, { "epoch": 0.67, "grad_norm": 1.9453125, "learning_rate": 0.00014989208265741216, "loss": 1.9108, "step": 283625 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014989048083158062, "loss": 2.0927, "step": 283630 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.0001498888789887056, "loss": 2.1684, "step": 283635 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014988727712878758, "loss": 2.1169, "step": 283640 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014988567525182718, "loss": 2.1424, "step": 283645 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014988407335782495, "loss": 2.0175, "step": 283650 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.0001498824714467814, "loss": 2.0123, "step": 283655 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001498808695186971, "loss": 2.1895, "step": 283660 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014987926757357257, "loss": 2.1047, "step": 283665 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014987766561140844, "loss": 2.2978, "step": 283670 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014987606363220512, "loss": 1.9828, "step": 283675 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014987446163596327, "loss": 2.1057, "step": 283680 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014987285962268337, "loss": 2.0478, "step": 283685 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014987125759236605, "loss": 2.1739, "step": 283690 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014986965554501179, "loss": 2.0739, "step": 283695 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.0001498680534806211, "loss": 2.1969, "step": 283700 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.0001498664513991946, "loss": 1.8855, "step": 283705 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.00014986484930073284, "loss": 2.1997, "step": 283710 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014986324718523633, "loss": 2.0362, "step": 283715 }, { "epoch": 0.67, "grad_norm": 2.71875, "learning_rate": 0.00014986164505270563, "loss": 1.985, "step": 283720 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014986004290314128, "loss": 2.1277, "step": 283725 }, { "epoch": 0.67, "grad_norm": 1.9453125, "learning_rate": 0.00014985844073654386, "loss": 2.0667, "step": 283730 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014985683855291387, "loss": 2.0583, "step": 283735 }, { "epoch": 0.67, "grad_norm": 2.625, "learning_rate": 0.0001498552363522519, "loss": 1.9627, "step": 283740 }, { "epoch": 0.67, "grad_norm": 1.8984375, "learning_rate": 0.00014985363413455845, "loss": 2.0692, "step": 283745 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014985203189983413, "loss": 2.1495, "step": 283750 }, { "epoch": 0.67, "grad_norm": 2.5625, "learning_rate": 0.0001498504296480794, "loss": 2.1569, "step": 283755 }, { "epoch": 0.67, "grad_norm": 1.984375, "learning_rate": 0.00014984882737929488, "loss": 1.9405, "step": 283760 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014984722509348112, "loss": 1.9094, "step": 283765 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014984562279063863, "loss": 2.2717, "step": 283770 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014984402047076798, "loss": 2.2722, "step": 283775 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.00014984241813386967, "loss": 2.2878, "step": 283780 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014984081577994434, "loss": 1.9615, "step": 283785 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014983921340899246, "loss": 2.0803, "step": 283790 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.0001498376110210146, "loss": 2.0707, "step": 283795 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014983600861601129, "loss": 1.9351, "step": 283800 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014983440619398316, "loss": 1.9858, "step": 283805 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001498328037549306, "loss": 2.2445, "step": 283810 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001498312012988543, "loss": 2.1613, "step": 283815 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014982959882575475, "loss": 1.9371, "step": 283820 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014982799633563251, "loss": 2.1939, "step": 283825 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014982639382848814, "loss": 2.0852, "step": 283830 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014982479130432212, "loss": 2.0347, "step": 283835 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.00014982318876313508, "loss": 1.8946, "step": 283840 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014982158620492755, "loss": 2.0157, "step": 283845 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.00014981998362970005, "loss": 1.8138, "step": 283850 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014981838103745316, "loss": 2.0937, "step": 283855 }, { "epoch": 0.67, "grad_norm": 1.9375, "learning_rate": 0.00014981677842818735, "loss": 1.9147, "step": 283860 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014981517580190326, "loss": 1.9871, "step": 283865 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014981357315860141, "loss": 1.9195, "step": 283870 }, { "epoch": 0.67, "grad_norm": 1.921875, "learning_rate": 0.00014981197049828234, "loss": 2.0863, "step": 283875 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014981036782094658, "loss": 2.0632, "step": 283880 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014980876512659473, "loss": 2.1605, "step": 283885 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014980716241522725, "loss": 2.0905, "step": 283890 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001498055596868448, "loss": 2.1671, "step": 283895 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014980395694144782, "loss": 2.048, "step": 283900 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014980235417903692, "loss": 1.9487, "step": 283905 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014980075139961262, "loss": 1.9886, "step": 283910 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.0001497991486031755, "loss": 2.1308, "step": 283915 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.0001497975457897261, "loss": 2.0982, "step": 283920 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014979594295926494, "loss": 1.9668, "step": 283925 }, { "epoch": 0.67, "grad_norm": 1.8671875, "learning_rate": 0.00014979434011179257, "loss": 2.0462, "step": 283930 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.00014979273724730955, "loss": 2.0648, "step": 283935 }, { "epoch": 0.67, "grad_norm": 1.703125, "learning_rate": 0.00014979113436581644, "loss": 2.0116, "step": 283940 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.00014978953146731376, "loss": 2.0056, "step": 283945 }, { "epoch": 0.67, "grad_norm": 2.640625, "learning_rate": 0.0001497879285518021, "loss": 2.05, "step": 283950 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00014978632561928198, "loss": 2.0319, "step": 283955 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014978472266975393, "loss": 2.1434, "step": 283960 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014978311970321851, "loss": 2.0019, "step": 283965 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014978151671967628, "loss": 2.1525, "step": 283970 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.0001497799137191278, "loss": 1.8941, "step": 283975 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014977831070157356, "loss": 2.0652, "step": 283980 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.0001497767076670142, "loss": 1.9895, "step": 283985 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014977510461545017, "loss": 2.1039, "step": 283990 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014977350154688206, "loss": 2.4192, "step": 283995 }, { "epoch": 0.67, "grad_norm": 1.984375, "learning_rate": 0.00014977189846131044, "loss": 2.0782, "step": 284000 }, { "epoch": 0.67, "grad_norm": 1.8046875, "learning_rate": 0.00014977029535873584, "loss": 2.1571, "step": 284005 }, { "epoch": 0.67, "grad_norm": 1.8359375, "learning_rate": 0.00014976869223915878, "loss": 2.0706, "step": 284010 }, { "epoch": 0.67, "grad_norm": 1.796875, "learning_rate": 0.0001497670891025799, "loss": 1.9985, "step": 284015 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014976548594899957, "loss": 2.0878, "step": 284020 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.0001497638827784185, "loss": 1.9845, "step": 284025 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014976227959083722, "loss": 2.1317, "step": 284030 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014976067638625622, "loss": 2.0546, "step": 284035 }, { "epoch": 0.67, "grad_norm": 1.703125, "learning_rate": 0.00014975907316467605, "loss": 1.9706, "step": 284040 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014975746992609729, "loss": 2.2042, "step": 284045 }, { "epoch": 0.67, "grad_norm": 1.8515625, "learning_rate": 0.00014975586667052048, "loss": 2.0434, "step": 284050 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014975426339794616, "loss": 2.0026, "step": 284055 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014975266010837487, "loss": 1.8852, "step": 284060 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.00014975105680180718, "loss": 1.9901, "step": 284065 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014974945347824366, "loss": 2.0505, "step": 284070 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014974785013768476, "loss": 2.0742, "step": 284075 }, { "epoch": 0.67, "grad_norm": 1.7109375, "learning_rate": 0.00014974624678013113, "loss": 2.12, "step": 284080 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.0001497446434055833, "loss": 2.2379, "step": 284085 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014974304001404176, "loss": 2.0458, "step": 284090 }, { "epoch": 0.67, "grad_norm": 1.890625, "learning_rate": 0.0001497414366055071, "loss": 1.9977, "step": 284095 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.0001497398331799799, "loss": 2.0899, "step": 284100 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.00014973822973746062, "loss": 2.1728, "step": 284105 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014973662627794986, "loss": 2.1159, "step": 284110 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.0001497350228014482, "loss": 1.8965, "step": 284115 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014973341930795615, "loss": 2.2302, "step": 284120 }, { "epoch": 0.67, "grad_norm": 1.921875, "learning_rate": 0.00014973181579747426, "loss": 2.1445, "step": 284125 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014973021227000305, "loss": 2.0435, "step": 284130 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014972860872554313, "loss": 2.0776, "step": 284135 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.000149727005164095, "loss": 1.9753, "step": 284140 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.00014972540158565925, "loss": 2.2005, "step": 284145 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014972379799023638, "loss": 2.2622, "step": 284150 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.00014972219437782696, "loss": 2.0071, "step": 284155 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014972059074843153, "loss": 2.1454, "step": 284160 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014971898710205064, "loss": 1.9854, "step": 284165 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014971738343868486, "loss": 2.1887, "step": 284170 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014971577975833472, "loss": 2.1524, "step": 284175 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014971417606100075, "loss": 2.0271, "step": 284180 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014971257234668354, "loss": 2.0472, "step": 284185 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.0001497109686153836, "loss": 2.0662, "step": 284190 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014970936486710147, "loss": 2.0563, "step": 284195 }, { "epoch": 0.67, "grad_norm": 1.859375, "learning_rate": 0.00014970776110183776, "loss": 2.168, "step": 284200 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014970615731959295, "loss": 2.0103, "step": 284205 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014970455352036763, "loss": 2.0595, "step": 284210 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014970294970416233, "loss": 2.1787, "step": 284215 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001497013458709776, "loss": 2.0363, "step": 284220 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00014969974202081398, "loss": 2.1624, "step": 284225 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014969813815367204, "loss": 2.1189, "step": 284230 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.0001496965342695523, "loss": 1.904, "step": 284235 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001496949303684553, "loss": 2.1007, "step": 284240 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014969332645038168, "loss": 1.9622, "step": 284245 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014969172251533185, "loss": 1.9821, "step": 284250 }, { "epoch": 0.67, "grad_norm": 1.734375, "learning_rate": 0.00014969011856330648, "loss": 2.1754, "step": 284255 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014968851459430606, "loss": 2.0528, "step": 284260 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.0001496869106083311, "loss": 1.8538, "step": 284265 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.0001496853066053822, "loss": 2.0503, "step": 284270 }, { "epoch": 0.67, "grad_norm": 1.765625, "learning_rate": 0.00014968370258545993, "loss": 1.8764, "step": 284275 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014968209854856478, "loss": 2.1815, "step": 284280 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014968049449469735, "loss": 1.9166, "step": 284285 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001496788904238581, "loss": 2.0305, "step": 284290 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.0001496772863360477, "loss": 2.2339, "step": 284295 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014967568223126662, "loss": 2.2442, "step": 284300 }, { "epoch": 0.67, "grad_norm": 3.0625, "learning_rate": 0.00014967407810951544, "loss": 2.0581, "step": 284305 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014967247397079469, "loss": 1.9846, "step": 284310 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.0001496708698151049, "loss": 2.1452, "step": 284315 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.00014966926564244663, "loss": 2.1053, "step": 284320 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.0001496676614528205, "loss": 2.2353, "step": 284325 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014966605724622695, "loss": 2.0818, "step": 284330 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.0001496644530226666, "loss": 2.2582, "step": 284335 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014966284878213996, "loss": 2.0249, "step": 284340 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014966124452464756, "loss": 2.0355, "step": 284345 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014965964025019, "loss": 2.0164, "step": 284350 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.0001496580359587678, "loss": 2.1147, "step": 284355 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014965643165038153, "loss": 1.9631, "step": 284360 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.0001496548273250317, "loss": 2.1518, "step": 284365 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.0001496532229827189, "loss": 2.1172, "step": 284370 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014965161862344364, "loss": 1.9388, "step": 284375 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.00014965001424720653, "loss": 1.9229, "step": 284380 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014964840985400802, "loss": 1.9689, "step": 284385 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014964680544384877, "loss": 1.9912, "step": 284390 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014964520101672923, "loss": 2.1599, "step": 284395 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014964359657264998, "loss": 2.0462, "step": 284400 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.0001496419921116116, "loss": 1.9837, "step": 284405 }, { "epoch": 0.67, "grad_norm": 1.7890625, "learning_rate": 0.00014964038763361462, "loss": 2.0593, "step": 284410 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014963878313865959, "loss": 2.1376, "step": 284415 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.000149637178626747, "loss": 2.2003, "step": 284420 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014963557409787749, "loss": 1.9717, "step": 284425 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014963396955205158, "loss": 1.8948, "step": 284430 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014963236498926978, "loss": 2.1074, "step": 284435 }, { "epoch": 0.67, "grad_norm": 1.7109375, "learning_rate": 0.0001496307604095327, "loss": 1.9334, "step": 284440 }, { "epoch": 0.67, "grad_norm": 1.953125, "learning_rate": 0.0001496291558128408, "loss": 2.0375, "step": 284445 }, { "epoch": 0.67, "grad_norm": 2.640625, "learning_rate": 0.00014962755119919468, "loss": 2.0466, "step": 284450 }, { "epoch": 0.67, "grad_norm": 2.84375, "learning_rate": 0.00014962594656859493, "loss": 1.9957, "step": 284455 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014962434192104207, "loss": 2.1141, "step": 284460 }, { "epoch": 0.67, "grad_norm": 2.765625, "learning_rate": 0.00014962273725653658, "loss": 2.1662, "step": 284465 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.0001496211325750791, "loss": 2.1925, "step": 284470 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014961952787667014, "loss": 2.1591, "step": 284475 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001496179231613102, "loss": 2.2824, "step": 284480 }, { "epoch": 0.67, "grad_norm": 3.140625, "learning_rate": 0.00014961631842899995, "loss": 2.1453, "step": 284485 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014961471367973983, "loss": 2.2024, "step": 284490 }, { "epoch": 0.67, "grad_norm": 1.859375, "learning_rate": 0.00014961310891353042, "loss": 2.1706, "step": 284495 }, { "epoch": 0.67, "grad_norm": 1.8671875, "learning_rate": 0.00014961150413037228, "loss": 2.0181, "step": 284500 }, { "epoch": 0.67, "grad_norm": 1.8515625, "learning_rate": 0.00014960989933026595, "loss": 2.1326, "step": 284505 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014960829451321196, "loss": 2.0934, "step": 284510 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001496066896792109, "loss": 2.0248, "step": 284515 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014960508482826328, "loss": 2.0593, "step": 284520 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014960347996036968, "loss": 1.9641, "step": 284525 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014960187507553064, "loss": 1.9798, "step": 284530 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014960027017374666, "loss": 2.0766, "step": 284535 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014959866525501836, "loss": 2.0055, "step": 284540 }, { "epoch": 0.67, "grad_norm": 1.8515625, "learning_rate": 0.00014959706031934624, "loss": 1.949, "step": 284545 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014959545536673087, "loss": 2.0622, "step": 284550 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014959385039717282, "loss": 1.9621, "step": 284555 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001495922454106726, "loss": 2.0607, "step": 284560 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014959064040723073, "loss": 2.1879, "step": 284565 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014958903538684782, "loss": 1.9887, "step": 284570 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014958743034952442, "loss": 2.1257, "step": 284575 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014958582529526104, "loss": 2.2942, "step": 284580 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014958422022405822, "loss": 2.118, "step": 284585 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014958261513591653, "loss": 1.9145, "step": 284590 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014958101003083654, "loss": 2.1003, "step": 284595 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014957940490881882, "loss": 2.1131, "step": 284600 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001495777997698638, "loss": 2.1404, "step": 284605 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014957619461397213, "loss": 1.9354, "step": 284610 }, { "epoch": 0.67, "grad_norm": 2.90625, "learning_rate": 0.00014957458944114436, "loss": 2.0461, "step": 284615 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.000149572984251381, "loss": 2.1182, "step": 284620 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014957137904468259, "loss": 2.131, "step": 284625 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.0001495697738210497, "loss": 1.9367, "step": 284630 }, { "epoch": 0.67, "grad_norm": 2.734375, "learning_rate": 0.0001495681685804829, "loss": 2.0652, "step": 284635 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.0001495665633229827, "loss": 2.0246, "step": 284640 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014956495804854965, "loss": 2.208, "step": 284645 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014956335275718435, "loss": 2.0376, "step": 284650 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014956174744888728, "loss": 1.9314, "step": 284655 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.000149560142123659, "loss": 2.1227, "step": 284660 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001495585367815001, "loss": 2.0503, "step": 284665 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014955693142241108, "loss": 1.8338, "step": 284670 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014955532604639255, "loss": 2.0359, "step": 284675 }, { "epoch": 0.67, "grad_norm": 1.9453125, "learning_rate": 0.000149553720653445, "loss": 2.2133, "step": 284680 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014955211524356904, "loss": 2.1572, "step": 284685 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014955050981676511, "loss": 2.1528, "step": 284690 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014954890437303387, "loss": 2.0798, "step": 284695 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014954729891237585, "loss": 1.9378, "step": 284700 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014954569343479155, "loss": 2.0357, "step": 284705 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001495440879402815, "loss": 2.035, "step": 284710 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014954248242884637, "loss": 2.0432, "step": 284715 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.00014954087690048656, "loss": 1.9679, "step": 284720 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.0001495392713552027, "loss": 2.0366, "step": 284725 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014953766579299535, "loss": 1.958, "step": 284730 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014953606021386506, "loss": 2.1156, "step": 284735 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014953445461781233, "loss": 2.2891, "step": 284740 }, { "epoch": 0.67, "grad_norm": 2.640625, "learning_rate": 0.00014953284900483767, "loss": 2.0624, "step": 284745 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014953124337494176, "loss": 2.1925, "step": 284750 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001495296377281251, "loss": 2.1718, "step": 284755 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014952803206438818, "loss": 2.161, "step": 284760 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014952642638373158, "loss": 2.0018, "step": 284765 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014952482068615583, "loss": 1.9566, "step": 284770 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014952321497166152, "loss": 2.1449, "step": 284775 }, { "epoch": 0.67, "grad_norm": 1.890625, "learning_rate": 0.00014952160924024924, "loss": 2.0833, "step": 284780 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014952000349191942, "loss": 1.873, "step": 284785 }, { "epoch": 0.67, "grad_norm": 1.9140625, "learning_rate": 0.00014951839772667268, "loss": 2.0323, "step": 284790 }, { "epoch": 0.67, "grad_norm": 1.875, "learning_rate": 0.00014951679194450957, "loss": 1.9167, "step": 284795 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.0001495151861454306, "loss": 1.9089, "step": 284800 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014951358032943635, "loss": 2.0343, "step": 284805 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014951197449652738, "loss": 2.0829, "step": 284810 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014951036864670422, "loss": 2.0742, "step": 284815 }, { "epoch": 0.67, "grad_norm": 2.75, "learning_rate": 0.00014950876277996744, "loss": 2.0549, "step": 284820 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014950715689631753, "loss": 2.2657, "step": 284825 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.0001495055509957551, "loss": 2.0292, "step": 284830 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014950394507828066, "loss": 2.0655, "step": 284835 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014950233914389478, "loss": 1.9157, "step": 284840 }, { "epoch": 0.67, "grad_norm": 1.953125, "learning_rate": 0.000149500733192598, "loss": 1.8636, "step": 284845 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.0001494991272243909, "loss": 2.1721, "step": 284850 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014949752123927397, "loss": 2.1427, "step": 284855 }, { "epoch": 0.67, "grad_norm": 2.546875, "learning_rate": 0.0001494959152372478, "loss": 1.9416, "step": 284860 }, { "epoch": 0.67, "grad_norm": 1.7109375, "learning_rate": 0.00014949430921831292, "loss": 2.193, "step": 284865 }, { "epoch": 0.67, "grad_norm": 3.203125, "learning_rate": 0.00014949270318246992, "loss": 1.9905, "step": 284870 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014949109712971928, "loss": 2.2089, "step": 284875 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014948949106006156, "loss": 1.8498, "step": 284880 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014948788497349736, "loss": 2.2484, "step": 284885 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001494862788700272, "loss": 2.168, "step": 284890 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014948467274965166, "loss": 2.0159, "step": 284895 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014948306661237122, "loss": 1.9494, "step": 284900 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014948146045818645, "loss": 2.0689, "step": 284905 }, { "epoch": 0.67, "grad_norm": 1.78125, "learning_rate": 0.00014947985428709795, "loss": 1.9392, "step": 284910 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014947824809910624, "loss": 2.1445, "step": 284915 }, { "epoch": 0.67, "grad_norm": 1.9375, "learning_rate": 0.00014947664189421186, "loss": 2.3029, "step": 284920 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014947503567241537, "loss": 2.092, "step": 284925 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014947342943371727, "loss": 2.1465, "step": 284930 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014947182317811815, "loss": 2.2725, "step": 284935 }, { "epoch": 0.67, "grad_norm": 2.875, "learning_rate": 0.00014947021690561862, "loss": 2.2341, "step": 284940 }, { "epoch": 0.67, "grad_norm": 1.65625, "learning_rate": 0.0001494686106162191, "loss": 1.9781, "step": 284945 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014946700430992026, "loss": 2.1043, "step": 284950 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014946539798672256, "loss": 2.1627, "step": 284955 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014946379164662658, "loss": 1.9966, "step": 284960 }, { "epoch": 0.67, "grad_norm": 1.84375, "learning_rate": 0.00014946218528963287, "loss": 1.9962, "step": 284965 }, { "epoch": 0.67, "grad_norm": 1.78125, "learning_rate": 0.000149460578915742, "loss": 1.9825, "step": 284970 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.0001494589725249545, "loss": 2.0495, "step": 284975 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014945736611727092, "loss": 2.099, "step": 284980 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014945575969269178, "loss": 2.0245, "step": 284985 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.0001494541532512177, "loss": 2.1754, "step": 284990 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014945254679284912, "loss": 2.1451, "step": 284995 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014945094031758668, "loss": 2.1734, "step": 285000 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014944933382543093, "loss": 2.2233, "step": 285005 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.0001494477273163824, "loss": 2.0507, "step": 285010 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014944612079044158, "loss": 2.0729, "step": 285015 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014944451424760908, "loss": 1.9555, "step": 285020 }, { "epoch": 0.67, "grad_norm": 1.90625, "learning_rate": 0.00014944290768788545, "loss": 2.0287, "step": 285025 }, { "epoch": 0.67, "grad_norm": 2.9375, "learning_rate": 0.00014944130111127125, "loss": 2.0694, "step": 285030 }, { "epoch": 0.67, "grad_norm": 1.75, "learning_rate": 0.00014943969451776696, "loss": 2.1913, "step": 285035 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014943808790737317, "loss": 2.2566, "step": 285040 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.00014943648128009046, "loss": 2.034, "step": 285045 }, { "epoch": 0.67, "grad_norm": 1.921875, "learning_rate": 0.00014943487463591937, "loss": 2.207, "step": 285050 }, { "epoch": 0.67, "grad_norm": 2.5625, "learning_rate": 0.0001494332679748604, "loss": 2.0535, "step": 285055 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014943166129691412, "loss": 1.9971, "step": 285060 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014943005460208111, "loss": 2.0744, "step": 285065 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014942844789036188, "loss": 2.0194, "step": 285070 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014942684116175703, "loss": 2.1271, "step": 285075 }, { "epoch": 0.67, "grad_norm": 1.9140625, "learning_rate": 0.00014942523441626704, "loss": 1.9973, "step": 285080 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014942362765389252, "loss": 2.3208, "step": 285085 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014942202087463396, "loss": 2.0323, "step": 285090 }, { "epoch": 0.67, "grad_norm": 1.8984375, "learning_rate": 0.00014942041407849197, "loss": 2.0312, "step": 285095 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014941880726546705, "loss": 2.1805, "step": 285100 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014941720043555977, "loss": 2.1901, "step": 285105 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014941559358877068, "loss": 2.0915, "step": 285110 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014941398672510035, "loss": 2.0581, "step": 285115 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014941237984454929, "loss": 2.0184, "step": 285120 }, { "epoch": 0.67, "grad_norm": 2.546875, "learning_rate": 0.00014941077294711806, "loss": 2.0523, "step": 285125 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014940916603280722, "loss": 2.0019, "step": 285130 }, { "epoch": 0.67, "grad_norm": 1.84375, "learning_rate": 0.0001494075591016173, "loss": 1.9233, "step": 285135 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014940595215354887, "loss": 2.0573, "step": 285140 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014940434518860246, "loss": 1.9516, "step": 285145 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014940273820677867, "loss": 2.1229, "step": 285150 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014940113120807797, "loss": 2.132, "step": 285155 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014939952419250097, "loss": 2.0995, "step": 285160 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.0001493979171600482, "loss": 2.0987, "step": 285165 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014939631011072016, "loss": 2.002, "step": 285170 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014939470304451747, "loss": 2.0334, "step": 285175 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014939309596144067, "loss": 2.077, "step": 285180 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001493914888614903, "loss": 2.0619, "step": 285185 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014938988174466688, "loss": 2.0983, "step": 285190 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00014938827461097098, "loss": 2.2156, "step": 285195 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.00014938666746040313, "loss": 2.0806, "step": 285200 }, { "epoch": 0.67, "grad_norm": 1.7578125, "learning_rate": 0.00014938506029296393, "loss": 2.0568, "step": 285205 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001493834531086539, "loss": 2.118, "step": 285210 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014938184590747357, "loss": 2.0075, "step": 285215 }, { "epoch": 0.67, "grad_norm": 1.90625, "learning_rate": 0.00014938023868942351, "loss": 2.1031, "step": 285220 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.00014937863145450428, "loss": 2.092, "step": 285225 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.0001493770242027164, "loss": 1.877, "step": 285230 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014937541693406042, "loss": 2.129, "step": 285235 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014937380964853694, "loss": 2.1432, "step": 285240 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.00014937220234614645, "loss": 2.0416, "step": 285245 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.0001493705950268895, "loss": 2.014, "step": 285250 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.0001493689876907667, "loss": 2.3325, "step": 285255 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014936738033777852, "loss": 2.01, "step": 285260 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014936577296792555, "loss": 2.0111, "step": 285265 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014936416558120836, "loss": 2.2066, "step": 285270 }, { "epoch": 0.67, "grad_norm": 2.765625, "learning_rate": 0.00014936255817762746, "loss": 1.9998, "step": 285275 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014936095075718342, "loss": 2.1258, "step": 285280 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.0001493593433198768, "loss": 2.0108, "step": 285285 }, { "epoch": 0.67, "grad_norm": 1.9375, "learning_rate": 0.0001493577358657081, "loss": 1.9463, "step": 285290 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014935612839467793, "loss": 2.2392, "step": 285295 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014935452090678681, "loss": 2.2141, "step": 285300 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014935291340203527, "loss": 1.9908, "step": 285305 }, { "epoch": 0.67, "grad_norm": 1.8671875, "learning_rate": 0.0001493513058804239, "loss": 1.8483, "step": 285310 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001493496983419532, "loss": 2.3083, "step": 285315 }, { "epoch": 0.67, "grad_norm": 1.875, "learning_rate": 0.00014934809078662376, "loss": 1.9466, "step": 285320 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014934648321443617, "loss": 1.9668, "step": 285325 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.0001493448756253909, "loss": 2.064, "step": 285330 }, { "epoch": 0.67, "grad_norm": 1.9140625, "learning_rate": 0.00014934326801948846, "loss": 1.9551, "step": 285335 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.0001493416603967295, "loss": 2.0287, "step": 285340 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014934005275711457, "loss": 2.1311, "step": 285345 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.0001493384451006442, "loss": 1.8585, "step": 285350 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014933683742731886, "loss": 2.088, "step": 285355 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014933522973713915, "loss": 1.9681, "step": 285360 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.00014933362203010567, "loss": 2.2422, "step": 285365 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014933201430621896, "loss": 1.9644, "step": 285370 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.0001493304065654795, "loss": 1.9913, "step": 285375 }, { "epoch": 0.67, "grad_norm": 2.65625, "learning_rate": 0.00014932879880788787, "loss": 2.03, "step": 285380 }, { "epoch": 0.67, "grad_norm": 2.671875, "learning_rate": 0.00014932719103344464, "loss": 1.9798, "step": 285385 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.00014932558324215033, "loss": 2.0829, "step": 285390 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.00014932397543400555, "loss": 2.1369, "step": 285395 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014932236760901077, "loss": 2.2261, "step": 285400 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014932075976716657, "loss": 2.046, "step": 285405 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.0001493191519084735, "loss": 2.1696, "step": 285410 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.00014931754403293217, "loss": 1.9055, "step": 285415 }, { "epoch": 0.67, "grad_norm": 1.8984375, "learning_rate": 0.00014931593614054303, "loss": 2.1825, "step": 285420 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014931432823130666, "loss": 1.9982, "step": 285425 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014931272030522364, "loss": 2.0837, "step": 285430 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.0001493111123622945, "loss": 2.0892, "step": 285435 }, { "epoch": 0.67, "grad_norm": 2.65625, "learning_rate": 0.00014930950440251978, "loss": 1.9648, "step": 285440 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014930789642590003, "loss": 2.0652, "step": 285445 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014930628843243585, "loss": 2.049, "step": 285450 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014930468042212772, "loss": 2.0631, "step": 285455 }, { "epoch": 0.67, "grad_norm": 1.8984375, "learning_rate": 0.00014930307239497623, "loss": 2.0404, "step": 285460 }, { "epoch": 0.67, "grad_norm": 2.609375, "learning_rate": 0.00014930146435098189, "loss": 2.2594, "step": 285465 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014929985629014527, "loss": 2.0785, "step": 285470 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014929824821246696, "loss": 1.9794, "step": 285475 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014929664011794746, "loss": 2.1382, "step": 285480 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.00014929503200658735, "loss": 1.9623, "step": 285485 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014929342387838714, "loss": 2.0935, "step": 285490 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.0001492918157333474, "loss": 2.0266, "step": 285495 }, { "epoch": 0.67, "grad_norm": 1.8828125, "learning_rate": 0.00014929020757146867, "loss": 2.0317, "step": 285500 }, { "epoch": 0.67, "grad_norm": 1.921875, "learning_rate": 0.00014928859939275158, "loss": 1.9559, "step": 285505 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.00014928699119719653, "loss": 2.0004, "step": 285510 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001492853829848042, "loss": 2.1329, "step": 285515 }, { "epoch": 0.67, "grad_norm": 1.921875, "learning_rate": 0.00014928377475557504, "loss": 1.9545, "step": 285520 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.0001492821665095097, "loss": 1.9827, "step": 285525 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014928055824660867, "loss": 2.0144, "step": 285530 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014927894996687248, "loss": 2.09, "step": 285535 }, { "epoch": 0.67, "grad_norm": 1.6875, "learning_rate": 0.0001492773416703017, "loss": 1.7355, "step": 285540 }, { "epoch": 0.67, "grad_norm": 2.515625, "learning_rate": 0.00014927573335689695, "loss": 1.8066, "step": 285545 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014927412502665867, "loss": 2.1735, "step": 285550 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014927251667958745, "loss": 2.0498, "step": 285555 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014927090831568386, "loss": 1.9479, "step": 285560 }, { "epoch": 0.67, "grad_norm": 1.9140625, "learning_rate": 0.00014926929993494843, "loss": 2.0133, "step": 285565 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.0001492676915373817, "loss": 2.1224, "step": 285570 }, { "epoch": 0.67, "grad_norm": 1.65625, "learning_rate": 0.00014926608312298428, "loss": 1.9344, "step": 285575 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.0001492644746917566, "loss": 2.1833, "step": 285580 }, { "epoch": 0.67, "grad_norm": 3.203125, "learning_rate": 0.00014926286624369935, "loss": 1.9728, "step": 285585 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014926125777881298, "loss": 2.0455, "step": 285590 }, { "epoch": 0.67, "grad_norm": 2.609375, "learning_rate": 0.00014925964929709805, "loss": 2.1661, "step": 285595 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.0001492580407985552, "loss": 2.051, "step": 285600 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014925643228318482, "loss": 1.984, "step": 285605 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.0001492548237509876, "loss": 2.0661, "step": 285610 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.000149253215201964, "loss": 2.1767, "step": 285615 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014925160663611467, "loss": 1.9278, "step": 285620 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014924999805344008, "loss": 2.0344, "step": 285625 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014924838945394074, "loss": 2.006, "step": 285630 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014924678083761732, "loss": 1.995, "step": 285635 }, { "epoch": 0.67, "grad_norm": 1.9921875, "learning_rate": 0.00014924517220447033, "loss": 2.1624, "step": 285640 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014924356355450023, "loss": 1.8534, "step": 285645 }, { "epoch": 0.67, "grad_norm": 1.765625, "learning_rate": 0.00014924195488770766, "loss": 2.1462, "step": 285650 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014924034620409312, "loss": 2.0181, "step": 285655 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.00014923873750365723, "loss": 2.0077, "step": 285660 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014923712878640047, "loss": 1.975, "step": 285665 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014923552005232345, "loss": 1.9959, "step": 285670 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014923391130142663, "loss": 2.0228, "step": 285675 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014923230253371063, "loss": 2.0569, "step": 285680 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.000149230693749176, "loss": 2.0817, "step": 285685 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014922908494782326, "loss": 2.0941, "step": 285690 }, { "epoch": 0.67, "grad_norm": 2.90625, "learning_rate": 0.00014922747612965294, "loss": 2.1912, "step": 285695 }, { "epoch": 0.67, "grad_norm": 1.875, "learning_rate": 0.00014922586729466567, "loss": 2.0319, "step": 285700 }, { "epoch": 0.67, "grad_norm": 2.65625, "learning_rate": 0.00014922425844286194, "loss": 1.9665, "step": 285705 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.0001492226495742423, "loss": 2.1544, "step": 285710 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.0001492210406888073, "loss": 2.015, "step": 285715 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014921943178655754, "loss": 2.3014, "step": 285720 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001492178228674935, "loss": 2.0575, "step": 285725 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014921621393161575, "loss": 2.0531, "step": 285730 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014921460497892486, "loss": 2.1194, "step": 285735 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.0001492129960094214, "loss": 2.0892, "step": 285740 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014921138702310584, "loss": 2.0918, "step": 285745 }, { "epoch": 0.67, "grad_norm": 2.5625, "learning_rate": 0.0001492097780199788, "loss": 2.1344, "step": 285750 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.0001492081690000408, "loss": 2.0247, "step": 285755 }, { "epoch": 0.67, "grad_norm": 2.4375, "learning_rate": 0.0001492065599632924, "loss": 2.0623, "step": 285760 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014920495090973415, "loss": 2.0308, "step": 285765 }, { "epoch": 0.67, "grad_norm": 1.6875, "learning_rate": 0.00014920334183936656, "loss": 2.1371, "step": 285770 }, { "epoch": 0.67, "grad_norm": 3.0625, "learning_rate": 0.00014920173275219026, "loss": 2.0054, "step": 285775 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014920012364820576, "loss": 2.0855, "step": 285780 }, { "epoch": 0.67, "grad_norm": 2.6875, "learning_rate": 0.0001491985145274136, "loss": 2.2298, "step": 285785 }, { "epoch": 0.67, "grad_norm": 2.90625, "learning_rate": 0.0001491969053898143, "loss": 2.1307, "step": 285790 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.0001491952962354085, "loss": 2.1149, "step": 285795 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014919368706419665, "loss": 2.0503, "step": 285800 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.00014919207787617936, "loss": 2.1975, "step": 285805 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014919046867135718, "loss": 2.1021, "step": 285810 }, { "epoch": 0.67, "grad_norm": 1.859375, "learning_rate": 0.0001491888594497306, "loss": 1.8436, "step": 285815 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014918725021130025, "loss": 2.1674, "step": 285820 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014918564095606665, "loss": 1.9608, "step": 285825 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014918403168403033, "loss": 2.0427, "step": 285830 }, { "epoch": 0.67, "grad_norm": 1.9609375, "learning_rate": 0.00014918242239519186, "loss": 2.0082, "step": 285835 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.00014918081308955177, "loss": 2.163, "step": 285840 }, { "epoch": 0.67, "grad_norm": 2.609375, "learning_rate": 0.0001491792037671106, "loss": 2.0499, "step": 285845 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014917759442786897, "loss": 1.9098, "step": 285850 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.00014917598507182734, "loss": 1.95, "step": 285855 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014917437569898634, "loss": 2.1404, "step": 285860 }, { "epoch": 0.67, "grad_norm": 1.8203125, "learning_rate": 0.00014917276630934645, "loss": 1.8905, "step": 285865 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014917115690290825, "loss": 2.0998, "step": 285870 }, { "epoch": 0.67, "grad_norm": 1.75, "learning_rate": 0.00014916954747967232, "loss": 1.8316, "step": 285875 }, { "epoch": 0.67, "grad_norm": 1.8671875, "learning_rate": 0.00014916793803963915, "loss": 2.1059, "step": 285880 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014916632858280935, "loss": 2.253, "step": 285885 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001491647191091834, "loss": 1.9479, "step": 285890 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.0001491631096187619, "loss": 2.0529, "step": 285895 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014916150011154544, "loss": 1.9682, "step": 285900 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.0001491598905875345, "loss": 1.9603, "step": 285905 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014915828104672963, "loss": 1.9347, "step": 285910 }, { "epoch": 0.67, "grad_norm": 2.65625, "learning_rate": 0.00014915667148913142, "loss": 2.0571, "step": 285915 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.0001491550619147404, "loss": 2.0906, "step": 285920 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014915345232355707, "loss": 2.154, "step": 285925 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014915184271558206, "loss": 2.1482, "step": 285930 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.0001491502330908159, "loss": 2.1995, "step": 285935 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014914862344925914, "loss": 2.096, "step": 285940 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.0001491470137909123, "loss": 2.0986, "step": 285945 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.0001491454041157759, "loss": 2.2783, "step": 285950 }, { "epoch": 0.67, "grad_norm": 2.515625, "learning_rate": 0.00014914379442385061, "loss": 2.1464, "step": 285955 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001491421847151369, "loss": 2.1225, "step": 285960 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014914057498963533, "loss": 1.8193, "step": 285965 }, { "epoch": 0.67, "grad_norm": 2.75, "learning_rate": 0.00014913896524734643, "loss": 1.9793, "step": 285970 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014913735548827077, "loss": 1.8763, "step": 285975 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.0001491357457124089, "loss": 2.003, "step": 285980 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014913413591976138, "loss": 2.0456, "step": 285985 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014913252611032873, "loss": 1.9706, "step": 285990 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.00014913091628411152, "loss": 2.105, "step": 285995 }, { "epoch": 0.67, "grad_norm": 1.7421875, "learning_rate": 0.0001491293064411103, "loss": 2.0566, "step": 286000 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014912769658132562, "loss": 1.9846, "step": 286005 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014912608670475803, "loss": 2.2364, "step": 286010 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014912447681140806, "loss": 2.0184, "step": 286015 }, { "epoch": 0.67, "grad_norm": 2.484375, "learning_rate": 0.0001491228669012763, "loss": 2.0162, "step": 286020 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014912125697436328, "loss": 2.0216, "step": 286025 }, { "epoch": 0.67, "grad_norm": 1.7734375, "learning_rate": 0.00014911964703066954, "loss": 1.8627, "step": 286030 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014911803707019563, "loss": 2.0433, "step": 286035 }, { "epoch": 0.67, "grad_norm": 2.640625, "learning_rate": 0.00014911642709294212, "loss": 2.2288, "step": 286040 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014911481709890953, "loss": 2.0113, "step": 286045 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014911320708809847, "loss": 1.9796, "step": 286050 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.0001491115970605094, "loss": 2.092, "step": 286055 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.00014910998701614294, "loss": 1.9866, "step": 286060 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.0001491083769549996, "loss": 2.042, "step": 286065 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014910676687707996, "loss": 1.8939, "step": 286070 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.0001491051567823846, "loss": 2.0911, "step": 286075 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014910354667091398, "loss": 2.0269, "step": 286080 }, { "epoch": 0.67, "grad_norm": 2.71875, "learning_rate": 0.00014910193654266867, "loss": 2.0927, "step": 286085 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014910032639764927, "loss": 1.8961, "step": 286090 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014909871623585636, "loss": 2.128, "step": 286095 }, { "epoch": 0.67, "grad_norm": 2.453125, "learning_rate": 0.0001490971060572904, "loss": 1.916, "step": 286100 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014909549586195197, "loss": 2.1098, "step": 286105 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.00014909388564984162, "loss": 2.1127, "step": 286110 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014909227542095994, "loss": 1.9936, "step": 286115 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014909066517530744, "loss": 1.9629, "step": 286120 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014908905491288469, "loss": 2.0824, "step": 286125 }, { "epoch": 0.67, "grad_norm": 1.890625, "learning_rate": 0.00014908744463369217, "loss": 2.0301, "step": 286130 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014908583433773052, "loss": 2.1512, "step": 286135 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014908422402500028, "loss": 2.0153, "step": 286140 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014908261369550195, "loss": 2.0511, "step": 286145 }, { "epoch": 0.67, "grad_norm": 2.5625, "learning_rate": 0.00014908100334923614, "loss": 2.1, "step": 286150 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014907939298620338, "loss": 1.862, "step": 286155 }, { "epoch": 0.67, "grad_norm": 2.578125, "learning_rate": 0.00014907778260640417, "loss": 1.8825, "step": 286160 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.0001490761722098391, "loss": 2.1039, "step": 286165 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.00014907456179650874, "loss": 1.8696, "step": 286170 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.0001490729513664136, "loss": 2.0303, "step": 286175 }, { "epoch": 0.67, "grad_norm": 1.828125, "learning_rate": 0.00014907134091955425, "loss": 1.9936, "step": 286180 }, { "epoch": 0.67, "grad_norm": 1.859375, "learning_rate": 0.00014906973045593127, "loss": 1.9465, "step": 286185 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014906811997554515, "loss": 2.1803, "step": 286190 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014906650947839648, "loss": 2.0062, "step": 286195 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.0001490648989644858, "loss": 1.9475, "step": 286200 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014906328843381368, "loss": 2.1886, "step": 286205 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014906167788638066, "loss": 2.1699, "step": 286210 }, { "epoch": 0.67, "grad_norm": 2.734375, "learning_rate": 0.00014906006732218725, "loss": 2.1163, "step": 286215 }, { "epoch": 0.67, "grad_norm": 2.609375, "learning_rate": 0.000149058456741234, "loss": 2.2578, "step": 286220 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014905684614352155, "loss": 2.2271, "step": 286225 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014905523552905035, "loss": 1.8495, "step": 286230 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014905362489782103, "loss": 2.0842, "step": 286235 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014905201424983408, "loss": 1.9964, "step": 286240 }, { "epoch": 0.67, "grad_norm": 2.765625, "learning_rate": 0.00014905040358509006, "loss": 2.0403, "step": 286245 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014904879290358956, "loss": 2.1392, "step": 286250 }, { "epoch": 0.67, "grad_norm": 1.9453125, "learning_rate": 0.0001490471822053331, "loss": 1.8434, "step": 286255 }, { "epoch": 0.67, "grad_norm": 2.625, "learning_rate": 0.0001490455714903212, "loss": 1.9977, "step": 286260 }, { "epoch": 0.67, "grad_norm": 2.671875, "learning_rate": 0.0001490439607585545, "loss": 2.0133, "step": 286265 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014904235001003344, "loss": 2.0551, "step": 286270 }, { "epoch": 0.67, "grad_norm": 1.6171875, "learning_rate": 0.00014904073924475867, "loss": 2.0038, "step": 286275 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014903912846273064, "loss": 2.0177, "step": 286280 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014903751766395, "loss": 2.0335, "step": 286285 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014903590684841724, "loss": 2.1778, "step": 286290 }, { "epoch": 0.67, "grad_norm": 1.96875, "learning_rate": 0.00014903429601613292, "loss": 2.0288, "step": 286295 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.0001490326851670976, "loss": 2.1999, "step": 286300 }, { "epoch": 0.67, "grad_norm": 1.53125, "learning_rate": 0.0001490310743013118, "loss": 1.9933, "step": 286305 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.00014902946341877613, "loss": 2.2019, "step": 286310 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.0001490278525194911, "loss": 2.0162, "step": 286315 }, { "epoch": 0.67, "grad_norm": 2.53125, "learning_rate": 0.00014902624160345726, "loss": 2.0445, "step": 286320 }, { "epoch": 0.67, "grad_norm": 1.890625, "learning_rate": 0.00014902463067067515, "loss": 2.2054, "step": 286325 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014902301972114537, "loss": 2.1599, "step": 286330 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014902140875486844, "loss": 2.0099, "step": 286335 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014901979777184486, "loss": 1.9726, "step": 286340 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.0001490181867720753, "loss": 2.1637, "step": 286345 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.0001490165757555602, "loss": 2.066, "step": 286350 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.0001490149647223001, "loss": 2.1277, "step": 286355 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014901335367229566, "loss": 2.051, "step": 286360 }, { "epoch": 0.67, "grad_norm": 1.8515625, "learning_rate": 0.00014901174260554737, "loss": 2.1448, "step": 286365 }, { "epoch": 0.67, "grad_norm": 2.796875, "learning_rate": 0.00014901013152205576, "loss": 1.9897, "step": 286370 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.0001490085204218214, "loss": 2.1118, "step": 286375 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014900690930484483, "loss": 2.243, "step": 286380 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.00014900529817112664, "loss": 2.0834, "step": 286385 }, { "epoch": 0.67, "grad_norm": 2.5, "learning_rate": 0.00014900368702066734, "loss": 2.0903, "step": 286390 }, { "epoch": 0.67, "grad_norm": 2.765625, "learning_rate": 0.00014900207585346752, "loss": 2.1335, "step": 286395 }, { "epoch": 0.67, "grad_norm": 1.90625, "learning_rate": 0.00014900046466952764, "loss": 2.0855, "step": 286400 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014899885346884831, "loss": 2.0914, "step": 286405 }, { "epoch": 0.67, "grad_norm": 2.015625, "learning_rate": 0.00014899724225143015, "loss": 2.0239, "step": 286410 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001489956310172736, "loss": 2.1873, "step": 286415 }, { "epoch": 0.67, "grad_norm": 3.203125, "learning_rate": 0.00014899401976637927, "loss": 2.2065, "step": 286420 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014899240849874769, "loss": 2.0292, "step": 286425 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001489907972143794, "loss": 2.1157, "step": 286430 }, { "epoch": 0.67, "grad_norm": 3.0, "learning_rate": 0.000148989185913275, "loss": 2.0965, "step": 286435 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014898757459543498, "loss": 2.0459, "step": 286440 }, { "epoch": 0.67, "grad_norm": 1.7265625, "learning_rate": 0.0001489859632608599, "loss": 2.076, "step": 286445 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.00014898435190955036, "loss": 2.2639, "step": 286450 }, { "epoch": 0.67, "grad_norm": 2.15625, "learning_rate": 0.00014898274054150686, "loss": 2.1635, "step": 286455 }, { "epoch": 0.67, "grad_norm": 1.9453125, "learning_rate": 0.00014898112915672998, "loss": 2.1584, "step": 286460 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014897951775522022, "loss": 1.934, "step": 286465 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014897790633697824, "loss": 2.1097, "step": 286470 }, { "epoch": 0.67, "grad_norm": 1.8125, "learning_rate": 0.00014897629490200447, "loss": 2.0175, "step": 286475 }, { "epoch": 0.67, "grad_norm": 1.9140625, "learning_rate": 0.0001489746834502995, "loss": 2.0678, "step": 286480 }, { "epoch": 0.67, "grad_norm": 2.171875, "learning_rate": 0.0001489730719818639, "loss": 2.0914, "step": 286485 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014897146049669822, "loss": 2.2293, "step": 286490 }, { "epoch": 0.67, "grad_norm": 1.9296875, "learning_rate": 0.000148969848994803, "loss": 2.0633, "step": 286495 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014896823747617878, "loss": 2.0015, "step": 286500 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014896662594082616, "loss": 2.0683, "step": 286505 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014896501438874563, "loss": 2.045, "step": 286510 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014896340281993773, "loss": 2.1251, "step": 286515 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.00014896179123440306, "loss": 2.1164, "step": 286520 }, { "epoch": 0.67, "grad_norm": 2.0625, "learning_rate": 0.0001489601796321422, "loss": 2.0994, "step": 286525 }, { "epoch": 0.67, "grad_norm": 2.390625, "learning_rate": 0.0001489585680131556, "loss": 2.0398, "step": 286530 }, { "epoch": 0.67, "grad_norm": 2.3125, "learning_rate": 0.0001489569563774439, "loss": 1.9845, "step": 286535 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014895534472500756, "loss": 2.1545, "step": 286540 }, { "epoch": 0.67, "grad_norm": 2.234375, "learning_rate": 0.00014895373305584723, "loss": 1.9729, "step": 286545 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014895212136996344, "loss": 1.8699, "step": 286550 }, { "epoch": 0.67, "grad_norm": 2.1875, "learning_rate": 0.00014895050966735667, "loss": 2.0259, "step": 286555 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014894889794802755, "loss": 2.0097, "step": 286560 }, { "epoch": 0.67, "grad_norm": 1.8125, "learning_rate": 0.00014894728621197659, "loss": 2.0494, "step": 286565 }, { "epoch": 0.67, "grad_norm": 2.296875, "learning_rate": 0.00014894567445920435, "loss": 1.9146, "step": 286570 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014894406268971136, "loss": 2.06, "step": 286575 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014894245090349824, "loss": 2.0192, "step": 286580 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014894083910056545, "loss": 1.9157, "step": 286585 }, { "epoch": 0.67, "grad_norm": 2.0, "learning_rate": 0.00014893922728091358, "loss": 2.0819, "step": 286590 }, { "epoch": 0.67, "grad_norm": 1.8359375, "learning_rate": 0.0001489376154445432, "loss": 1.9799, "step": 286595 }, { "epoch": 0.67, "grad_norm": 1.84375, "learning_rate": 0.00014893600359145484, "loss": 2.1209, "step": 286600 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.00014893439172164905, "loss": 2.0662, "step": 286605 }, { "epoch": 0.67, "grad_norm": 2.921875, "learning_rate": 0.00014893277983512635, "loss": 1.9604, "step": 286610 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.0001489311679318874, "loss": 2.0433, "step": 286615 }, { "epoch": 0.67, "grad_norm": 2.34375, "learning_rate": 0.0001489295560119326, "loss": 2.0523, "step": 286620 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014892794407526263, "loss": 2.0878, "step": 286625 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014892633212187798, "loss": 2.0061, "step": 286630 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.00014892472015177918, "loss": 1.9734, "step": 286635 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014892310816496683, "loss": 1.929, "step": 286640 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014892149616144143, "loss": 1.9946, "step": 286645 }, { "epoch": 0.67, "grad_norm": 2.421875, "learning_rate": 0.0001489198841412036, "loss": 2.0981, "step": 286650 }, { "epoch": 0.67, "grad_norm": 2.375, "learning_rate": 0.00014891827210425381, "loss": 2.3805, "step": 286655 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014891666005059268, "loss": 1.9942, "step": 286660 }, { "epoch": 0.67, "grad_norm": 2.125, "learning_rate": 0.00014891504798022077, "loss": 1.9769, "step": 286665 }, { "epoch": 0.67, "grad_norm": 1.9453125, "learning_rate": 0.00014891343589313853, "loss": 2.1354, "step": 286670 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014891182378934658, "loss": 1.8241, "step": 286675 }, { "epoch": 0.67, "grad_norm": 3.421875, "learning_rate": 0.00014891021166884546, "loss": 2.1936, "step": 286680 }, { "epoch": 0.67, "grad_norm": 2.328125, "learning_rate": 0.00014890859953163577, "loss": 1.9066, "step": 286685 }, { "epoch": 0.67, "grad_norm": 2.109375, "learning_rate": 0.00014890698737771797, "loss": 2.2228, "step": 286690 }, { "epoch": 0.67, "grad_norm": 2.09375, "learning_rate": 0.00014890537520709268, "loss": 1.9526, "step": 286695 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.00014890376301976042, "loss": 2.1585, "step": 286700 }, { "epoch": 0.67, "grad_norm": 3.328125, "learning_rate": 0.00014890215081572172, "loss": 1.9834, "step": 286705 }, { "epoch": 0.67, "grad_norm": 2.21875, "learning_rate": 0.00014890053859497723, "loss": 2.1535, "step": 286710 }, { "epoch": 0.67, "grad_norm": 3.15625, "learning_rate": 0.00014889892635752734, "loss": 2.134, "step": 286715 }, { "epoch": 0.67, "grad_norm": 2.40625, "learning_rate": 0.00014889731410337275, "loss": 1.9773, "step": 286720 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014889570183251394, "loss": 2.2333, "step": 286725 }, { "epoch": 0.67, "grad_norm": 2.546875, "learning_rate": 0.00014889408954495145, "loss": 2.0746, "step": 286730 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014889247724068588, "loss": 2.0421, "step": 286735 }, { "epoch": 0.67, "grad_norm": 2.46875, "learning_rate": 0.00014889086491971773, "loss": 1.9059, "step": 286740 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.0001488892525820476, "loss": 2.1248, "step": 286745 }, { "epoch": 0.67, "grad_norm": 2.046875, "learning_rate": 0.000148887640227676, "loss": 2.0575, "step": 286750 }, { "epoch": 0.67, "grad_norm": 1.9765625, "learning_rate": 0.00014888602785660348, "loss": 2.0982, "step": 286755 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014888441546883062, "loss": 2.1581, "step": 286760 }, { "epoch": 0.67, "grad_norm": 2.359375, "learning_rate": 0.00014888280306435796, "loss": 2.1631, "step": 286765 }, { "epoch": 0.67, "grad_norm": 2.03125, "learning_rate": 0.00014888119064318604, "loss": 2.0094, "step": 286770 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014887957820531545, "loss": 2.162, "step": 286775 }, { "epoch": 0.67, "grad_norm": 1.984375, "learning_rate": 0.00014887796575074666, "loss": 1.9946, "step": 286780 }, { "epoch": 0.67, "grad_norm": 2.265625, "learning_rate": 0.0001488763532794803, "loss": 1.8707, "step": 286785 }, { "epoch": 0.67, "grad_norm": 2.203125, "learning_rate": 0.0001488747407915169, "loss": 1.9873, "step": 286790 }, { "epoch": 0.67, "grad_norm": 2.59375, "learning_rate": 0.000148873128286857, "loss": 2.0101, "step": 286795 }, { "epoch": 0.67, "grad_norm": 2.28125, "learning_rate": 0.00014887151576550113, "loss": 2.0559, "step": 286800 }, { "epoch": 0.67, "grad_norm": 2.140625, "learning_rate": 0.00014886990322744988, "loss": 1.9813, "step": 286805 }, { "epoch": 0.67, "grad_norm": 2.078125, "learning_rate": 0.00014886829067270376, "loss": 1.9829, "step": 286810 }, { "epoch": 0.67, "grad_norm": 1.859375, "learning_rate": 0.00014886667810126337, "loss": 1.8183, "step": 286815 }, { "epoch": 0.67, "grad_norm": 2.25, "learning_rate": 0.00014886506551312924, "loss": 1.9452, "step": 286820 }, { "epoch": 0.67, "grad_norm": 1.8203125, "learning_rate": 0.0001488634529083019, "loss": 1.8934, "step": 286825 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014886184028678194, "loss": 2.1226, "step": 286830 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014886022764856987, "loss": 2.0742, "step": 286835 }, { "epoch": 0.68, "grad_norm": 2.78125, "learning_rate": 0.00014885861499366624, "loss": 1.8283, "step": 286840 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014885700232207167, "loss": 1.9794, "step": 286845 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014885538963378664, "loss": 2.2563, "step": 286850 }, { "epoch": 0.68, "grad_norm": 2.578125, "learning_rate": 0.00014885377692881174, "loss": 2.0329, "step": 286855 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014885216420714746, "loss": 2.0846, "step": 286860 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014885055146879442, "loss": 2.0164, "step": 286865 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014884893871375317, "loss": 2.1014, "step": 286870 }, { "epoch": 0.68, "grad_norm": 1.84375, "learning_rate": 0.00014884732594202423, "loss": 1.958, "step": 286875 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014884571315360812, "loss": 2.0533, "step": 286880 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014884410034850544, "loss": 2.0734, "step": 286885 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014884248752671675, "loss": 2.0687, "step": 286890 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014884087468824256, "loss": 1.9702, "step": 286895 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.00014883926183308346, "loss": 1.9385, "step": 286900 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014883764896123997, "loss": 2.0146, "step": 286905 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014883603607271267, "loss": 2.1144, "step": 286910 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014883442316750208, "loss": 1.9551, "step": 286915 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014883281024560875, "loss": 1.856, "step": 286920 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014883119730703327, "loss": 2.0571, "step": 286925 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014882958435177618, "loss": 2.1542, "step": 286930 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.000148827971379838, "loss": 2.0585, "step": 286935 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014882635839121928, "loss": 2.0596, "step": 286940 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014882474538592059, "loss": 1.9263, "step": 286945 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014882313236394253, "loss": 2.0044, "step": 286950 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.00014882151932528554, "loss": 2.0176, "step": 286955 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.0001488199062699503, "loss": 2.0286, "step": 286960 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014881829319793725, "loss": 1.9409, "step": 286965 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.000148816680109247, "loss": 2.051, "step": 286970 }, { "epoch": 0.68, "grad_norm": 1.84375, "learning_rate": 0.00014881506700388007, "loss": 2.0902, "step": 286975 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014881345388183706, "loss": 2.0637, "step": 286980 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014881184074311846, "loss": 2.064, "step": 286985 }, { "epoch": 0.68, "grad_norm": 1.9453125, "learning_rate": 0.00014881022758772488, "loss": 1.9321, "step": 286990 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014880861441565678, "loss": 2.1388, "step": 286995 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014880700122691482, "loss": 2.0306, "step": 287000 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014880538802149952, "loss": 2.1273, "step": 287005 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.0001488037747994114, "loss": 2.1329, "step": 287010 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014880216156065102, "loss": 2.2063, "step": 287015 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001488005483052189, "loss": 1.9213, "step": 287020 }, { "epoch": 0.68, "grad_norm": 1.6875, "learning_rate": 0.0001487989350331157, "loss": 2.0655, "step": 287025 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014879732174434183, "loss": 2.1328, "step": 287030 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014879570843889795, "loss": 2.0111, "step": 287035 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014879409511678454, "loss": 2.1546, "step": 287040 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.0001487924817780022, "loss": 1.9865, "step": 287045 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014879086842255147, "loss": 2.1589, "step": 287050 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.0001487892550504329, "loss": 2.1453, "step": 287055 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014878764166164702, "loss": 2.1156, "step": 287060 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.0001487860282561944, "loss": 1.945, "step": 287065 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.0001487844148340756, "loss": 1.957, "step": 287070 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014878280139529114, "loss": 1.9931, "step": 287075 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014878118793984162, "loss": 2.1414, "step": 287080 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014877957446772752, "loss": 1.9917, "step": 287085 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014877796097894947, "loss": 2.0293, "step": 287090 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014877634747350796, "loss": 2.2156, "step": 287095 }, { "epoch": 0.68, "grad_norm": 1.953125, "learning_rate": 0.00014877473395140357, "loss": 2.0253, "step": 287100 }, { "epoch": 0.68, "grad_norm": 2.484375, "learning_rate": 0.00014877312041263686, "loss": 2.2171, "step": 287105 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014877150685720835, "loss": 2.1698, "step": 287110 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014876989328511863, "loss": 2.0515, "step": 287115 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014876827969636822, "loss": 2.1362, "step": 287120 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001487666660909577, "loss": 2.1321, "step": 287125 }, { "epoch": 0.68, "grad_norm": 2.8125, "learning_rate": 0.00014876505246888755, "loss": 2.0309, "step": 287130 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014876343883015838, "loss": 2.0425, "step": 287135 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014876182517477077, "loss": 1.9568, "step": 287140 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014876021150272522, "loss": 1.9866, "step": 287145 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014875859781402233, "loss": 1.9999, "step": 287150 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.00014875698410866253, "loss": 2.1394, "step": 287155 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014875537038664657, "loss": 2.0899, "step": 287160 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014875375664797483, "loss": 2.0617, "step": 287165 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014875214289264795, "loss": 2.1056, "step": 287170 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014875052912066643, "loss": 1.9404, "step": 287175 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014874891533203085, "loss": 2.0795, "step": 287180 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014874730152674177, "loss": 2.1358, "step": 287185 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.0001487456877047997, "loss": 2.1057, "step": 287190 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014874407386620522, "loss": 2.1643, "step": 287195 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.0001487424600109589, "loss": 1.9791, "step": 287200 }, { "epoch": 0.68, "grad_norm": 1.8515625, "learning_rate": 0.00014874084613906128, "loss": 2.0807, "step": 287205 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.0001487392322505129, "loss": 2.1393, "step": 287210 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.0001487376183453143, "loss": 2.1449, "step": 287215 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014873600442346605, "loss": 2.0983, "step": 287220 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.0001487343904849687, "loss": 2.0572, "step": 287225 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.0001487327765298228, "loss": 1.9213, "step": 287230 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014873116255802887, "loss": 2.1142, "step": 287235 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014872954856958755, "loss": 2.0757, "step": 287240 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014872793456449928, "loss": 1.9368, "step": 287245 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.0001487263205427647, "loss": 2.0938, "step": 287250 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014872470650438433, "loss": 2.0719, "step": 287255 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014872309244935868, "loss": 2.0855, "step": 287260 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014872147837768833, "loss": 1.7879, "step": 287265 }, { "epoch": 0.68, "grad_norm": 1.953125, "learning_rate": 0.00014871986428937386, "loss": 1.8989, "step": 287270 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014871825018441582, "loss": 2.2007, "step": 287275 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014871663606281474, "loss": 2.0556, "step": 287280 }, { "epoch": 0.68, "grad_norm": 2.890625, "learning_rate": 0.00014871502192457116, "loss": 2.0066, "step": 287285 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.0001487134077696856, "loss": 1.996, "step": 287290 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014871179359815873, "loss": 2.0585, "step": 287295 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.000148710179409991, "loss": 2.224, "step": 287300 }, { "epoch": 0.68, "grad_norm": 1.765625, "learning_rate": 0.000148708565205183, "loss": 2.0057, "step": 287305 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014870695098373525, "loss": 2.0852, "step": 287310 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001487053367456483, "loss": 2.0377, "step": 287315 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001487037224909228, "loss": 2.2839, "step": 287320 }, { "epoch": 0.68, "grad_norm": 1.8984375, "learning_rate": 0.00014870210821955917, "loss": 1.9052, "step": 287325 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014870049393155802, "loss": 1.9834, "step": 287330 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014869887962691992, "loss": 2.1106, "step": 287335 }, { "epoch": 0.68, "grad_norm": 1.8984375, "learning_rate": 0.00014869726530564536, "loss": 2.1391, "step": 287340 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014869565096773497, "loss": 2.0177, "step": 287345 }, { "epoch": 0.68, "grad_norm": 1.78125, "learning_rate": 0.00014869403661318923, "loss": 2.0803, "step": 287350 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014869242224200874, "loss": 2.0139, "step": 287355 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014869080785419404, "loss": 2.169, "step": 287360 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014868919344974567, "loss": 2.1505, "step": 287365 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001486875790286642, "loss": 2.0679, "step": 287370 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014868596459095015, "loss": 1.9704, "step": 287375 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014868435013660412, "loss": 2.0123, "step": 287380 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.0001486827356656266, "loss": 2.066, "step": 287385 }, { "epoch": 0.68, "grad_norm": 2.59375, "learning_rate": 0.00014868112117801823, "loss": 2.1625, "step": 287390 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014867950667377943, "loss": 2.1811, "step": 287395 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014867789215291087, "loss": 2.0655, "step": 287400 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014867627761541306, "loss": 2.0962, "step": 287405 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014867466306128655, "loss": 2.1553, "step": 287410 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.0001486730484905319, "loss": 2.1064, "step": 287415 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014867143390314964, "loss": 2.0984, "step": 287420 }, { "epoch": 0.68, "grad_norm": 2.625, "learning_rate": 0.0001486698192991403, "loss": 2.0501, "step": 287425 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014866820467850453, "loss": 2.3108, "step": 287430 }, { "epoch": 0.68, "grad_norm": 2.484375, "learning_rate": 0.0001486665900412428, "loss": 2.0886, "step": 287435 }, { "epoch": 0.68, "grad_norm": 1.9609375, "learning_rate": 0.00014866497538735567, "loss": 2.1072, "step": 287440 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.0001486633607168437, "loss": 2.0162, "step": 287445 }, { "epoch": 0.68, "grad_norm": 1.7109375, "learning_rate": 0.00014866174602970743, "loss": 2.0529, "step": 287450 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014866013132594746, "loss": 2.0902, "step": 287455 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001486585166055643, "loss": 2.058, "step": 287460 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.0001486569018685585, "loss": 2.1272, "step": 287465 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014865528711493063, "loss": 2.0738, "step": 287470 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.0001486536723446812, "loss": 2.2041, "step": 287475 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001486520575578108, "loss": 2.0972, "step": 287480 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.00014865044275432, "loss": 1.9929, "step": 287485 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001486488279342093, "loss": 2.1555, "step": 287490 }, { "epoch": 0.68, "grad_norm": 1.96875, "learning_rate": 0.00014864721309747932, "loss": 2.1084, "step": 287495 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014864559824413051, "loss": 2.1228, "step": 287500 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014864398337416352, "loss": 2.2229, "step": 287505 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014864236848757887, "loss": 1.8981, "step": 287510 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014864075358437707, "loss": 1.9858, "step": 287515 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014863913866455873, "loss": 2.0638, "step": 287520 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.0001486375237281244, "loss": 2.0762, "step": 287525 }, { "epoch": 0.68, "grad_norm": 1.7265625, "learning_rate": 0.00014863590877507455, "loss": 1.9999, "step": 287530 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014863429380540985, "loss": 2.1136, "step": 287535 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014863267881913076, "loss": 2.0794, "step": 287540 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.0001486310638162379, "loss": 2.0326, "step": 287545 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014862944879673174, "loss": 2.1329, "step": 287550 }, { "epoch": 0.68, "grad_norm": 1.890625, "learning_rate": 0.0001486278337606129, "loss": 1.9843, "step": 287555 }, { "epoch": 0.68, "grad_norm": 1.7890625, "learning_rate": 0.00014862621870788192, "loss": 2.0741, "step": 287560 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.0001486246036385393, "loss": 2.0701, "step": 287565 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014862298855258568, "loss": 2.1295, "step": 287570 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014862137345002157, "loss": 2.174, "step": 287575 }, { "epoch": 0.68, "grad_norm": 2.65625, "learning_rate": 0.00014861975833084748, "loss": 1.9444, "step": 287580 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.000148618143195064, "loss": 1.9636, "step": 287585 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014861652804267172, "loss": 2.202, "step": 287590 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.00014861491287367114, "loss": 2.0489, "step": 287595 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001486132976880628, "loss": 2.0919, "step": 287600 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.0001486116824858473, "loss": 1.9271, "step": 287605 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014861006726702512, "loss": 2.0198, "step": 287610 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014860845203159694, "loss": 1.8816, "step": 287615 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014860683677956317, "loss": 1.9384, "step": 287620 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014860522151092443, "loss": 2.1074, "step": 287625 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.0001486036062256813, "loss": 2.0486, "step": 287630 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014860199092383428, "loss": 1.9882, "step": 287635 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.0001486003756053839, "loss": 1.8785, "step": 287640 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.0001485987602703308, "loss": 1.9851, "step": 287645 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014859714491867546, "loss": 1.9891, "step": 287650 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014859552955041844, "loss": 2.1448, "step": 287655 }, { "epoch": 0.68, "grad_norm": 1.7265625, "learning_rate": 0.00014859391416556034, "loss": 2.1005, "step": 287660 }, { "epoch": 0.68, "grad_norm": 1.8359375, "learning_rate": 0.00014859229876410166, "loss": 2.0462, "step": 287665 }, { "epoch": 0.68, "grad_norm": 1.9140625, "learning_rate": 0.00014859068334604296, "loss": 2.1134, "step": 287670 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014858906791138482, "loss": 2.2317, "step": 287675 }, { "epoch": 0.68, "grad_norm": 1.859375, "learning_rate": 0.00014858745246012776, "loss": 1.818, "step": 287680 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014858583699227235, "loss": 2.0769, "step": 287685 }, { "epoch": 0.68, "grad_norm": 1.859375, "learning_rate": 0.0001485842215078191, "loss": 1.9404, "step": 287690 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.0001485826060067687, "loss": 2.1169, "step": 287695 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.0001485809904891215, "loss": 2.087, "step": 287700 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014857937495487818, "loss": 1.9912, "step": 287705 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.0001485777594040393, "loss": 2.1028, "step": 287710 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014857614383660534, "loss": 2.051, "step": 287715 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014857452825257689, "loss": 1.9221, "step": 287720 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.0001485729126519545, "loss": 1.8901, "step": 287725 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014857129703473875, "loss": 2.1195, "step": 287730 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014856968140093015, "loss": 2.1747, "step": 287735 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014856806575052925, "loss": 1.8392, "step": 287740 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.0001485664500835366, "loss": 2.1239, "step": 287745 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014856483439995284, "loss": 2.1344, "step": 287750 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.0001485632186997784, "loss": 2.0999, "step": 287755 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001485616029830139, "loss": 1.9849, "step": 287760 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014855998724965986, "loss": 2.1215, "step": 287765 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.00014855837149971686, "loss": 2.0287, "step": 287770 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014855675573318541, "loss": 2.1408, "step": 287775 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014855513995006613, "loss": 1.9422, "step": 287780 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014855352415035953, "loss": 2.0853, "step": 287785 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014855190833406616, "loss": 2.0218, "step": 287790 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014855029250118658, "loss": 1.876, "step": 287795 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.0001485486766517213, "loss": 1.988, "step": 287800 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014854706078567094, "loss": 2.1161, "step": 287805 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014854544490303605, "loss": 2.1674, "step": 287810 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014854382900381715, "loss": 2.0382, "step": 287815 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.0001485422130880148, "loss": 2.0491, "step": 287820 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014854059715562948, "loss": 1.925, "step": 287825 }, { "epoch": 0.68, "grad_norm": 2.59375, "learning_rate": 0.00014853898120666188, "loss": 1.9961, "step": 287830 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.0001485373652411125, "loss": 2.0367, "step": 287835 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.0001485357492589818, "loss": 1.9309, "step": 287840 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.0001485341332602705, "loss": 2.0246, "step": 287845 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.000148532517244979, "loss": 2.1224, "step": 287850 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.0001485309012131079, "loss": 2.0802, "step": 287855 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014852928516465778, "loss": 2.0643, "step": 287860 }, { "epoch": 0.68, "grad_norm": 1.9609375, "learning_rate": 0.00014852766909962921, "loss": 2.071, "step": 287865 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014852605301802268, "loss": 1.9547, "step": 287870 }, { "epoch": 0.68, "grad_norm": 1.78125, "learning_rate": 0.00014852443691983877, "loss": 1.9975, "step": 287875 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.000148522820805078, "loss": 2.1623, "step": 287880 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.000148521204673741, "loss": 2.1905, "step": 287885 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014851958852582828, "loss": 2.1391, "step": 287890 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014851797236134037, "loss": 2.0087, "step": 287895 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014851635618027783, "loss": 2.0563, "step": 287900 }, { "epoch": 0.68, "grad_norm": 1.8515625, "learning_rate": 0.00014851473998264122, "loss": 2.1523, "step": 287905 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014851312376843115, "loss": 2.0776, "step": 287910 }, { "epoch": 0.68, "grad_norm": 2.484375, "learning_rate": 0.00014851150753764807, "loss": 2.1336, "step": 287915 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.00014850989129029259, "loss": 1.9629, "step": 287920 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014850827502636523, "loss": 2.2015, "step": 287925 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.0001485066587458666, "loss": 2.176, "step": 287930 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001485050424487972, "loss": 2.0393, "step": 287935 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014850342613515757, "loss": 2.1495, "step": 287940 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014850180980494835, "loss": 2.0136, "step": 287945 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014850019345816998, "loss": 2.0472, "step": 287950 }, { "epoch": 0.68, "grad_norm": 1.875, "learning_rate": 0.0001484985770948231, "loss": 2.005, "step": 287955 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.0001484969607149082, "loss": 1.8915, "step": 287960 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014849534431842586, "loss": 1.8865, "step": 287965 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014849372790537663, "loss": 2.3355, "step": 287970 }, { "epoch": 0.68, "grad_norm": 1.875, "learning_rate": 0.00014849211147576106, "loss": 2.0504, "step": 287975 }, { "epoch": 0.68, "grad_norm": 5.6875, "learning_rate": 0.00014849049502957976, "loss": 2.1585, "step": 287980 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014848887856683317, "loss": 2.0194, "step": 287985 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.0001484872620875219, "loss": 2.0734, "step": 287990 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014848564559164653, "loss": 2.0963, "step": 287995 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014848402907920754, "loss": 1.9471, "step": 288000 }, { "epoch": 0.68, "grad_norm": 1.9453125, "learning_rate": 0.00014848241255020558, "loss": 2.0466, "step": 288005 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014848079600464112, "loss": 2.1084, "step": 288010 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014847917944251475, "loss": 2.1905, "step": 288015 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.000148477562863827, "loss": 1.9626, "step": 288020 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014847594626857846, "loss": 2.1633, "step": 288025 }, { "epoch": 0.68, "grad_norm": 1.8359375, "learning_rate": 0.00014847432965676963, "loss": 2.1002, "step": 288030 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.0001484727130284011, "loss": 2.0625, "step": 288035 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.0001484710963834734, "loss": 2.0641, "step": 288040 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014846947972198713, "loss": 2.1707, "step": 288045 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.0001484678630439428, "loss": 1.8971, "step": 288050 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014846624634934095, "loss": 2.0816, "step": 288055 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014846462963818215, "loss": 1.8752, "step": 288060 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014846301291046693, "loss": 2.045, "step": 288065 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014846139616619592, "loss": 1.9078, "step": 288070 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001484597794053696, "loss": 2.0303, "step": 288075 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014845816262798852, "loss": 2.0354, "step": 288080 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014845654583405327, "loss": 2.1084, "step": 288085 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001484549290235644, "loss": 1.9247, "step": 288090 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.0001484533121965224, "loss": 2.0936, "step": 288095 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.0001484516953529279, "loss": 2.1746, "step": 288100 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014845007849278142, "loss": 2.1391, "step": 288105 }, { "epoch": 0.68, "grad_norm": 1.890625, "learning_rate": 0.00014844846161608352, "loss": 2.0215, "step": 288110 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014844684472283475, "loss": 1.976, "step": 288115 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014844522781303564, "loss": 1.9423, "step": 288120 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014844361088668678, "loss": 2.0509, "step": 288125 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.0001484419939437887, "loss": 2.2192, "step": 288130 }, { "epoch": 0.68, "grad_norm": 1.9609375, "learning_rate": 0.00014844037698434195, "loss": 1.7791, "step": 288135 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001484387600083471, "loss": 2.0001, "step": 288140 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.0001484371430158047, "loss": 1.9371, "step": 288145 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.00014843552600671527, "loss": 1.8301, "step": 288150 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014843390898107938, "loss": 2.1416, "step": 288155 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.0001484322919388976, "loss": 2.1268, "step": 288160 }, { "epoch": 0.68, "grad_norm": 2.578125, "learning_rate": 0.00014843067488017048, "loss": 2.0986, "step": 288165 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014842905780489856, "loss": 1.9788, "step": 288170 }, { "epoch": 0.68, "grad_norm": 2.640625, "learning_rate": 0.00014842744071308237, "loss": 2.2046, "step": 288175 }, { "epoch": 0.68, "grad_norm": 1.8828125, "learning_rate": 0.0001484258236047225, "loss": 2.1621, "step": 288180 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014842420647981952, "loss": 2.1536, "step": 288185 }, { "epoch": 0.68, "grad_norm": 1.953125, "learning_rate": 0.00014842258933837394, "loss": 1.9739, "step": 288190 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014842097218038632, "loss": 2.2179, "step": 288195 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.0001484193550058572, "loss": 2.1328, "step": 288200 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014841773781478716, "loss": 2.178, "step": 288205 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014841612060717676, "loss": 2.327, "step": 288210 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.0001484145033830265, "loss": 2.2772, "step": 288215 }, { "epoch": 0.68, "grad_norm": 2.765625, "learning_rate": 0.00014841288614233703, "loss": 2.2987, "step": 288220 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014841126888510877, "loss": 2.1056, "step": 288225 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014840965161134237, "loss": 1.93, "step": 288230 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014840803432103835, "loss": 1.8221, "step": 288235 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.00014840641701419728, "loss": 2.1287, "step": 288240 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.0001484047996908197, "loss": 2.0096, "step": 288245 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014840318235090615, "loss": 1.9797, "step": 288250 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014840156499445722, "loss": 1.8473, "step": 288255 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.0001483999476214734, "loss": 2.0515, "step": 288260 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.0001483983302319553, "loss": 1.938, "step": 288265 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014839671282590347, "loss": 2.2187, "step": 288270 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014839509540331844, "loss": 2.1782, "step": 288275 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014839347796420074, "loss": 2.1551, "step": 288280 }, { "epoch": 0.68, "grad_norm": 1.96875, "learning_rate": 0.00014839186050855097, "loss": 2.1462, "step": 288285 }, { "epoch": 0.68, "grad_norm": 2.90625, "learning_rate": 0.00014839024303636966, "loss": 1.9843, "step": 288290 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014838862554765735, "loss": 2.0747, "step": 288295 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014838700804241465, "loss": 2.1598, "step": 288300 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014838539052064204, "loss": 2.0588, "step": 288305 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001483837729823401, "loss": 1.9024, "step": 288310 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014838215542750937, "loss": 1.935, "step": 288315 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014838053785615047, "loss": 1.9711, "step": 288320 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014837892026826386, "loss": 2.1685, "step": 288325 }, { "epoch": 0.68, "grad_norm": 3.234375, "learning_rate": 0.00014837730266385014, "loss": 1.966, "step": 288330 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014837568504290984, "loss": 1.8928, "step": 288335 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014837406740544359, "loss": 1.9809, "step": 288340 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014837244975145181, "loss": 1.878, "step": 288345 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014837083208093515, "loss": 1.9739, "step": 288350 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.0001483692143938942, "loss": 1.9, "step": 288355 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014836759669032933, "loss": 1.9503, "step": 288360 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.0001483659789702413, "loss": 1.9805, "step": 288365 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014836436123363052, "loss": 2.158, "step": 288370 }, { "epoch": 0.68, "grad_norm": 1.9609375, "learning_rate": 0.00014836274348049763, "loss": 2.2122, "step": 288375 }, { "epoch": 0.68, "grad_norm": 2.65625, "learning_rate": 0.00014836112571084314, "loss": 2.212, "step": 288380 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.0001483595079246676, "loss": 2.0373, "step": 288385 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.0001483578901219716, "loss": 2.104, "step": 288390 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.00014835627230275566, "loss": 2.2241, "step": 288395 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014835465446702036, "loss": 2.1763, "step": 288400 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.0001483530366147662, "loss": 2.0355, "step": 288405 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.0001483514187459938, "loss": 2.1415, "step": 288410 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.0001483498008607036, "loss": 2.0217, "step": 288415 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014834818295889632, "loss": 2.2492, "step": 288420 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014834656504057237, "loss": 2.1358, "step": 288425 }, { "epoch": 0.68, "grad_norm": 1.875, "learning_rate": 0.0001483449471057324, "loss": 2.0581, "step": 288430 }, { "epoch": 0.68, "grad_norm": 2.59375, "learning_rate": 0.0001483433291543769, "loss": 2.1389, "step": 288435 }, { "epoch": 0.68, "grad_norm": 1.7421875, "learning_rate": 0.0001483417111865064, "loss": 1.779, "step": 288440 }, { "epoch": 0.68, "grad_norm": 2.640625, "learning_rate": 0.00014834009320212156, "loss": 2.1022, "step": 288445 }, { "epoch": 0.68, "grad_norm": 2.625, "learning_rate": 0.00014833847520122283, "loss": 2.1192, "step": 288450 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014833685718381085, "loss": 2.0312, "step": 288455 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014833523914988607, "loss": 2.0928, "step": 288460 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014833362109944914, "loss": 2.0261, "step": 288465 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.0001483320030325005, "loss": 1.947, "step": 288470 }, { "epoch": 0.68, "grad_norm": 1.8125, "learning_rate": 0.0001483303849490408, "loss": 1.9743, "step": 288475 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.0001483287668490706, "loss": 2.2023, "step": 288480 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001483271487325904, "loss": 2.0412, "step": 288485 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014832553059960076, "loss": 2.195, "step": 288490 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.0001483239124501022, "loss": 2.0253, "step": 288495 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.00014832229428409538, "loss": 2.0558, "step": 288500 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014832067610158077, "loss": 2.0698, "step": 288505 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014831905790255894, "loss": 2.1234, "step": 288510 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014831743968703043, "loss": 2.1892, "step": 288515 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014831582145499581, "loss": 2.0648, "step": 288520 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014831420320645563, "loss": 1.9325, "step": 288525 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.00014831258494141044, "loss": 1.9965, "step": 288530 }, { "epoch": 0.68, "grad_norm": 2.96875, "learning_rate": 0.00014831096665986082, "loss": 2.133, "step": 288535 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.0001483093483618073, "loss": 2.1575, "step": 288540 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.00014830773004725038, "loss": 2.0926, "step": 288545 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.0001483061117161907, "loss": 2.0247, "step": 288550 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014830449336862876, "loss": 2.1212, "step": 288555 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014830287500456515, "loss": 2.0288, "step": 288560 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014830125662400039, "loss": 2.0397, "step": 288565 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014829963822693504, "loss": 2.108, "step": 288570 }, { "epoch": 0.68, "grad_norm": 1.8828125, "learning_rate": 0.00014829801981336966, "loss": 2.1811, "step": 288575 }, { "epoch": 0.68, "grad_norm": 1.65625, "learning_rate": 0.00014829640138330479, "loss": 1.9335, "step": 288580 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.000148294782936741, "loss": 1.9293, "step": 288585 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014829316447367884, "loss": 2.0183, "step": 288590 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.0001482915459941189, "loss": 2.0858, "step": 288595 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.00014828992749806163, "loss": 2.1189, "step": 288600 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.00014828830898550764, "loss": 2.1182, "step": 288605 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014828669045645752, "loss": 2.0014, "step": 288610 }, { "epoch": 0.68, "grad_norm": 1.640625, "learning_rate": 0.0001482850719109118, "loss": 1.9891, "step": 288615 }, { "epoch": 0.68, "grad_norm": 1.84375, "learning_rate": 0.00014828345334887103, "loss": 2.0641, "step": 288620 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014828183477033573, "loss": 1.9306, "step": 288625 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014828021617530648, "loss": 1.9892, "step": 288630 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014827859756378386, "loss": 1.9248, "step": 288635 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014827697893576839, "loss": 2.1287, "step": 288640 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001482753602912606, "loss": 2.0652, "step": 288645 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001482737416302611, "loss": 2.042, "step": 288650 }, { "epoch": 0.68, "grad_norm": 1.953125, "learning_rate": 0.00014827212295277035, "loss": 1.9051, "step": 288655 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014827050425878904, "loss": 2.1671, "step": 288660 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014826888554831763, "loss": 2.1377, "step": 288665 }, { "epoch": 0.68, "grad_norm": 5.0, "learning_rate": 0.00014826726682135667, "loss": 1.9481, "step": 288670 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014826564807790676, "loss": 2.0913, "step": 288675 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014826402931796844, "loss": 2.0825, "step": 288680 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014826241054154225, "loss": 2.0824, "step": 288685 }, { "epoch": 0.68, "grad_norm": 1.75, "learning_rate": 0.00014826079174862873, "loss": 1.9085, "step": 288690 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014825917293922845, "loss": 1.9803, "step": 288695 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014825755411334194, "loss": 2.0204, "step": 288700 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014825593527096982, "loss": 2.1092, "step": 288705 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014825431641211256, "loss": 1.9115, "step": 288710 }, { "epoch": 0.68, "grad_norm": 2.640625, "learning_rate": 0.00014825269753677075, "loss": 1.9509, "step": 288715 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014825107864494498, "loss": 2.1244, "step": 288720 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014824945973663573, "loss": 2.0387, "step": 288725 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014824784081184363, "loss": 2.1236, "step": 288730 }, { "epoch": 0.68, "grad_norm": 3.015625, "learning_rate": 0.00014824622187056916, "loss": 2.1441, "step": 288735 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001482446029128129, "loss": 2.165, "step": 288740 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014824298393857543, "loss": 2.2282, "step": 288745 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.0001482413649478573, "loss": 2.0906, "step": 288750 }, { "epoch": 0.68, "grad_norm": 1.7109375, "learning_rate": 0.000148239745940659, "loss": 2.1082, "step": 288755 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014823812691698116, "loss": 1.9321, "step": 288760 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014823650787682425, "loss": 2.114, "step": 288765 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014823488882018893, "loss": 2.0703, "step": 288770 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014823326974707567, "loss": 2.1485, "step": 288775 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014823165065748508, "loss": 2.1779, "step": 288780 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014823003155141766, "loss": 2.1752, "step": 288785 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014822841242887396, "loss": 2.1409, "step": 288790 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.0001482267932898546, "loss": 1.9634, "step": 288795 }, { "epoch": 0.68, "grad_norm": 2.578125, "learning_rate": 0.00014822517413436007, "loss": 1.9039, "step": 288800 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014822355496239096, "loss": 2.0709, "step": 288805 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.00014822193577394783, "loss": 1.925, "step": 288810 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014822031656903118, "loss": 1.8884, "step": 288815 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.0001482186973476416, "loss": 1.9721, "step": 288820 }, { "epoch": 0.68, "grad_norm": 1.8515625, "learning_rate": 0.00014821707810977962, "loss": 1.8981, "step": 288825 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014821545885544584, "loss": 2.0787, "step": 288830 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014821383958464078, "loss": 2.0044, "step": 288835 }, { "epoch": 0.68, "grad_norm": 3.234375, "learning_rate": 0.000148212220297365, "loss": 2.2192, "step": 288840 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014821060099361907, "loss": 2.048, "step": 288845 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001482089816734035, "loss": 2.1376, "step": 288850 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014820736233671887, "loss": 2.1911, "step": 288855 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.0001482057429835657, "loss": 2.1245, "step": 288860 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014820412361394463, "loss": 2.1671, "step": 288865 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.0001482025042278561, "loss": 2.0764, "step": 288870 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014820088482530078, "loss": 2.0126, "step": 288875 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.0001481992654062791, "loss": 2.0629, "step": 288880 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014819764597079173, "loss": 2.0246, "step": 288885 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014819602651883917, "loss": 2.1383, "step": 288890 }, { "epoch": 0.68, "grad_norm": 2.859375, "learning_rate": 0.00014819440705042194, "loss": 2.0809, "step": 288895 }, { "epoch": 0.68, "grad_norm": 2.703125, "learning_rate": 0.00014819278756554062, "loss": 2.2841, "step": 288900 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014819116806419582, "loss": 1.8853, "step": 288905 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014818954854638801, "loss": 1.9967, "step": 288910 }, { "epoch": 0.68, "grad_norm": 3.234375, "learning_rate": 0.00014818792901211779, "loss": 2.2039, "step": 288915 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.0001481863094613857, "loss": 2.0609, "step": 288920 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014818468989419225, "loss": 2.0408, "step": 288925 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014818307031053807, "loss": 2.1632, "step": 288930 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014818145071042368, "loss": 2.1738, "step": 288935 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014817983109384966, "loss": 2.1186, "step": 288940 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.0001481782114608165, "loss": 2.0095, "step": 288945 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014817659181132477, "loss": 2.0565, "step": 288950 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014817497214537507, "loss": 1.8928, "step": 288955 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014817335246296794, "loss": 2.1012, "step": 288960 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.0001481717327641039, "loss": 1.9898, "step": 288965 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.00014817011304878354, "loss": 2.0683, "step": 288970 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.00014816849331700737, "loss": 2.1807, "step": 288975 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.000148166873568776, "loss": 2.055, "step": 288980 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014816525380408993, "loss": 2.0925, "step": 288985 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014816363402294974, "loss": 2.1145, "step": 288990 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014816201422535597, "loss": 1.8308, "step": 288995 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014816039441130919, "loss": 2.1832, "step": 289000 }, { "epoch": 0.68, "grad_norm": 1.7109375, "learning_rate": 0.00014815877458080993, "loss": 2.1523, "step": 289005 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014815715473385878, "loss": 2.3392, "step": 289010 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014815553487045627, "loss": 2.0749, "step": 289015 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014815391499060297, "loss": 2.1894, "step": 289020 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.0001481522950942994, "loss": 2.265, "step": 289025 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014815067518154614, "loss": 2.1327, "step": 289030 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.0001481490552523437, "loss": 1.9493, "step": 289035 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.0001481474353066927, "loss": 2.1597, "step": 289040 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.00014814581534459368, "loss": 1.9404, "step": 289045 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.0001481441953660472, "loss": 2.0281, "step": 289050 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014814257537105373, "loss": 2.1323, "step": 289055 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014814095535961388, "loss": 1.9008, "step": 289060 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014813933533172825, "loss": 2.0264, "step": 289065 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014813771528739733, "loss": 1.8395, "step": 289070 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014813609522662172, "loss": 2.2078, "step": 289075 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.0001481344751494019, "loss": 1.9756, "step": 289080 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014813285505573846, "loss": 2.1752, "step": 289085 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.000148131234945632, "loss": 2.0376, "step": 289090 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014812961481908306, "loss": 2.0496, "step": 289095 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014812799467609213, "loss": 1.9962, "step": 289100 }, { "epoch": 0.68, "grad_norm": 1.9453125, "learning_rate": 0.00014812637451665982, "loss": 2.2423, "step": 289105 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014812475434078666, "loss": 2.096, "step": 289110 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014812313414847323, "loss": 2.0973, "step": 289115 }, { "epoch": 0.68, "grad_norm": 1.875, "learning_rate": 0.00014812151393972002, "loss": 2.0734, "step": 289120 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.00014811989371452767, "loss": 2.0613, "step": 289125 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014811827347289668, "loss": 2.0847, "step": 289130 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001481166532148276, "loss": 1.988, "step": 289135 }, { "epoch": 0.68, "grad_norm": 1.9609375, "learning_rate": 0.00014811503294032103, "loss": 2.1379, "step": 289140 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.0001481134126493775, "loss": 2.0613, "step": 289145 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.0001481117923419975, "loss": 2.158, "step": 289150 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014811017201818167, "loss": 2.085, "step": 289155 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014810855167793056, "loss": 1.968, "step": 289160 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014810693132124466, "loss": 2.1183, "step": 289165 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014810531094812455, "loss": 2.0253, "step": 289170 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014810369055857082, "loss": 1.915, "step": 289175 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.000148102070152584, "loss": 1.9652, "step": 289180 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014810044973016464, "loss": 2.2082, "step": 289185 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014809882929131326, "loss": 1.8267, "step": 289190 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014809720883603047, "loss": 2.0796, "step": 289195 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014809558836431682, "loss": 2.1846, "step": 289200 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014809396787617284, "loss": 1.9883, "step": 289205 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014809234737159907, "loss": 1.8953, "step": 289210 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.0001480907268505961, "loss": 2.2189, "step": 289215 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014808910631316442, "loss": 2.031, "step": 289220 }, { "epoch": 0.68, "grad_norm": 1.921875, "learning_rate": 0.0001480874857593047, "loss": 1.8659, "step": 289225 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014808586518901738, "loss": 2.1095, "step": 289230 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014808424460230305, "loss": 2.0041, "step": 289235 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014808262399916227, "loss": 2.1611, "step": 289240 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.0001480810033795956, "loss": 2.2393, "step": 289245 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001480793827436036, "loss": 2.1002, "step": 289250 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001480777620911868, "loss": 2.1218, "step": 289255 }, { "epoch": 0.68, "grad_norm": 2.734375, "learning_rate": 0.00014807614142234578, "loss": 1.7466, "step": 289260 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014807452073708105, "loss": 1.9956, "step": 289265 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014807290003539323, "loss": 1.9415, "step": 289270 }, { "epoch": 0.68, "grad_norm": 1.921875, "learning_rate": 0.0001480712793172828, "loss": 2.1741, "step": 289275 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014806965858275037, "loss": 2.1604, "step": 289280 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014806803783179646, "loss": 1.9478, "step": 289285 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014806641706442163, "loss": 2.3317, "step": 289290 }, { "epoch": 0.68, "grad_norm": 1.96875, "learning_rate": 0.00014806479628062645, "loss": 2.1365, "step": 289295 }, { "epoch": 0.68, "grad_norm": 1.8203125, "learning_rate": 0.00014806317548041146, "loss": 2.0435, "step": 289300 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014806155466377723, "loss": 2.0654, "step": 289305 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.0001480599338307243, "loss": 1.9876, "step": 289310 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.00014805831298125323, "loss": 2.3093, "step": 289315 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014805669211536457, "loss": 2.2273, "step": 289320 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014805507123305883, "loss": 2.2142, "step": 289325 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014805345033433668, "loss": 1.9817, "step": 289330 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014805182941919853, "loss": 2.076, "step": 289335 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014805020848764503, "loss": 2.1142, "step": 289340 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014804858753967673, "loss": 2.0818, "step": 289345 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014804696657529413, "loss": 2.1564, "step": 289350 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.0001480453455944978, "loss": 1.9898, "step": 289355 }, { "epoch": 0.68, "grad_norm": 2.703125, "learning_rate": 0.00014804372459728836, "loss": 2.1346, "step": 289360 }, { "epoch": 0.68, "grad_norm": 1.7578125, "learning_rate": 0.00014804210358366628, "loss": 2.1986, "step": 289365 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014804048255363217, "loss": 2.0453, "step": 289370 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014803886150718652, "loss": 2.0335, "step": 289375 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014803724044432993, "loss": 1.9132, "step": 289380 }, { "epoch": 0.68, "grad_norm": 1.7734375, "learning_rate": 0.00014803561936506298, "loss": 1.9926, "step": 289385 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014803399826938618, "loss": 2.0849, "step": 289390 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014803237715730008, "loss": 2.0981, "step": 289395 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014803075602880524, "loss": 1.9478, "step": 289400 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014802913488390227, "loss": 2.1635, "step": 289405 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014802751372259163, "loss": 2.1715, "step": 289410 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014802589254487393, "loss": 2.0613, "step": 289415 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014802427135074972, "loss": 2.0784, "step": 289420 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014802265014021956, "loss": 2.017, "step": 289425 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014802102891328398, "loss": 2.0443, "step": 289430 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014801940766994353, "loss": 2.1038, "step": 289435 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.0001480177864101988, "loss": 2.021, "step": 289440 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014801616513405031, "loss": 2.0931, "step": 289445 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014801454384149863, "loss": 2.0265, "step": 289450 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014801292253254435, "loss": 1.8311, "step": 289455 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014801130120718794, "loss": 1.9507, "step": 289460 }, { "epoch": 0.68, "grad_norm": 2.734375, "learning_rate": 0.00014800967986543, "loss": 2.1257, "step": 289465 }, { "epoch": 0.68, "grad_norm": 2.703125, "learning_rate": 0.00014800805850727111, "loss": 2.163, "step": 289470 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014800643713271176, "loss": 2.1355, "step": 289475 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.0001480048157417526, "loss": 2.2366, "step": 289480 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014800319433439408, "loss": 1.9038, "step": 289485 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.0001480015729106368, "loss": 2.0795, "step": 289490 }, { "epoch": 0.68, "grad_norm": 3.234375, "learning_rate": 0.0001479999514704813, "loss": 2.0242, "step": 289495 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014799833001392817, "loss": 2.0679, "step": 289500 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.00014799670854097793, "loss": 2.0669, "step": 289505 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014799508705163114, "loss": 2.0433, "step": 289510 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014799346554588834, "loss": 2.1028, "step": 289515 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014799184402375016, "loss": 2.0257, "step": 289520 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014799022248521707, "loss": 2.016, "step": 289525 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014798860093028963, "loss": 2.0972, "step": 289530 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014798697935896842, "loss": 1.9544, "step": 289535 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014798535777125397, "loss": 2.1681, "step": 289540 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001479837361671469, "loss": 2.1712, "step": 289545 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014798211454664767, "loss": 2.114, "step": 289550 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001479804929097569, "loss": 2.0499, "step": 289555 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014797887125647514, "loss": 2.0455, "step": 289560 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.0001479772495868029, "loss": 2.1412, "step": 289565 }, { "epoch": 0.68, "grad_norm": 2.671875, "learning_rate": 0.00014797562790074077, "loss": 2.058, "step": 289570 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.0001479740061982893, "loss": 2.2402, "step": 289575 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.00014797238447944902, "loss": 2.0853, "step": 289580 }, { "epoch": 0.68, "grad_norm": 2.671875, "learning_rate": 0.00014797076274422052, "loss": 2.1705, "step": 289585 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.00014796914099260433, "loss": 2.0109, "step": 289590 }, { "epoch": 0.68, "grad_norm": 2.671875, "learning_rate": 0.00014796751922460102, "loss": 2.2378, "step": 289595 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014796589744021113, "loss": 1.9695, "step": 289600 }, { "epoch": 0.68, "grad_norm": 1.8828125, "learning_rate": 0.00014796427563943524, "loss": 2.0328, "step": 289605 }, { "epoch": 0.68, "grad_norm": 1.84375, "learning_rate": 0.00014796265382227385, "loss": 2.0862, "step": 289610 }, { "epoch": 0.68, "grad_norm": 1.7265625, "learning_rate": 0.0001479610319887276, "loss": 1.9318, "step": 289615 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014795941013879692, "loss": 2.1722, "step": 289620 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.0001479577882724825, "loss": 2.0993, "step": 289625 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014795616638978477, "loss": 1.9838, "step": 289630 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.0001479545444907044, "loss": 2.0963, "step": 289635 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014795292257524184, "loss": 1.9779, "step": 289640 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014795130064339772, "loss": 2.0123, "step": 289645 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014794967869517252, "loss": 2.1953, "step": 289650 }, { "epoch": 0.68, "grad_norm": 1.921875, "learning_rate": 0.0001479480567305669, "loss": 1.9833, "step": 289655 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014794643474958137, "loss": 2.1847, "step": 289660 }, { "epoch": 0.68, "grad_norm": 2.6875, "learning_rate": 0.00014794481275221642, "loss": 1.9859, "step": 289665 }, { "epoch": 0.68, "grad_norm": 2.640625, "learning_rate": 0.00014794319073847267, "loss": 2.0885, "step": 289670 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014794156870835063, "loss": 2.0574, "step": 289675 }, { "epoch": 0.68, "grad_norm": 2.625, "learning_rate": 0.0001479399466618509, "loss": 2.0762, "step": 289680 }, { "epoch": 0.68, "grad_norm": 1.8515625, "learning_rate": 0.00014793832459897402, "loss": 2.072, "step": 289685 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014793670251972055, "loss": 1.9777, "step": 289690 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.000147935080424091, "loss": 1.8646, "step": 289695 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.000147933458312086, "loss": 2.0999, "step": 289700 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014793183618370603, "loss": 1.9364, "step": 289705 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.00014793021403895168, "loss": 2.01, "step": 289710 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.0001479285918778235, "loss": 2.0784, "step": 289715 }, { "epoch": 0.68, "grad_norm": 1.9140625, "learning_rate": 0.00014792696970032204, "loss": 2.024, "step": 289720 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014792534750644788, "loss": 1.9636, "step": 289725 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.00014792372529620154, "loss": 2.1108, "step": 289730 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001479221030695836, "loss": 2.2024, "step": 289735 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014792048082659456, "loss": 2.0575, "step": 289740 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014791885856723505, "loss": 2.0068, "step": 289745 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014791723629150558, "loss": 2.103, "step": 289750 }, { "epoch": 0.68, "grad_norm": 1.6640625, "learning_rate": 0.0001479156139994067, "loss": 2.1814, "step": 289755 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014791399169093898, "loss": 1.992, "step": 289760 }, { "epoch": 0.68, "grad_norm": 2.5625, "learning_rate": 0.000147912369366103, "loss": 2.1038, "step": 289765 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.00014791074702489925, "loss": 1.9707, "step": 289770 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014790912466732835, "loss": 2.118, "step": 289775 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.0001479075022933908, "loss": 2.1178, "step": 289780 }, { "epoch": 0.68, "grad_norm": 1.5546875, "learning_rate": 0.00014790587990308717, "loss": 1.8205, "step": 289785 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014790425749641804, "loss": 2.0708, "step": 289790 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014790263507338396, "loss": 2.2374, "step": 289795 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014790101263398547, "loss": 2.1479, "step": 289800 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014789939017822312, "loss": 1.9809, "step": 289805 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014789776770609742, "loss": 2.1853, "step": 289810 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014789614521760905, "loss": 1.988, "step": 289815 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014789452271275844, "loss": 2.0071, "step": 289820 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014789290019154622, "loss": 2.1485, "step": 289825 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.0001478912776539729, "loss": 2.2276, "step": 289830 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014788965510003903, "loss": 2.0651, "step": 289835 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014788803252974525, "loss": 2.1506, "step": 289840 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.000147886409943092, "loss": 2.032, "step": 289845 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.0001478847873400799, "loss": 2.1896, "step": 289850 }, { "epoch": 0.68, "grad_norm": 1.9921875, "learning_rate": 0.00014788316472070948, "loss": 1.8782, "step": 289855 }, { "epoch": 0.68, "grad_norm": 1.9765625, "learning_rate": 0.00014788154208498128, "loss": 2.0291, "step": 289860 }, { "epoch": 0.68, "grad_norm": 2.484375, "learning_rate": 0.00014787991943289593, "loss": 2.0867, "step": 289865 }, { "epoch": 0.68, "grad_norm": 1.890625, "learning_rate": 0.00014787829676445388, "loss": 2.0127, "step": 289870 }, { "epoch": 0.68, "grad_norm": 1.96875, "learning_rate": 0.00014787667407965575, "loss": 1.9675, "step": 289875 }, { "epoch": 0.68, "grad_norm": 1.875, "learning_rate": 0.0001478750513785021, "loss": 2.0402, "step": 289880 }, { "epoch": 0.68, "grad_norm": 1.8984375, "learning_rate": 0.00014787342866099345, "loss": 2.2714, "step": 289885 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014787180592713036, "loss": 2.0531, "step": 289890 }, { "epoch": 0.68, "grad_norm": 1.7734375, "learning_rate": 0.00014787018317691342, "loss": 1.9605, "step": 289895 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014786856041034313, "loss": 2.111, "step": 289900 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014786693762742007, "loss": 1.9644, "step": 289905 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.00014786531482814482, "loss": 1.94, "step": 289910 }, { "epoch": 0.68, "grad_norm": 2.59375, "learning_rate": 0.0001478636920125179, "loss": 1.9961, "step": 289915 }, { "epoch": 0.68, "grad_norm": 1.7265625, "learning_rate": 0.00014786206918053987, "loss": 1.9675, "step": 289920 }, { "epoch": 0.68, "grad_norm": 2.578125, "learning_rate": 0.0001478604463322113, "loss": 2.066, "step": 289925 }, { "epoch": 0.68, "grad_norm": 1.7265625, "learning_rate": 0.00014785882346753275, "loss": 1.9302, "step": 289930 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014785720058650473, "loss": 2.1209, "step": 289935 }, { "epoch": 0.68, "grad_norm": 2.5625, "learning_rate": 0.00014785557768912778, "loss": 2.1254, "step": 289940 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014785395477540255, "loss": 2.1264, "step": 289945 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014785233184532953, "loss": 2.1495, "step": 289950 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.00014785070889890932, "loss": 1.8619, "step": 289955 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014784908593614243, "loss": 1.8672, "step": 289960 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014784746295702937, "loss": 2.139, "step": 289965 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014784583996157078, "loss": 2.109, "step": 289970 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.0001478442169497672, "loss": 2.2069, "step": 289975 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014784259392161916, "loss": 1.8893, "step": 289980 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.0001478409708771272, "loss": 2.1866, "step": 289985 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014783934781629191, "loss": 2.1302, "step": 289990 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014783772473911383, "loss": 1.9571, "step": 289995 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014783610164559354, "loss": 2.0109, "step": 290000 }, { "epoch": 0.68, "grad_norm": 1.875, "learning_rate": 0.00014783447853573156, "loss": 2.1529, "step": 290005 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014783285540952842, "loss": 1.9412, "step": 290010 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.00014783123226698474, "loss": 2.1154, "step": 290015 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014782960910810104, "loss": 2.0621, "step": 290020 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014782798593287788, "loss": 1.9615, "step": 290025 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.00014782636274131582, "loss": 2.1032, "step": 290030 }, { "epoch": 0.68, "grad_norm": 2.625, "learning_rate": 0.00014782473953341537, "loss": 2.0525, "step": 290035 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.00014782311630917716, "loss": 2.017, "step": 290040 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.0001478214930686017, "loss": 2.1274, "step": 290045 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014781986981168954, "loss": 2.1064, "step": 290050 }, { "epoch": 0.68, "grad_norm": 1.9140625, "learning_rate": 0.00014781824653844122, "loss": 2.1063, "step": 290055 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014781662324885736, "loss": 1.9687, "step": 290060 }, { "epoch": 0.68, "grad_norm": 2.625, "learning_rate": 0.00014781499994293845, "loss": 1.9815, "step": 290065 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.0001478133766206851, "loss": 1.9637, "step": 290070 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014781175328209783, "loss": 2.1889, "step": 290075 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014781012992717715, "loss": 2.1351, "step": 290080 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.0001478085065559237, "loss": 1.9935, "step": 290085 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014780688316833802, "loss": 1.9575, "step": 290090 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014780525976442062, "loss": 2.0797, "step": 290095 }, { "epoch": 0.68, "grad_norm": 2.546875, "learning_rate": 0.00014780363634417208, "loss": 2.1107, "step": 290100 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.0001478020129075929, "loss": 2.2973, "step": 290105 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014780038945468374, "loss": 2.1227, "step": 290110 }, { "epoch": 0.68, "grad_norm": 2.484375, "learning_rate": 0.00014779876598544509, "loss": 1.8211, "step": 290115 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014779714249987753, "loss": 2.1783, "step": 290120 }, { "epoch": 0.68, "grad_norm": 1.8125, "learning_rate": 0.00014779551899798157, "loss": 2.1042, "step": 290125 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.0001477938954797578, "loss": 1.9236, "step": 290130 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014779227194520677, "loss": 2.1181, "step": 290135 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014779064839432902, "loss": 2.2312, "step": 290140 }, { "epoch": 0.68, "grad_norm": 2.828125, "learning_rate": 0.00014778902482712517, "loss": 2.1128, "step": 290145 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014778740124359567, "loss": 2.1782, "step": 290150 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014778577764374114, "loss": 2.0381, "step": 290155 }, { "epoch": 0.68, "grad_norm": 1.734375, "learning_rate": 0.00014778415402756214, "loss": 1.7969, "step": 290160 }, { "epoch": 0.68, "grad_norm": 3.796875, "learning_rate": 0.00014778253039505917, "loss": 2.1679, "step": 290165 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014778090674623287, "loss": 2.1413, "step": 290170 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.0001477792830810837, "loss": 2.1161, "step": 290175 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014777765939961228, "loss": 2.1326, "step": 290180 }, { "epoch": 0.68, "grad_norm": 1.96875, "learning_rate": 0.00014777603570181915, "loss": 2.1471, "step": 290185 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014777441198770484, "loss": 2.1466, "step": 290190 }, { "epoch": 0.68, "grad_norm": 1.8359375, "learning_rate": 0.00014777278825726996, "loss": 1.8255, "step": 290195 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.000147771164510515, "loss": 1.9496, "step": 290200 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014776954074744057, "loss": 2.0726, "step": 290205 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014776791696804716, "loss": 2.1367, "step": 290210 }, { "epoch": 0.68, "grad_norm": 1.8203125, "learning_rate": 0.00014776629317233536, "loss": 1.9671, "step": 290215 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.00014776466936030578, "loss": 2.0181, "step": 290220 }, { "epoch": 0.68, "grad_norm": 1.890625, "learning_rate": 0.0001477630455319589, "loss": 1.9437, "step": 290225 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.0001477614216872953, "loss": 1.8935, "step": 290230 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014775979782631548, "loss": 2.0806, "step": 290235 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.0001477581739490201, "loss": 2.178, "step": 290240 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014775655005540964, "loss": 2.1003, "step": 290245 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.0001477549261454847, "loss": 2.0298, "step": 290250 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.0001477533022192458, "loss": 1.9888, "step": 290255 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014775167827669352, "loss": 1.9965, "step": 290260 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014775005431782838, "loss": 2.0493, "step": 290265 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.00014774843034265098, "loss": 2.1263, "step": 290270 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014774680635116182, "loss": 2.2496, "step": 290275 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.0001477451823433615, "loss": 1.9411, "step": 290280 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014774355831925054, "loss": 2.0062, "step": 290285 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014774193427882955, "loss": 2.2843, "step": 290290 }, { "epoch": 0.68, "grad_norm": 1.890625, "learning_rate": 0.00014774031022209903, "loss": 2.1563, "step": 290295 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.00014773868614905954, "loss": 2.2372, "step": 290300 }, { "epoch": 0.68, "grad_norm": 2.4375, "learning_rate": 0.00014773706205971167, "loss": 2.1728, "step": 290305 }, { "epoch": 0.68, "grad_norm": 1.9453125, "learning_rate": 0.00014773543795405596, "loss": 2.1428, "step": 290310 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014773381383209293, "loss": 2.0485, "step": 290315 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014773218969382318, "loss": 2.0234, "step": 290320 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014773056553924724, "loss": 2.0604, "step": 290325 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014772894136836568, "loss": 1.9763, "step": 290330 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014772731718117906, "loss": 1.9478, "step": 290335 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014772569297768793, "loss": 2.0118, "step": 290340 }, { "epoch": 0.68, "grad_norm": 2.09375, "learning_rate": 0.0001477240687578928, "loss": 2.0703, "step": 290345 }, { "epoch": 0.68, "grad_norm": 1.8203125, "learning_rate": 0.00014772244452179427, "loss": 2.0019, "step": 290350 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.0001477208202693929, "loss": 2.1966, "step": 290355 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014771919600068924, "loss": 2.0923, "step": 290360 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014771757171568386, "loss": 2.0411, "step": 290365 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014771594741437723, "loss": 2.0, "step": 290370 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014771432309677, "loss": 2.024, "step": 290375 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001477126987628627, "loss": 1.9914, "step": 290380 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.00014771107441265588, "loss": 2.0143, "step": 290385 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.00014770945004615008, "loss": 2.0819, "step": 290390 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014770782566334587, "loss": 1.9761, "step": 290395 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.00014770620126424377, "loss": 2.1221, "step": 290400 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.0001477045768488444, "loss": 1.997, "step": 290405 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.00014770295241714826, "loss": 2.1679, "step": 290410 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014770132796915596, "loss": 1.9729, "step": 290415 }, { "epoch": 0.68, "grad_norm": 1.984375, "learning_rate": 0.000147699703504868, "loss": 2.1617, "step": 290420 }, { "epoch": 0.68, "grad_norm": 1.8828125, "learning_rate": 0.00014769807902428495, "loss": 2.1339, "step": 290425 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.0001476964545274074, "loss": 2.1113, "step": 290430 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014769483001423583, "loss": 2.1535, "step": 290435 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.0001476932054847709, "loss": 2.098, "step": 290440 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014769158093901305, "loss": 1.9504, "step": 290445 }, { "epoch": 0.68, "grad_norm": 2.609375, "learning_rate": 0.00014768995637696292, "loss": 1.9791, "step": 290450 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014768833179862103, "loss": 2.0718, "step": 290455 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014768670720398795, "loss": 2.0795, "step": 290460 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.0001476850825930642, "loss": 2.107, "step": 290465 }, { "epoch": 0.68, "grad_norm": 1.7734375, "learning_rate": 0.00014768345796585037, "loss": 2.0554, "step": 290470 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.000147681833322347, "loss": 2.1674, "step": 290475 }, { "epoch": 0.68, "grad_norm": 2.78125, "learning_rate": 0.0001476802086625547, "loss": 2.1337, "step": 290480 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014767858398647392, "loss": 1.9264, "step": 290485 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.0001476769592941053, "loss": 2.1091, "step": 290490 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014767533458544935, "loss": 2.0194, "step": 290495 }, { "epoch": 0.68, "grad_norm": 2.6875, "learning_rate": 0.0001476737098605067, "loss": 2.0567, "step": 290500 }, { "epoch": 0.68, "grad_norm": 2.5625, "learning_rate": 0.00014767208511927776, "loss": 2.0105, "step": 290505 }, { "epoch": 0.68, "grad_norm": 2.484375, "learning_rate": 0.0001476704603617632, "loss": 2.0617, "step": 290510 }, { "epoch": 0.68, "grad_norm": 1.921875, "learning_rate": 0.00014766883558796354, "loss": 2.0237, "step": 290515 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014766721079787938, "loss": 1.7857, "step": 290520 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.0001476655859915112, "loss": 2.0672, "step": 290525 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.0001476639611688596, "loss": 2.1913, "step": 290530 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014766233632992513, "loss": 2.2452, "step": 290535 }, { "epoch": 0.68, "grad_norm": 2.5625, "learning_rate": 0.00014766071147470834, "loss": 1.9204, "step": 290540 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014765908660320978, "loss": 2.1318, "step": 290545 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014765746171543003, "loss": 2.1899, "step": 290550 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.0001476558368113696, "loss": 2.0291, "step": 290555 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014765421189102908, "loss": 2.0983, "step": 290560 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014765258695440906, "loss": 2.0438, "step": 290565 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014765096200150998, "loss": 2.0543, "step": 290570 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.0001476493370323325, "loss": 2.1205, "step": 290575 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014764771204687715, "loss": 2.0361, "step": 290580 }, { "epoch": 0.68, "grad_norm": 2.828125, "learning_rate": 0.00014764608704514448, "loss": 1.9713, "step": 290585 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.00014764446202713503, "loss": 2.0454, "step": 290590 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014764283699284937, "loss": 2.165, "step": 290595 }, { "epoch": 0.68, "grad_norm": 2.78125, "learning_rate": 0.00014764121194228805, "loss": 2.1788, "step": 290600 }, { "epoch": 0.68, "grad_norm": 2.53125, "learning_rate": 0.00014763958687545167, "loss": 2.1036, "step": 290605 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014763796179234067, "loss": 1.8695, "step": 290610 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014763633669295572, "loss": 2.044, "step": 290615 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014763471157729733, "loss": 2.0265, "step": 290620 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014763308644536607, "loss": 2.1693, "step": 290625 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014763146129716245, "loss": 1.8268, "step": 290630 }, { "epoch": 0.68, "grad_norm": 3.34375, "learning_rate": 0.0001476298361326871, "loss": 1.9716, "step": 290635 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.0001476282109519405, "loss": 2.0728, "step": 290640 }, { "epoch": 0.68, "grad_norm": 2.171875, "learning_rate": 0.00014762658575492326, "loss": 2.0394, "step": 290645 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.0001476249605416359, "loss": 2.0894, "step": 290650 }, { "epoch": 0.68, "grad_norm": 2.453125, "learning_rate": 0.00014762333531207899, "loss": 2.0031, "step": 290655 }, { "epoch": 0.68, "grad_norm": 1.7578125, "learning_rate": 0.00014762171006625313, "loss": 1.9392, "step": 290660 }, { "epoch": 0.68, "grad_norm": 2.265625, "learning_rate": 0.00014762008480415876, "loss": 2.1078, "step": 290665 }, { "epoch": 0.68, "grad_norm": 2.625, "learning_rate": 0.00014761845952579653, "loss": 2.1169, "step": 290670 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014761683423116697, "loss": 2.1176, "step": 290675 }, { "epoch": 0.68, "grad_norm": 1.7890625, "learning_rate": 0.00014761520892027068, "loss": 1.8721, "step": 290680 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014761358359310814, "loss": 1.9661, "step": 290685 }, { "epoch": 0.68, "grad_norm": 1.921875, "learning_rate": 0.0001476119582496799, "loss": 2.1672, "step": 290690 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014761033288998658, "loss": 2.1266, "step": 290695 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014760870751402873, "loss": 1.9789, "step": 290700 }, { "epoch": 0.68, "grad_norm": 2.703125, "learning_rate": 0.00014760708212180687, "loss": 1.9439, "step": 290705 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014760545671332155, "loss": 2.0445, "step": 290710 }, { "epoch": 0.68, "grad_norm": 2.25, "learning_rate": 0.00014760383128857335, "loss": 2.1246, "step": 290715 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014760220584756278, "loss": 2.0576, "step": 290720 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014760058039029053, "loss": 2.0773, "step": 290725 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.000147598954916757, "loss": 2.0268, "step": 290730 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.0001475973294269628, "loss": 1.8506, "step": 290735 }, { "epoch": 0.68, "grad_norm": 1.703125, "learning_rate": 0.0001475957039209085, "loss": 2.0985, "step": 290740 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014759407839859463, "loss": 2.1325, "step": 290745 }, { "epoch": 0.68, "grad_norm": 1.9375, "learning_rate": 0.00014759245286002177, "loss": 1.9736, "step": 290750 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014759082730519046, "loss": 2.0876, "step": 290755 }, { "epoch": 0.68, "grad_norm": 1.90625, "learning_rate": 0.00014758920173410127, "loss": 1.9171, "step": 290760 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.0001475875761467547, "loss": 2.1952, "step": 290765 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.0001475859505431514, "loss": 2.1153, "step": 290770 }, { "epoch": 0.68, "grad_norm": 1.7421875, "learning_rate": 0.0001475843249232919, "loss": 2.1364, "step": 290775 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014758269928717666, "loss": 2.0794, "step": 290780 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014758107363480635, "loss": 2.0686, "step": 290785 }, { "epoch": 0.68, "grad_norm": 2.375, "learning_rate": 0.0001475794479661815, "loss": 2.139, "step": 290790 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014757782228130265, "loss": 2.1372, "step": 290795 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001475761965801703, "loss": 1.9307, "step": 290800 }, { "epoch": 0.68, "grad_norm": 1.8828125, "learning_rate": 0.00014757457086278508, "loss": 1.9966, "step": 290805 }, { "epoch": 0.68, "grad_norm": 3.421875, "learning_rate": 0.00014757294512914753, "loss": 2.1019, "step": 290810 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014757131937925822, "loss": 2.0595, "step": 290815 }, { "epoch": 0.68, "grad_norm": 2.296875, "learning_rate": 0.00014756969361311766, "loss": 2.0393, "step": 290820 }, { "epoch": 0.68, "grad_norm": 2.34375, "learning_rate": 0.0001475680678307264, "loss": 2.1918, "step": 290825 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014756644203208508, "loss": 2.1909, "step": 290830 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001475648162171942, "loss": 2.0191, "step": 290835 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014756319038605428, "loss": 2.0719, "step": 290840 }, { "epoch": 0.68, "grad_norm": 2.859375, "learning_rate": 0.00014756156453866592, "loss": 2.0012, "step": 290845 }, { "epoch": 0.68, "grad_norm": 2.203125, "learning_rate": 0.00014755993867502967, "loss": 2.2247, "step": 290850 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.0001475583127951461, "loss": 2.2957, "step": 290855 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014755668689901574, "loss": 1.9757, "step": 290860 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.00014755506098663914, "loss": 2.0426, "step": 290865 }, { "epoch": 0.68, "grad_norm": 1.8671875, "learning_rate": 0.00014755343505801688, "loss": 1.9824, "step": 290870 }, { "epoch": 0.68, "grad_norm": 2.015625, "learning_rate": 0.0001475518091131495, "loss": 2.0069, "step": 290875 }, { "epoch": 0.68, "grad_norm": 1.9453125, "learning_rate": 0.00014755018315203757, "loss": 2.1486, "step": 290880 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014754855717468161, "loss": 2.2583, "step": 290885 }, { "epoch": 0.68, "grad_norm": 2.390625, "learning_rate": 0.00014754693118108222, "loss": 2.1297, "step": 290890 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.00014754530517123993, "loss": 2.3206, "step": 290895 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.0001475436791451553, "loss": 1.9638, "step": 290900 }, { "epoch": 0.68, "grad_norm": 1.9296875, "learning_rate": 0.0001475420531028289, "loss": 2.1254, "step": 290905 }, { "epoch": 0.68, "grad_norm": 2.234375, "learning_rate": 0.00014754042704426124, "loss": 2.0826, "step": 290910 }, { "epoch": 0.68, "grad_norm": 2.109375, "learning_rate": 0.00014753880096945294, "loss": 2.1241, "step": 290915 }, { "epoch": 0.68, "grad_norm": 2.46875, "learning_rate": 0.00014753717487840453, "loss": 1.8282, "step": 290920 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014753554877111654, "loss": 1.7986, "step": 290925 }, { "epoch": 0.68, "grad_norm": 2.0625, "learning_rate": 0.00014753392264758955, "loss": 1.898, "step": 290930 }, { "epoch": 0.68, "grad_norm": 1.5546875, "learning_rate": 0.0001475322965078241, "loss": 1.8202, "step": 290935 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.00014753067035182076, "loss": 2.2058, "step": 290940 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.00014752904417958008, "loss": 2.1073, "step": 290945 }, { "epoch": 0.68, "grad_norm": 2.421875, "learning_rate": 0.00014752741799110264, "loss": 2.1171, "step": 290950 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014752579178638897, "loss": 1.9546, "step": 290955 }, { "epoch": 0.68, "grad_norm": 2.0, "learning_rate": 0.00014752416556543957, "loss": 2.0803, "step": 290960 }, { "epoch": 0.68, "grad_norm": 1.953125, "learning_rate": 0.0001475225393282551, "loss": 1.9398, "step": 290965 }, { "epoch": 0.68, "grad_norm": 2.40625, "learning_rate": 0.00014752091307483606, "loss": 2.1438, "step": 290970 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014751928680518305, "loss": 2.0844, "step": 290975 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014751766051929654, "loss": 2.0167, "step": 290980 }, { "epoch": 0.68, "grad_norm": 2.5, "learning_rate": 0.00014751603421717715, "loss": 2.1149, "step": 290985 }, { "epoch": 0.68, "grad_norm": 2.21875, "learning_rate": 0.0001475144078988254, "loss": 1.8893, "step": 290990 }, { "epoch": 0.68, "grad_norm": 2.078125, "learning_rate": 0.0001475127815642419, "loss": 2.0283, "step": 290995 }, { "epoch": 0.68, "grad_norm": 2.65625, "learning_rate": 0.00014751115521342717, "loss": 2.0474, "step": 291000 }, { "epoch": 0.68, "grad_norm": 2.125, "learning_rate": 0.00014750952884638177, "loss": 2.19, "step": 291005 }, { "epoch": 0.68, "grad_norm": 2.3125, "learning_rate": 0.00014750790246310623, "loss": 2.0297, "step": 291010 }, { "epoch": 0.68, "grad_norm": 2.984375, "learning_rate": 0.00014750627606360112, "loss": 2.1464, "step": 291015 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014750464964786707, "loss": 1.9837, "step": 291020 }, { "epoch": 0.68, "grad_norm": 2.1875, "learning_rate": 0.00014750302321590451, "loss": 1.9082, "step": 291025 }, { "epoch": 0.68, "grad_norm": 2.046875, "learning_rate": 0.00014750139676771408, "loss": 2.1552, "step": 291030 }, { "epoch": 0.68, "grad_norm": 1.8984375, "learning_rate": 0.0001474997703032963, "loss": 1.9556, "step": 291035 }, { "epoch": 0.68, "grad_norm": 2.15625, "learning_rate": 0.00014749814382265173, "loss": 2.0143, "step": 291040 }, { "epoch": 0.68, "grad_norm": 2.28125, "learning_rate": 0.00014749651732578095, "loss": 1.9868, "step": 291045 }, { "epoch": 0.68, "grad_norm": 1.7734375, "learning_rate": 0.0001474948908126845, "loss": 1.9234, "step": 291050 }, { "epoch": 0.68, "grad_norm": 2.328125, "learning_rate": 0.0001474932642833629, "loss": 2.0811, "step": 291055 }, { "epoch": 0.68, "grad_norm": 2.359375, "learning_rate": 0.00014749163773781677, "loss": 2.1942, "step": 291060 }, { "epoch": 0.68, "grad_norm": 2.140625, "learning_rate": 0.00014749001117604665, "loss": 2.0454, "step": 291065 }, { "epoch": 0.68, "grad_norm": 2.515625, "learning_rate": 0.00014748838459805307, "loss": 2.0156, "step": 291070 }, { "epoch": 0.68, "grad_norm": 2.03125, "learning_rate": 0.00014748675800383657, "loss": 2.1289, "step": 291075 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014748513139339775, "loss": 2.0703, "step": 291080 }, { "epoch": 0.69, "grad_norm": 3.0625, "learning_rate": 0.00014748350476673713, "loss": 2.0508, "step": 291085 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014748187812385532, "loss": 2.1226, "step": 291090 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014748025146475282, "loss": 2.1159, "step": 291095 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.0001474786247894302, "loss": 2.0498, "step": 291100 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.000147476998097888, "loss": 1.9467, "step": 291105 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014747537139012687, "loss": 1.9193, "step": 291110 }, { "epoch": 0.69, "grad_norm": 1.96875, "learning_rate": 0.00014747374466614723, "loss": 2.0895, "step": 291115 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.0001474721179259497, "loss": 2.083, "step": 291120 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014747049116953484, "loss": 2.0368, "step": 291125 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.0001474688643969032, "loss": 1.9404, "step": 291130 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014746723760805536, "loss": 2.0913, "step": 291135 }, { "epoch": 0.69, "grad_norm": 1.7890625, "learning_rate": 0.00014746561080299184, "loss": 1.8342, "step": 291140 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.0001474639839817132, "loss": 2.0361, "step": 291145 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014746235714421997, "loss": 1.9887, "step": 291150 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.0001474607302905128, "loss": 2.1351, "step": 291155 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014745910342059214, "loss": 1.9967, "step": 291160 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.0001474574765344586, "loss": 2.2381, "step": 291165 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014745584963211274, "loss": 2.0748, "step": 291170 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014745422271355508, "loss": 2.0227, "step": 291175 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.0001474525957787862, "loss": 2.2086, "step": 291180 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014745096882780664, "loss": 2.254, "step": 291185 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.000147449341860617, "loss": 2.1756, "step": 291190 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001474477148772178, "loss": 2.1692, "step": 291195 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.0001474460878776096, "loss": 1.9198, "step": 291200 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.00014744446086179292, "loss": 1.9515, "step": 291205 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014744283382976838, "loss": 1.8736, "step": 291210 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.0001474412067815365, "loss": 1.9426, "step": 291215 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014743957971709786, "loss": 2.1883, "step": 291220 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.000147437952636453, "loss": 2.1136, "step": 291225 }, { "epoch": 0.69, "grad_norm": 2.703125, "learning_rate": 0.00014743632553960244, "loss": 2.0747, "step": 291230 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001474346984265468, "loss": 2.0396, "step": 291235 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014743307129728658, "loss": 2.0595, "step": 291240 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001474314441518224, "loss": 2.0465, "step": 291245 }, { "epoch": 0.69, "grad_norm": 2.578125, "learning_rate": 0.00014742981699015476, "loss": 1.955, "step": 291250 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014742818981228423, "loss": 2.128, "step": 291255 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014742656261821139, "loss": 1.943, "step": 291260 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014742493540793677, "loss": 1.9821, "step": 291265 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.00014742330818146092, "loss": 2.0736, "step": 291270 }, { "epoch": 0.69, "grad_norm": 1.6484375, "learning_rate": 0.0001474216809387844, "loss": 1.9589, "step": 291275 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.0001474200536799078, "loss": 2.1766, "step": 291280 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014741842640483163, "loss": 2.1239, "step": 291285 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014741679911355648, "loss": 2.163, "step": 291290 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001474151718060829, "loss": 2.0595, "step": 291295 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.0001474135444824114, "loss": 1.9926, "step": 291300 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.0001474119171425426, "loss": 2.03, "step": 291305 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.000147410289786477, "loss": 2.2281, "step": 291310 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.00014740866241421522, "loss": 2.0982, "step": 291315 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014740703502575777, "loss": 2.0449, "step": 291320 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014740540762110522, "loss": 1.9843, "step": 291325 }, { "epoch": 0.69, "grad_norm": 1.6640625, "learning_rate": 0.0001474037802002581, "loss": 1.8693, "step": 291330 }, { "epoch": 0.69, "grad_norm": 2.75, "learning_rate": 0.00014740215276321702, "loss": 2.2938, "step": 291335 }, { "epoch": 0.69, "grad_norm": 1.8359375, "learning_rate": 0.0001474005253099825, "loss": 1.8911, "step": 291340 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.0001473988978405551, "loss": 1.9358, "step": 291345 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014739727035493536, "loss": 2.1203, "step": 291350 }, { "epoch": 0.69, "grad_norm": 2.703125, "learning_rate": 0.00014739564285312385, "loss": 2.0754, "step": 291355 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014739401533512116, "loss": 2.1326, "step": 291360 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014739238780092778, "loss": 2.0637, "step": 291365 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014739076025054433, "loss": 2.1323, "step": 291370 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.0001473891326839713, "loss": 2.2153, "step": 291375 }, { "epoch": 0.69, "grad_norm": 1.890625, "learning_rate": 0.00014738750510120933, "loss": 1.8468, "step": 291380 }, { "epoch": 0.69, "grad_norm": 3.578125, "learning_rate": 0.0001473858775022589, "loss": 2.0204, "step": 291385 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001473842498871206, "loss": 2.0071, "step": 291390 }, { "epoch": 0.69, "grad_norm": 1.8125, "learning_rate": 0.00014738262225579498, "loss": 2.1125, "step": 291395 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.0001473809946082826, "loss": 1.8938, "step": 291400 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014737936694458404, "loss": 1.9864, "step": 291405 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.0001473777392646998, "loss": 2.2015, "step": 291410 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014737611156863043, "loss": 1.9981, "step": 291415 }, { "epoch": 0.69, "grad_norm": 1.640625, "learning_rate": 0.00014737448385637654, "loss": 1.7972, "step": 291420 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.0001473728561279387, "loss": 2.1641, "step": 291425 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014737122838331745, "loss": 2.2098, "step": 291430 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.0001473696006225133, "loss": 2.1111, "step": 291435 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.0001473679728455268, "loss": 2.0767, "step": 291440 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014736634505235858, "loss": 2.1855, "step": 291445 }, { "epoch": 0.69, "grad_norm": 2.796875, "learning_rate": 0.0001473647172430092, "loss": 2.0139, "step": 291450 }, { "epoch": 0.69, "grad_norm": 2.546875, "learning_rate": 0.00014736308941747908, "loss": 2.132, "step": 291455 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014736146157576894, "loss": 2.2099, "step": 291460 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014735983371787922, "loss": 1.9548, "step": 291465 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014735820584381055, "loss": 2.1045, "step": 291470 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014735657795356344, "loss": 1.9745, "step": 291475 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014735495004713847, "loss": 2.1109, "step": 291480 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.0001473533221245362, "loss": 2.1091, "step": 291485 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014735169418575716, "loss": 2.2913, "step": 291490 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014735006623080193, "loss": 2.0047, "step": 291495 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001473484382596711, "loss": 2.136, "step": 291500 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014734681027236512, "loss": 2.0114, "step": 291505 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014734518226888462, "loss": 1.9485, "step": 291510 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.0001473435542492302, "loss": 2.1374, "step": 291515 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014734192621340233, "loss": 2.0593, "step": 291520 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014734029816140158, "loss": 2.1205, "step": 291525 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014733867009322852, "loss": 2.0648, "step": 291530 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014733704200888375, "loss": 2.0227, "step": 291535 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014733541390836777, "loss": 2.0113, "step": 291540 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.0001473337857916812, "loss": 2.1411, "step": 291545 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014733215765882446, "loss": 2.0993, "step": 291550 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014733052950979825, "loss": 2.1166, "step": 291555 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014732890134460308, "loss": 2.2276, "step": 291560 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.0001473272731632395, "loss": 2.1236, "step": 291565 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014732564496570803, "loss": 2.2373, "step": 291570 }, { "epoch": 0.69, "grad_norm": 2.828125, "learning_rate": 0.00014732401675200926, "loss": 1.9864, "step": 291575 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014732238852214376, "loss": 2.0033, "step": 291580 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014732076027611213, "loss": 2.1013, "step": 291585 }, { "epoch": 0.69, "grad_norm": 1.546875, "learning_rate": 0.0001473191320139148, "loss": 2.0247, "step": 291590 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014731750373555243, "loss": 2.1009, "step": 291595 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014731587544102555, "loss": 2.0769, "step": 291600 }, { "epoch": 0.69, "grad_norm": 2.921875, "learning_rate": 0.00014731424713033465, "loss": 2.0795, "step": 291605 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014731261880348038, "loss": 1.9951, "step": 291610 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.0001473109904604633, "loss": 2.0679, "step": 291615 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.0001473093621012839, "loss": 1.9908, "step": 291620 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014730773372594275, "loss": 2.0596, "step": 291625 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014730610533444043, "loss": 2.1011, "step": 291630 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014730447692677746, "loss": 1.9171, "step": 291635 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014730284850295446, "loss": 1.9372, "step": 291640 }, { "epoch": 0.69, "grad_norm": 2.828125, "learning_rate": 0.00014730122006297193, "loss": 1.9045, "step": 291645 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014729959160683046, "loss": 2.199, "step": 291650 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014729796313453058, "loss": 2.088, "step": 291655 }, { "epoch": 0.69, "grad_norm": 2.609375, "learning_rate": 0.00014729633464607286, "loss": 2.1017, "step": 291660 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014729470614145785, "loss": 2.1463, "step": 291665 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014729307762068614, "loss": 2.0956, "step": 291670 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.00014729144908375821, "loss": 1.873, "step": 291675 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014728982053067473, "loss": 2.2226, "step": 291680 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014728819196143612, "loss": 2.1163, "step": 291685 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.00014728656337604303, "loss": 2.0468, "step": 291690 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.000147284934774496, "loss": 2.0848, "step": 291695 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014728330615679559, "loss": 2.0496, "step": 291700 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014728167752294232, "loss": 1.9736, "step": 291705 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014728004887293677, "loss": 2.0643, "step": 291710 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.0001472784202067795, "loss": 2.1175, "step": 291715 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014727679152447108, "loss": 2.1751, "step": 291720 }, { "epoch": 0.69, "grad_norm": 2.5625, "learning_rate": 0.00014727516282601205, "loss": 2.0728, "step": 291725 }, { "epoch": 0.69, "grad_norm": 1.7578125, "learning_rate": 0.00014727353411140296, "loss": 2.1559, "step": 291730 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.00014727190538064438, "loss": 2.3552, "step": 291735 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014727027663373685, "loss": 2.1101, "step": 291740 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014726864787068095, "loss": 2.1794, "step": 291745 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.0001472670190914772, "loss": 2.2333, "step": 291750 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.0001472653902961262, "loss": 2.1094, "step": 291755 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.0001472637614846285, "loss": 2.0824, "step": 291760 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.0001472621326569846, "loss": 1.9375, "step": 291765 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014726050381319513, "loss": 1.9747, "step": 291770 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014725887495326058, "loss": 2.1584, "step": 291775 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014725724607718156, "loss": 2.0716, "step": 291780 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014725561718495863, "loss": 1.8858, "step": 291785 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014725398827659228, "loss": 2.2853, "step": 291790 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014725235935208314, "loss": 2.1884, "step": 291795 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014725073041143172, "loss": 1.9967, "step": 291800 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014724910145463862, "loss": 2.3649, "step": 291805 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014724747248170435, "loss": 1.9294, "step": 291810 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.0001472458434926295, "loss": 2.0284, "step": 291815 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.0001472442144874146, "loss": 1.9786, "step": 291820 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014724258546606022, "loss": 2.0111, "step": 291825 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014724095642856694, "loss": 2.0194, "step": 291830 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014723932737493526, "loss": 2.114, "step": 291835 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.0001472376983051658, "loss": 2.2148, "step": 291840 }, { "epoch": 0.69, "grad_norm": 1.9609375, "learning_rate": 0.00014723606921925907, "loss": 2.1582, "step": 291845 }, { "epoch": 0.69, "grad_norm": 1.8984375, "learning_rate": 0.00014723444011721564, "loss": 1.9124, "step": 291850 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014723281099903606, "loss": 2.1378, "step": 291855 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.0001472311818647209, "loss": 2.019, "step": 291860 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014722955271427075, "loss": 2.2297, "step": 291865 }, { "epoch": 0.69, "grad_norm": 2.625, "learning_rate": 0.00014722792354768607, "loss": 1.9924, "step": 291870 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.0001472262943649675, "loss": 2.0195, "step": 291875 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.0001472246651661156, "loss": 2.069, "step": 291880 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014722303595113086, "loss": 2.1195, "step": 291885 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.00014722140672001387, "loss": 1.9247, "step": 291890 }, { "epoch": 0.69, "grad_norm": 2.703125, "learning_rate": 0.0001472197774727652, "loss": 2.0338, "step": 291895 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014721814820938537, "loss": 2.0654, "step": 291900 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014721651892987502, "loss": 2.1753, "step": 291905 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014721488963423464, "loss": 2.2191, "step": 291910 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.0001472132603224648, "loss": 2.2043, "step": 291915 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.00014721163099456602, "loss": 2.1386, "step": 291920 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.0001472100016505389, "loss": 1.9045, "step": 291925 }, { "epoch": 0.69, "grad_norm": 2.5625, "learning_rate": 0.000147208372290384, "loss": 2.1457, "step": 291930 }, { "epoch": 0.69, "grad_norm": 2.640625, "learning_rate": 0.00014720674291410186, "loss": 2.0055, "step": 291935 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014720511352169305, "loss": 2.3818, "step": 291940 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001472034841131581, "loss": 2.14, "step": 291945 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.0001472018546884976, "loss": 1.9401, "step": 291950 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014720022524771206, "loss": 2.1382, "step": 291955 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.0001471985957908021, "loss": 2.0124, "step": 291960 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014719696631776823, "loss": 2.0346, "step": 291965 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.000147195336828611, "loss": 2.1541, "step": 291970 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.00014719370732333106, "loss": 2.1333, "step": 291975 }, { "epoch": 0.69, "grad_norm": 2.5625, "learning_rate": 0.0001471920778019288, "loss": 2.1391, "step": 291980 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014719044826440493, "loss": 2.049, "step": 291985 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014718881871075994, "loss": 2.0646, "step": 291990 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014718718914099438, "loss": 2.0675, "step": 291995 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014718555955510885, "loss": 2.1418, "step": 292000 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001471839299531038, "loss": 1.8898, "step": 292005 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014718230033497994, "loss": 1.9219, "step": 292010 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014718067070073772, "loss": 2.0619, "step": 292015 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.00014717904105037777, "loss": 2.011, "step": 292020 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014717741138390054, "loss": 2.209, "step": 292025 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014717578170130667, "loss": 2.1159, "step": 292030 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.0001471741520025967, "loss": 2.0276, "step": 292035 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.0001471725222877712, "loss": 2.1565, "step": 292040 }, { "epoch": 0.69, "grad_norm": 1.8984375, "learning_rate": 0.0001471708925568307, "loss": 1.8948, "step": 292045 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014716926280977579, "loss": 2.151, "step": 292050 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014716763304660697, "loss": 2.057, "step": 292055 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014716600326732484, "loss": 1.9094, "step": 292060 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014716437347192995, "loss": 2.0566, "step": 292065 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.00014716274366042287, "loss": 2.0064, "step": 292070 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.0001471611138328041, "loss": 2.2413, "step": 292075 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.0001471594839890743, "loss": 2.0359, "step": 292080 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014715785412923392, "loss": 1.9864, "step": 292085 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014715622425328357, "loss": 2.1607, "step": 292090 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.0001471545943612238, "loss": 2.063, "step": 292095 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014715296445305516, "loss": 1.9295, "step": 292100 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014715133452877823, "loss": 2.1282, "step": 292105 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014714970458839353, "loss": 2.1758, "step": 292110 }, { "epoch": 0.69, "grad_norm": 2.828125, "learning_rate": 0.00014714807463190165, "loss": 2.2134, "step": 292115 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014714644465930313, "loss": 2.0425, "step": 292120 }, { "epoch": 0.69, "grad_norm": 2.90625, "learning_rate": 0.00014714481467059852, "loss": 1.9881, "step": 292125 }, { "epoch": 0.69, "grad_norm": 2.640625, "learning_rate": 0.00014714318466578838, "loss": 2.0976, "step": 292130 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.0001471415546448733, "loss": 2.1505, "step": 292135 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014713992460785378, "loss": 2.0746, "step": 292140 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014713829455473043, "loss": 1.977, "step": 292145 }, { "epoch": 0.69, "grad_norm": 2.578125, "learning_rate": 0.00014713666448550375, "loss": 1.8669, "step": 292150 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014713503440017437, "loss": 1.9328, "step": 292155 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014713340429874282, "loss": 2.2676, "step": 292160 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.0001471317741812096, "loss": 2.2073, "step": 292165 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014713014404757534, "loss": 2.0983, "step": 292170 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014712851389784055, "loss": 2.0743, "step": 292175 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014712688373200581, "loss": 1.9158, "step": 292180 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014712525355007168, "loss": 2.0911, "step": 292185 }, { "epoch": 0.69, "grad_norm": 1.8828125, "learning_rate": 0.0001471236233520387, "loss": 2.1262, "step": 292190 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014712199313790742, "loss": 2.1597, "step": 292195 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014712036290767845, "loss": 1.8985, "step": 292200 }, { "epoch": 0.69, "grad_norm": 2.859375, "learning_rate": 0.0001471187326613523, "loss": 1.9901, "step": 292205 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.0001471171023989295, "loss": 2.1529, "step": 292210 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.00014711547212041068, "loss": 2.1219, "step": 292215 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014711384182579634, "loss": 1.8513, "step": 292220 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.00014711221151508707, "loss": 2.1127, "step": 292225 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.0001471105811882834, "loss": 1.938, "step": 292230 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014710895084538592, "loss": 2.0194, "step": 292235 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014710732048639518, "loss": 2.0095, "step": 292240 }, { "epoch": 0.69, "grad_norm": 3.34375, "learning_rate": 0.0001471056901113117, "loss": 1.9108, "step": 292245 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014710405972013607, "loss": 2.0631, "step": 292250 }, { "epoch": 0.69, "grad_norm": 1.859375, "learning_rate": 0.0001471024293128688, "loss": 2.0053, "step": 292255 }, { "epoch": 0.69, "grad_norm": 1.546875, "learning_rate": 0.00014710079888951053, "loss": 2.0057, "step": 292260 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014709916845006177, "loss": 2.043, "step": 292265 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.0001470975379945231, "loss": 2.0327, "step": 292270 }, { "epoch": 0.69, "grad_norm": 2.828125, "learning_rate": 0.00014709590752289502, "loss": 2.1187, "step": 292275 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014709427703517812, "loss": 2.0958, "step": 292280 }, { "epoch": 0.69, "grad_norm": 1.8828125, "learning_rate": 0.000147092646531373, "loss": 2.2107, "step": 292285 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014709101601148016, "loss": 2.3567, "step": 292290 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014708938547550017, "loss": 2.0771, "step": 292295 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014708775492343356, "loss": 1.9915, "step": 292300 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014708612435528095, "loss": 2.0417, "step": 292305 }, { "epoch": 0.69, "grad_norm": 1.8984375, "learning_rate": 0.00014708449377104287, "loss": 2.004, "step": 292310 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.0001470828631707199, "loss": 2.1805, "step": 292315 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014708123255431255, "loss": 2.1357, "step": 292320 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014707960192182135, "loss": 1.9854, "step": 292325 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014707797127324693, "loss": 2.01, "step": 292330 }, { "epoch": 0.69, "grad_norm": 1.921875, "learning_rate": 0.00014707634060858986, "loss": 2.1394, "step": 292335 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001470747099278506, "loss": 2.1985, "step": 292340 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.0001470730792310298, "loss": 2.0354, "step": 292345 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.000147071448518128, "loss": 2.0834, "step": 292350 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014706981778914567, "loss": 1.8954, "step": 292355 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014706818704408355, "loss": 2.0423, "step": 292360 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.000147066556282942, "loss": 2.151, "step": 292365 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014706492550572168, "loss": 1.9024, "step": 292370 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014706329471242312, "loss": 2.133, "step": 292375 }, { "epoch": 0.69, "grad_norm": 1.9765625, "learning_rate": 0.00014706166390304688, "loss": 2.3522, "step": 292380 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014706003307759353, "loss": 2.1247, "step": 292385 }, { "epoch": 0.69, "grad_norm": 2.875, "learning_rate": 0.00014705840223606362, "loss": 1.9568, "step": 292390 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014705677137845773, "loss": 2.3078, "step": 292395 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.00014705514050477635, "loss": 1.9398, "step": 292400 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.00014705350961502012, "loss": 1.9886, "step": 292405 }, { "epoch": 0.69, "grad_norm": 1.7265625, "learning_rate": 0.00014705187870918952, "loss": 2.0814, "step": 292410 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014705024778728518, "loss": 2.1841, "step": 292415 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.0001470486168493076, "loss": 2.2216, "step": 292420 }, { "epoch": 0.69, "grad_norm": 2.625, "learning_rate": 0.00014704698589525735, "loss": 2.1894, "step": 292425 }, { "epoch": 0.69, "grad_norm": 1.828125, "learning_rate": 0.00014704535492513504, "loss": 1.9841, "step": 292430 }, { "epoch": 0.69, "grad_norm": 2.671875, "learning_rate": 0.00014704372393894115, "loss": 1.9694, "step": 292435 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.0001470420929366763, "loss": 1.9953, "step": 292440 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014704046191834098, "loss": 2.014, "step": 292445 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001470388308839358, "loss": 2.1242, "step": 292450 }, { "epoch": 0.69, "grad_norm": 2.578125, "learning_rate": 0.0001470371998334613, "loss": 1.9385, "step": 292455 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014703556876691803, "loss": 1.9116, "step": 292460 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014703393768430658, "loss": 2.0591, "step": 292465 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014703230658562747, "loss": 2.0715, "step": 292470 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014703067547088128, "loss": 2.1623, "step": 292475 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014702904434006855, "loss": 2.1252, "step": 292480 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014702741319318983, "loss": 2.1026, "step": 292485 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014702578203024572, "loss": 2.0184, "step": 292490 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014702415085123674, "loss": 2.1951, "step": 292495 }, { "epoch": 0.69, "grad_norm": 1.890625, "learning_rate": 0.00014702251965616347, "loss": 2.0868, "step": 292500 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014702088844502644, "loss": 1.9911, "step": 292505 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014701925721782622, "loss": 2.1182, "step": 292510 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014701762597456338, "loss": 2.3499, "step": 292515 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014701599471523847, "loss": 2.0094, "step": 292520 }, { "epoch": 0.69, "grad_norm": 1.96875, "learning_rate": 0.00014701436343985202, "loss": 1.9931, "step": 292525 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014701273214840464, "loss": 1.9847, "step": 292530 }, { "epoch": 0.69, "grad_norm": 1.8515625, "learning_rate": 0.0001470111008408968, "loss": 1.9792, "step": 292535 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014700946951732918, "loss": 2.0501, "step": 292540 }, { "epoch": 0.69, "grad_norm": 1.8203125, "learning_rate": 0.00014700783817770226, "loss": 1.9533, "step": 292545 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014700620682201658, "loss": 2.1355, "step": 292550 }, { "epoch": 0.69, "grad_norm": 2.65625, "learning_rate": 0.00014700457545027275, "loss": 2.2191, "step": 292555 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001470029440624713, "loss": 2.1768, "step": 292560 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014700131265861282, "loss": 1.856, "step": 292565 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014699968123869778, "loss": 1.9628, "step": 292570 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014699804980272685, "loss": 1.9583, "step": 292575 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014699641835070048, "loss": 1.945, "step": 292580 }, { "epoch": 0.69, "grad_norm": 1.796875, "learning_rate": 0.00014699478688261935, "loss": 2.0333, "step": 292585 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.0001469931553984839, "loss": 2.084, "step": 292590 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.00014699152389829474, "loss": 1.9977, "step": 292595 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014698989238205243, "loss": 1.9774, "step": 292600 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.0001469882608497575, "loss": 2.1866, "step": 292605 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.0001469866293014106, "loss": 2.122, "step": 292610 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014698499773701216, "loss": 2.1892, "step": 292615 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014698336615656275, "loss": 1.9326, "step": 292620 }, { "epoch": 0.69, "grad_norm": 1.953125, "learning_rate": 0.00014698173456006303, "loss": 1.9094, "step": 292625 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014698010294751353, "loss": 1.8793, "step": 292630 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014697847131891472, "loss": 2.1072, "step": 292635 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001469768396742672, "loss": 2.0889, "step": 292640 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014697520801357157, "loss": 2.0222, "step": 292645 }, { "epoch": 0.69, "grad_norm": 2.640625, "learning_rate": 0.00014697357633682834, "loss": 2.1163, "step": 292650 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.0001469719446440381, "loss": 2.1646, "step": 292655 }, { "epoch": 0.69, "grad_norm": 3.09375, "learning_rate": 0.0001469703129352014, "loss": 2.0314, "step": 292660 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014696868121031876, "loss": 2.0592, "step": 292665 }, { "epoch": 0.69, "grad_norm": 2.875, "learning_rate": 0.00014696704946939076, "loss": 2.1034, "step": 292670 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.000146965417712418, "loss": 1.7618, "step": 292675 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.000146963785939401, "loss": 2.0782, "step": 292680 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.0001469621541503403, "loss": 2.17, "step": 292685 }, { "epoch": 0.69, "grad_norm": 1.8125, "learning_rate": 0.00014696052234523647, "loss": 2.0414, "step": 292690 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014695889052409008, "loss": 2.0383, "step": 292695 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014695725868690168, "loss": 2.2203, "step": 292700 }, { "epoch": 0.69, "grad_norm": 1.9609375, "learning_rate": 0.00014695562683367185, "loss": 2.0533, "step": 292705 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.0001469539949644011, "loss": 2.0269, "step": 292710 }, { "epoch": 0.69, "grad_norm": 2.5625, "learning_rate": 0.00014695236307909004, "loss": 2.0168, "step": 292715 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001469507311777392, "loss": 2.1567, "step": 292720 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014694909926034913, "loss": 1.9902, "step": 292725 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014694746732692038, "loss": 2.1333, "step": 292730 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014694583537745351, "loss": 1.9251, "step": 292735 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014694420341194913, "loss": 2.1392, "step": 292740 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.00014694257143040777, "loss": 2.0946, "step": 292745 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014694093943282995, "loss": 1.9931, "step": 292750 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014693930741921623, "loss": 2.1486, "step": 292755 }, { "epoch": 0.69, "grad_norm": 1.6953125, "learning_rate": 0.00014693767538956722, "loss": 1.9611, "step": 292760 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014693604334388346, "loss": 1.9706, "step": 292765 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014693441128216548, "loss": 2.1405, "step": 292770 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014693277920441386, "loss": 2.1331, "step": 292775 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014693114711062915, "loss": 2.0749, "step": 292780 }, { "epoch": 0.69, "grad_norm": 1.8515625, "learning_rate": 0.0001469295150008119, "loss": 2.0934, "step": 292785 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014692788287496268, "loss": 2.1104, "step": 292790 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014692625073308205, "loss": 1.9999, "step": 292795 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014692461857517057, "loss": 2.0156, "step": 292800 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014692298640122878, "loss": 2.2179, "step": 292805 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014692135421125725, "loss": 2.1675, "step": 292810 }, { "epoch": 0.69, "grad_norm": 2.8125, "learning_rate": 0.00014691972200525652, "loss": 2.1551, "step": 292815 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014691808978322716, "loss": 2.0947, "step": 292820 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014691645754516975, "loss": 2.0532, "step": 292825 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014691482529108482, "loss": 2.1545, "step": 292830 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014691319302097294, "loss": 1.9934, "step": 292835 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.00014691156073483465, "loss": 2.1104, "step": 292840 }, { "epoch": 0.69, "grad_norm": 2.59375, "learning_rate": 0.00014690992843267053, "loss": 2.0948, "step": 292845 }, { "epoch": 0.69, "grad_norm": 1.890625, "learning_rate": 0.00014690829611448112, "loss": 2.0288, "step": 292850 }, { "epoch": 0.69, "grad_norm": 2.671875, "learning_rate": 0.00014690666378026698, "loss": 2.0962, "step": 292855 }, { "epoch": 0.69, "grad_norm": 2.640625, "learning_rate": 0.00014690503143002868, "loss": 1.9361, "step": 292860 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014690339906376677, "loss": 2.1479, "step": 292865 }, { "epoch": 0.69, "grad_norm": 1.8984375, "learning_rate": 0.00014690176668148182, "loss": 1.965, "step": 292870 }, { "epoch": 0.69, "grad_norm": 1.7890625, "learning_rate": 0.00014690013428317436, "loss": 2.0662, "step": 292875 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.000146898501868845, "loss": 2.0799, "step": 292880 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014689686943849422, "loss": 2.1047, "step": 292885 }, { "epoch": 0.69, "grad_norm": 2.734375, "learning_rate": 0.00014689523699212262, "loss": 2.1834, "step": 292890 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014689360452973076, "loss": 2.0078, "step": 292895 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.0001468919720513192, "loss": 2.0569, "step": 292900 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014689033955688852, "loss": 2.1412, "step": 292905 }, { "epoch": 0.69, "grad_norm": 1.6953125, "learning_rate": 0.0001468887070464392, "loss": 1.7825, "step": 292910 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014688707451997188, "loss": 1.9783, "step": 292915 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014688544197748705, "loss": 2.0836, "step": 292920 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014688380941898537, "loss": 1.9762, "step": 292925 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014688217684446727, "loss": 2.0384, "step": 292930 }, { "epoch": 0.69, "grad_norm": 1.96875, "learning_rate": 0.00014688054425393337, "loss": 1.9675, "step": 292935 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014687891164738426, "loss": 2.1104, "step": 292940 }, { "epoch": 0.69, "grad_norm": 2.625, "learning_rate": 0.00014687727902482043, "loss": 1.9819, "step": 292945 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.0001468756463862425, "loss": 2.0765, "step": 292950 }, { "epoch": 0.69, "grad_norm": 1.875, "learning_rate": 0.000146874013731651, "loss": 2.1899, "step": 292955 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014687238106104647, "loss": 2.0268, "step": 292960 }, { "epoch": 0.69, "grad_norm": 2.859375, "learning_rate": 0.00014687074837442947, "loss": 1.9239, "step": 292965 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014686911567180058, "loss": 1.9354, "step": 292970 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014686748295316036, "loss": 1.8826, "step": 292975 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.00014686585021850935, "loss": 2.1738, "step": 292980 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014686421746784814, "loss": 2.1681, "step": 292985 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014686258470117728, "loss": 2.1899, "step": 292990 }, { "epoch": 0.69, "grad_norm": 2.78125, "learning_rate": 0.00014686095191849725, "loss": 2.1977, "step": 292995 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.0001468593191198087, "loss": 1.8427, "step": 293000 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014685768630511215, "loss": 2.1958, "step": 293005 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014685605347440817, "loss": 1.9853, "step": 293010 }, { "epoch": 0.69, "grad_norm": 1.9765625, "learning_rate": 0.0001468544206276973, "loss": 2.0991, "step": 293015 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014685278776498014, "loss": 2.0181, "step": 293020 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014685115488625717, "loss": 2.0521, "step": 293025 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014684952199152902, "loss": 2.176, "step": 293030 }, { "epoch": 0.69, "grad_norm": 2.75, "learning_rate": 0.00014684788908079623, "loss": 2.1405, "step": 293035 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014684625615405937, "loss": 2.0763, "step": 293040 }, { "epoch": 0.69, "grad_norm": 1.8671875, "learning_rate": 0.00014684462321131898, "loss": 2.082, "step": 293045 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014684299025257555, "loss": 2.1861, "step": 293050 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014684135727782978, "loss": 2.2044, "step": 293055 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014683972428708211, "loss": 2.1845, "step": 293060 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014683809128033317, "loss": 1.9585, "step": 293065 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014683645825758348, "loss": 2.1411, "step": 293070 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014683482521883357, "loss": 2.0181, "step": 293075 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014683319216408407, "loss": 2.1342, "step": 293080 }, { "epoch": 0.69, "grad_norm": 2.875, "learning_rate": 0.00014683155909333553, "loss": 1.9418, "step": 293085 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.00014682992600658843, "loss": 2.0281, "step": 293090 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.0001468282929038434, "loss": 2.0795, "step": 293095 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014682665978510095, "loss": 2.119, "step": 293100 }, { "epoch": 0.69, "grad_norm": 1.875, "learning_rate": 0.0001468250266503617, "loss": 2.1258, "step": 293105 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014682339349962614, "loss": 2.2476, "step": 293110 }, { "epoch": 0.69, "grad_norm": 1.75, "learning_rate": 0.00014682176033289487, "loss": 2.0079, "step": 293115 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014682012715016844, "loss": 2.0578, "step": 293120 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014681849395144743, "loss": 2.0251, "step": 293125 }, { "epoch": 0.69, "grad_norm": 2.765625, "learning_rate": 0.00014681686073673235, "loss": 2.0141, "step": 293130 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014681522750602377, "loss": 2.113, "step": 293135 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014681359425932228, "loss": 2.1054, "step": 293140 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.00014681196099662839, "loss": 2.2413, "step": 293145 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014681032771794272, "loss": 2.0245, "step": 293150 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014680869442326578, "loss": 1.9411, "step": 293155 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014680706111259813, "loss": 2.2148, "step": 293160 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014680542778594034, "loss": 2.0298, "step": 293165 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014680379444329298, "loss": 2.0962, "step": 293170 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014680216108465662, "loss": 2.2339, "step": 293175 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014680052771003175, "loss": 2.1242, "step": 293180 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014679889431941896, "loss": 2.1274, "step": 293185 }, { "epoch": 0.69, "grad_norm": 1.828125, "learning_rate": 0.00014679726091281885, "loss": 1.9925, "step": 293190 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014679562749023191, "loss": 2.1638, "step": 293195 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014679399405165882, "loss": 2.0426, "step": 293200 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014679236059709998, "loss": 2.148, "step": 293205 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.000146790727126556, "loss": 2.0991, "step": 293210 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014678909364002752, "loss": 1.8932, "step": 293215 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014678746013751503, "loss": 2.1791, "step": 293220 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014678582661901908, "loss": 2.0208, "step": 293225 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014678419308454023, "loss": 2.1249, "step": 293230 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014678255953407904, "loss": 1.9752, "step": 293235 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.00014678092596763612, "loss": 1.9958, "step": 293240 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014677929238521197, "loss": 1.8546, "step": 293245 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014677765878680718, "loss": 2.2039, "step": 293250 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014677602517242227, "loss": 2.0745, "step": 293255 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014677439154205782, "loss": 1.9693, "step": 293260 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.0001467727578957144, "loss": 2.0961, "step": 293265 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014677112423339256, "loss": 2.1754, "step": 293270 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014676949055509285, "loss": 2.1341, "step": 293275 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014676785686081585, "loss": 2.0705, "step": 293280 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014676622315056207, "loss": 2.0066, "step": 293285 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014676458942433214, "loss": 2.1265, "step": 293290 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014676295568212655, "loss": 2.037, "step": 293295 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.0001467613219239459, "loss": 2.0438, "step": 293300 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001467596881497907, "loss": 2.0985, "step": 293305 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014675805435966158, "loss": 2.0537, "step": 293310 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014675642055355907, "loss": 2.1657, "step": 293315 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.0001467547867314837, "loss": 1.9894, "step": 293320 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014675315289343604, "loss": 2.1148, "step": 293325 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014675151903941668, "loss": 1.947, "step": 293330 }, { "epoch": 0.69, "grad_norm": 1.890625, "learning_rate": 0.00014674988516942615, "loss": 2.0847, "step": 293335 }, { "epoch": 0.69, "grad_norm": 1.96875, "learning_rate": 0.000146748251283465, "loss": 2.1465, "step": 293340 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014674661738153373, "loss": 1.9506, "step": 293345 }, { "epoch": 0.69, "grad_norm": 1.5390625, "learning_rate": 0.00014674498346363304, "loss": 1.9151, "step": 293350 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014674334952976344, "loss": 2.1883, "step": 293355 }, { "epoch": 0.69, "grad_norm": 1.953125, "learning_rate": 0.00014674171557992543, "loss": 2.3021, "step": 293360 }, { "epoch": 0.69, "grad_norm": 1.8203125, "learning_rate": 0.00014674008161411962, "loss": 1.9765, "step": 293365 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.0001467384476323465, "loss": 2.025, "step": 293370 }, { "epoch": 0.69, "grad_norm": 2.578125, "learning_rate": 0.00014673681363460672, "loss": 2.1649, "step": 293375 }, { "epoch": 0.69, "grad_norm": 2.59375, "learning_rate": 0.0001467351796209008, "loss": 2.174, "step": 293380 }, { "epoch": 0.69, "grad_norm": 2.578125, "learning_rate": 0.0001467335455912293, "loss": 1.9229, "step": 293385 }, { "epoch": 0.69, "grad_norm": 2.84375, "learning_rate": 0.00014673191154559277, "loss": 1.9328, "step": 293390 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014673027748399176, "loss": 2.0821, "step": 293395 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014672864340642682, "loss": 1.9234, "step": 293400 }, { "epoch": 0.69, "grad_norm": 2.703125, "learning_rate": 0.00014672700931289855, "loss": 2.1491, "step": 293405 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.0001467253752034075, "loss": 2.0381, "step": 293410 }, { "epoch": 0.69, "grad_norm": 2.890625, "learning_rate": 0.00014672374107795418, "loss": 2.1967, "step": 293415 }, { "epoch": 0.69, "grad_norm": 2.671875, "learning_rate": 0.0001467221069365392, "loss": 2.1248, "step": 293420 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014672047277916313, "loss": 2.053, "step": 293425 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014671883860582644, "loss": 2.1066, "step": 293430 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014671720441652978, "loss": 2.0073, "step": 293435 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014671557021127369, "loss": 2.0147, "step": 293440 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014671393599005868, "loss": 2.1234, "step": 293445 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014671230175288536, "loss": 2.0343, "step": 293450 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.0001467106674997543, "loss": 1.9905, "step": 293455 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.000146709033230666, "loss": 2.2096, "step": 293460 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014670739894562105, "loss": 2.051, "step": 293465 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014670576464462001, "loss": 1.9251, "step": 293470 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014670413032766343, "loss": 2.1033, "step": 293475 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.00014670249599475183, "loss": 1.8704, "step": 293480 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014670086164588587, "loss": 1.9529, "step": 293485 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.000146699227281066, "loss": 2.0671, "step": 293490 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001466975929002929, "loss": 1.9971, "step": 293495 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.000146695958503567, "loss": 2.1562, "step": 293500 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.0001466943240908889, "loss": 2.1294, "step": 293505 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.0001466926896622592, "loss": 2.0116, "step": 293510 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014669105521767845, "loss": 2.2365, "step": 293515 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014668942075714715, "loss": 1.8543, "step": 293520 }, { "epoch": 0.69, "grad_norm": 2.546875, "learning_rate": 0.00014668778628066593, "loss": 2.1305, "step": 293525 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014668615178823529, "loss": 2.197, "step": 293530 }, { "epoch": 0.69, "grad_norm": 1.84375, "learning_rate": 0.00014668451727985578, "loss": 2.2201, "step": 293535 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014668288275552807, "loss": 2.183, "step": 293540 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.00014668124821525262, "loss": 2.1202, "step": 293545 }, { "epoch": 0.69, "grad_norm": 1.859375, "learning_rate": 0.00014667961365902999, "loss": 2.0081, "step": 293550 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014667797908686077, "loss": 2.0046, "step": 293555 }, { "epoch": 0.69, "grad_norm": 1.953125, "learning_rate": 0.00014667634449874547, "loss": 1.9418, "step": 293560 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014667470989468472, "loss": 2.0383, "step": 293565 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014667307527467904, "loss": 1.93, "step": 293570 }, { "epoch": 0.69, "grad_norm": 3.09375, "learning_rate": 0.00014667144063872897, "loss": 2.093, "step": 293575 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014666980598683512, "loss": 2.0712, "step": 293580 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.000146668171318998, "loss": 2.0494, "step": 293585 }, { "epoch": 0.69, "grad_norm": 2.921875, "learning_rate": 0.0001466665366352182, "loss": 2.1669, "step": 293590 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014666490193549625, "loss": 2.0158, "step": 293595 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014666326721983272, "loss": 2.023, "step": 293600 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014666163248822818, "loss": 2.3136, "step": 293605 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014665999774068317, "loss": 1.9693, "step": 293610 }, { "epoch": 0.69, "grad_norm": 2.5625, "learning_rate": 0.00014665836297719827, "loss": 2.2001, "step": 293615 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014665672819777403, "loss": 2.1328, "step": 293620 }, { "epoch": 0.69, "grad_norm": 2.59375, "learning_rate": 0.00014665509340241098, "loss": 2.1356, "step": 293625 }, { "epoch": 0.69, "grad_norm": 2.546875, "learning_rate": 0.00014665345859110975, "loss": 2.1182, "step": 293630 }, { "epoch": 0.69, "grad_norm": 2.984375, "learning_rate": 0.00014665182376387084, "loss": 2.0475, "step": 293635 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014665018892069478, "loss": 2.0784, "step": 293640 }, { "epoch": 0.69, "grad_norm": 2.8125, "learning_rate": 0.0001466485540615822, "loss": 2.0731, "step": 293645 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014664691918653361, "loss": 2.1437, "step": 293650 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014664528429554963, "loss": 1.9879, "step": 293655 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014664364938863074, "loss": 1.9458, "step": 293660 }, { "epoch": 0.69, "grad_norm": 2.5625, "learning_rate": 0.00014664201446577753, "loss": 2.1187, "step": 293665 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014664037952699056, "loss": 2.1028, "step": 293670 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014663874457227042, "loss": 1.9527, "step": 293675 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001466371096016176, "loss": 1.9897, "step": 293680 }, { "epoch": 0.69, "grad_norm": 1.9609375, "learning_rate": 0.00014663547461503274, "loss": 1.8836, "step": 293685 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014663383961251633, "loss": 1.9516, "step": 293690 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014663220459406896, "loss": 1.9457, "step": 293695 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014663056955969116, "loss": 2.04, "step": 293700 }, { "epoch": 0.69, "grad_norm": 2.640625, "learning_rate": 0.00014662893450938355, "loss": 1.74, "step": 293705 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014662729944314663, "loss": 2.0676, "step": 293710 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014662566436098097, "loss": 2.1303, "step": 293715 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014662402926288715, "loss": 2.1791, "step": 293720 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.0001466223941488657, "loss": 1.8714, "step": 293725 }, { "epoch": 0.69, "grad_norm": 2.859375, "learning_rate": 0.00014662075901891722, "loss": 2.0898, "step": 293730 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014661912387304224, "loss": 2.0547, "step": 293735 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014661748871124133, "loss": 2.1153, "step": 293740 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.000146615853533515, "loss": 2.1577, "step": 293745 }, { "epoch": 0.69, "grad_norm": 1.7265625, "learning_rate": 0.00014661421833986392, "loss": 1.9827, "step": 293750 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.0001466125831302885, "loss": 2.1311, "step": 293755 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014661094790478942, "loss": 1.9198, "step": 293760 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014660931266336717, "loss": 1.9705, "step": 293765 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014660767740602238, "loss": 2.134, "step": 293770 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.0001466060421327555, "loss": 2.2236, "step": 293775 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014660440684356718, "loss": 2.1618, "step": 293780 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014660277153845795, "loss": 1.9043, "step": 293785 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001466011362174284, "loss": 1.954, "step": 293790 }, { "epoch": 0.69, "grad_norm": 2.703125, "learning_rate": 0.00014659950088047902, "loss": 2.0366, "step": 293795 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.00014659786552761038, "loss": 1.9641, "step": 293800 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.0001465962301588231, "loss": 2.149, "step": 293805 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014659459477411773, "loss": 2.221, "step": 293810 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014659295937349476, "loss": 2.1186, "step": 293815 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014659132395695477, "loss": 1.9141, "step": 293820 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.00014658968852449838, "loss": 1.9637, "step": 293825 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014658805307612605, "loss": 1.9468, "step": 293830 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014658641761183848, "loss": 2.1122, "step": 293835 }, { "epoch": 0.69, "grad_norm": 1.9609375, "learning_rate": 0.0001465847821316361, "loss": 1.9548, "step": 293840 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014658314663551953, "loss": 2.0485, "step": 293845 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.0001465815111234893, "loss": 2.1687, "step": 293850 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014657987559554597, "loss": 1.942, "step": 293855 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014657824005169013, "loss": 1.9962, "step": 293860 }, { "epoch": 0.69, "grad_norm": 2.90625, "learning_rate": 0.0001465766044919223, "loss": 2.1195, "step": 293865 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014657496891624307, "loss": 2.0047, "step": 293870 }, { "epoch": 0.69, "grad_norm": 2.734375, "learning_rate": 0.000146573333324653, "loss": 2.0234, "step": 293875 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.0001465716977171526, "loss": 2.0866, "step": 293880 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014657006209374248, "loss": 2.1241, "step": 293885 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001465684264544232, "loss": 2.0007, "step": 293890 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014656679079919527, "loss": 2.2066, "step": 293895 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001465651551280593, "loss": 2.1885, "step": 293900 }, { "epoch": 0.69, "grad_norm": 2.546875, "learning_rate": 0.00014656351944101583, "loss": 1.9781, "step": 293905 }, { "epoch": 0.69, "grad_norm": 2.546875, "learning_rate": 0.00014656188373806541, "loss": 2.0048, "step": 293910 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.0001465602480192086, "loss": 2.1176, "step": 293915 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014655861228444597, "loss": 2.0549, "step": 293920 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.0001465569765337781, "loss": 1.9916, "step": 293925 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.0001465553407672055, "loss": 2.1407, "step": 293930 }, { "epoch": 0.69, "grad_norm": 1.9765625, "learning_rate": 0.00014655370498472872, "loss": 2.1061, "step": 293935 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014655206918634837, "loss": 2.0303, "step": 293940 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.000146550433372065, "loss": 1.9592, "step": 293945 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014654879754187917, "loss": 2.288, "step": 293950 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.0001465471616957914, "loss": 1.98, "step": 293955 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014654552583380227, "loss": 2.0916, "step": 293960 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014654388995591236, "loss": 2.2368, "step": 293965 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014654225406212225, "loss": 1.9486, "step": 293970 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.00014654061815243244, "loss": 1.9405, "step": 293975 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.0001465389822268435, "loss": 2.0267, "step": 293980 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014653734628535596, "loss": 2.052, "step": 293985 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014653571032797045, "loss": 2.027, "step": 293990 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.0001465340743546875, "loss": 2.0229, "step": 293995 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014653243836550768, "loss": 2.0594, "step": 294000 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014653080236043154, "loss": 2.1758, "step": 294005 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.0001465291663394596, "loss": 2.3119, "step": 294010 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014652753030259248, "loss": 1.8493, "step": 294015 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.0001465258942498307, "loss": 2.0218, "step": 294020 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014652425818117483, "loss": 2.3503, "step": 294025 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014652262209662544, "loss": 2.1704, "step": 294030 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014652098599618306, "loss": 1.9782, "step": 294035 }, { "epoch": 0.69, "grad_norm": 2.75, "learning_rate": 0.00014651934987984828, "loss": 2.1147, "step": 294040 }, { "epoch": 0.69, "grad_norm": 2.59375, "learning_rate": 0.00014651771374762164, "loss": 2.0421, "step": 294045 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014651607759950372, "loss": 2.024, "step": 294050 }, { "epoch": 0.69, "grad_norm": 1.875, "learning_rate": 0.00014651444143549508, "loss": 2.1233, "step": 294055 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014651280525559624, "loss": 2.239, "step": 294060 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.00014651116905980779, "loss": 1.9557, "step": 294065 }, { "epoch": 0.69, "grad_norm": 2.71875, "learning_rate": 0.00014650953284813025, "loss": 2.0345, "step": 294070 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014650789662056424, "loss": 2.1088, "step": 294075 }, { "epoch": 0.69, "grad_norm": 3.109375, "learning_rate": 0.00014650626037711025, "loss": 2.1816, "step": 294080 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014650462411776896, "loss": 2.1371, "step": 294085 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014650298784254077, "loss": 1.9498, "step": 294090 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014650135155142631, "loss": 2.0981, "step": 294095 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001464997152444262, "loss": 1.9344, "step": 294100 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014649807892154094, "loss": 2.0352, "step": 294105 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014649644258277105, "loss": 2.0764, "step": 294110 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014649480622811717, "loss": 2.1361, "step": 294115 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.0001464931698575798, "loss": 1.9639, "step": 294120 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.0001464915334711595, "loss": 2.1715, "step": 294125 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.0001464898970688569, "loss": 2.1763, "step": 294130 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.0001464882606506725, "loss": 1.9713, "step": 294135 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014648662421660685, "loss": 2.0337, "step": 294140 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014648498776666054, "loss": 1.9942, "step": 294145 }, { "epoch": 0.69, "grad_norm": 1.875, "learning_rate": 0.00014648335130083409, "loss": 2.0347, "step": 294150 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014648171481912808, "loss": 2.2812, "step": 294155 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.0001464800783215431, "loss": 2.1866, "step": 294160 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014647844180807967, "loss": 1.8475, "step": 294165 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014647680527873834, "loss": 2.1903, "step": 294170 }, { "epoch": 0.69, "grad_norm": 2.171875, "learning_rate": 0.00014647516873351975, "loss": 2.2304, "step": 294175 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014647353217242434, "loss": 1.8911, "step": 294180 }, { "epoch": 0.69, "grad_norm": 2.859375, "learning_rate": 0.00014647189559545277, "loss": 1.9637, "step": 294185 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014647025900260552, "loss": 2.01, "step": 294190 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014646862239388323, "loss": 1.9023, "step": 294195 }, { "epoch": 0.69, "grad_norm": 2.734375, "learning_rate": 0.00014646698576928637, "loss": 2.1864, "step": 294200 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014646534912881557, "loss": 2.0432, "step": 294205 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014646371247247134, "loss": 1.9422, "step": 294210 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014646207580025432, "loss": 2.0742, "step": 294215 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014646043911216498, "loss": 2.1263, "step": 294220 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.0001464588024082039, "loss": 2.1013, "step": 294225 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014645716568837164, "loss": 2.1594, "step": 294230 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014645552895266878, "loss": 2.0243, "step": 294235 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014645389220109588, "loss": 1.9857, "step": 294240 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014645225543365352, "loss": 2.0498, "step": 294245 }, { "epoch": 0.69, "grad_norm": 5.65625, "learning_rate": 0.00014645061865034217, "loss": 2.1475, "step": 294250 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014644898185116244, "loss": 2.0638, "step": 294255 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014644734503611493, "loss": 2.163, "step": 294260 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014644570820520017, "loss": 2.0631, "step": 294265 }, { "epoch": 0.69, "grad_norm": 1.6953125, "learning_rate": 0.0001464440713584187, "loss": 2.0729, "step": 294270 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.00014644243449577111, "loss": 1.9356, "step": 294275 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001464407976172579, "loss": 1.8899, "step": 294280 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014643916072287972, "loss": 2.1545, "step": 294285 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.00014643752381263706, "loss": 2.1275, "step": 294290 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001464358868865305, "loss": 2.1328, "step": 294295 }, { "epoch": 0.69, "grad_norm": 2.65625, "learning_rate": 0.00014643424994456057, "loss": 2.1102, "step": 294300 }, { "epoch": 0.69, "grad_norm": 1.90625, "learning_rate": 0.0001464326129867279, "loss": 2.0375, "step": 294305 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.000146430976013033, "loss": 2.1929, "step": 294310 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014642933902347642, "loss": 2.1936, "step": 294315 }, { "epoch": 0.69, "grad_norm": 3.125, "learning_rate": 0.00014642770201805876, "loss": 1.9113, "step": 294320 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014642606499678054, "loss": 1.951, "step": 294325 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014642442795964234, "loss": 2.3737, "step": 294330 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014642279090664468, "loss": 2.1603, "step": 294335 }, { "epoch": 0.69, "grad_norm": 1.421875, "learning_rate": 0.00014642115383778817, "loss": 1.9611, "step": 294340 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014641951675307336, "loss": 1.946, "step": 294345 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014641787965250083, "loss": 2.261, "step": 294350 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014641624253607108, "loss": 2.1597, "step": 294355 }, { "epoch": 0.69, "grad_norm": 2.65625, "learning_rate": 0.00014641460540378468, "loss": 2.1301, "step": 294360 }, { "epoch": 0.69, "grad_norm": 2.546875, "learning_rate": 0.00014641296825564223, "loss": 2.2058, "step": 294365 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.0001464113310916443, "loss": 2.118, "step": 294370 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014640969391179138, "loss": 2.0802, "step": 294375 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014640805671608408, "loss": 2.1912, "step": 294380 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014640641950452294, "loss": 2.0107, "step": 294385 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014640478227710853, "loss": 2.1063, "step": 294390 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014640314503384138, "loss": 1.9344, "step": 294395 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014640150777472211, "loss": 2.105, "step": 294400 }, { "epoch": 0.69, "grad_norm": 2.75, "learning_rate": 0.00014639987049975124, "loss": 2.0556, "step": 294405 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.0001463982332089293, "loss": 1.9236, "step": 294410 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.0001463965959022569, "loss": 1.9831, "step": 294415 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014639495857973457, "loss": 1.9807, "step": 294420 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014639332124136294, "loss": 1.9299, "step": 294425 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014639168388714247, "loss": 2.0862, "step": 294430 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014639004651707373, "loss": 2.0049, "step": 294435 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014638840913115735, "loss": 2.0214, "step": 294440 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014638677172939383, "loss": 2.0806, "step": 294445 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014638513431178374, "loss": 2.0247, "step": 294450 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014638349687832766, "loss": 1.9869, "step": 294455 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014638185942902613, "loss": 1.9471, "step": 294460 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014638022196387972, "loss": 1.9599, "step": 294465 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.000146378584482889, "loss": 2.1081, "step": 294470 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.0001463769469860545, "loss": 1.9504, "step": 294475 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014637530947337677, "loss": 2.1153, "step": 294480 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.00014637367194485643, "loss": 2.0015, "step": 294485 }, { "epoch": 0.69, "grad_norm": 2.640625, "learning_rate": 0.00014637203440049396, "loss": 2.0335, "step": 294490 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014637039684028998, "loss": 1.9324, "step": 294495 }, { "epoch": 0.69, "grad_norm": 1.8359375, "learning_rate": 0.00014636875926424508, "loss": 2.0157, "step": 294500 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014636712167235972, "loss": 1.9984, "step": 294505 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.0001463654840646345, "loss": 2.1011, "step": 294510 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014636384644107006, "loss": 1.946, "step": 294515 }, { "epoch": 0.69, "grad_norm": 2.15625, "learning_rate": 0.00014636220880166683, "loss": 2.0832, "step": 294520 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.0001463605711464254, "loss": 2.1882, "step": 294525 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.00014635893347534642, "loss": 1.9025, "step": 294530 }, { "epoch": 0.69, "grad_norm": 1.9140625, "learning_rate": 0.00014635729578843035, "loss": 1.9689, "step": 294535 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014635565808567783, "loss": 2.212, "step": 294540 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014635402036708935, "loss": 2.0379, "step": 294545 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.0001463523826326655, "loss": 2.2936, "step": 294550 }, { "epoch": 0.69, "grad_norm": 1.78125, "learning_rate": 0.00014635074488240682, "loss": 2.0335, "step": 294555 }, { "epoch": 0.69, "grad_norm": 2.4375, "learning_rate": 0.0001463491071163139, "loss": 1.997, "step": 294560 }, { "epoch": 0.69, "grad_norm": 2.796875, "learning_rate": 0.00014634746933438729, "loss": 2.1175, "step": 294565 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014634583153662754, "loss": 1.991, "step": 294570 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014634419372303517, "loss": 2.1226, "step": 294575 }, { "epoch": 0.69, "grad_norm": 2.53125, "learning_rate": 0.00014634255589361086, "loss": 2.0898, "step": 294580 }, { "epoch": 0.69, "grad_norm": 1.984375, "learning_rate": 0.00014634091804835504, "loss": 2.0788, "step": 294585 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014633928018726832, "loss": 2.2044, "step": 294590 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014633764231035127, "loss": 2.0352, "step": 294595 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014633600441760447, "loss": 2.1464, "step": 294600 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014633436650902842, "loss": 2.0369, "step": 294605 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.0001463327285846237, "loss": 2.1375, "step": 294610 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014633109064439092, "loss": 2.1598, "step": 294615 }, { "epoch": 0.69, "grad_norm": 1.9375, "learning_rate": 0.00014632945268833056, "loss": 2.0214, "step": 294620 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014632781471644323, "loss": 2.0738, "step": 294625 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014632617672872948, "loss": 2.1661, "step": 294630 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014632453872518987, "loss": 2.0947, "step": 294635 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014632290070582497, "loss": 1.9727, "step": 294640 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.0001463212626706353, "loss": 2.2444, "step": 294645 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.00014631962461962147, "loss": 2.2327, "step": 294650 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014631798655278402, "loss": 2.1357, "step": 294655 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.0001463163484701235, "loss": 2.218, "step": 294660 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014631471037164045, "loss": 2.1605, "step": 294665 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014631307225733547, "loss": 2.1369, "step": 294670 }, { "epoch": 0.69, "grad_norm": 2.65625, "learning_rate": 0.00014631143412720916, "loss": 2.0144, "step": 294675 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014630979598126195, "loss": 2.0591, "step": 294680 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014630815781949447, "loss": 1.942, "step": 294685 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.00014630651964190735, "loss": 2.0279, "step": 294690 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014630488144850104, "loss": 1.9907, "step": 294695 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014630324323927613, "loss": 2.1086, "step": 294700 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014630160501423323, "loss": 2.0931, "step": 294705 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.0001462999667733728, "loss": 2.0286, "step": 294710 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.0001462983285166955, "loss": 2.1243, "step": 294715 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.0001462966902442019, "loss": 2.2631, "step": 294720 }, { "epoch": 0.69, "grad_norm": 1.6796875, "learning_rate": 0.00014629505195589245, "loss": 2.0368, "step": 294725 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.0001462934136517678, "loss": 2.2673, "step": 294730 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014629177533182842, "loss": 2.1506, "step": 294735 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.000146290136996075, "loss": 2.0219, "step": 294740 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014628849864450801, "loss": 2.067, "step": 294745 }, { "epoch": 0.69, "grad_norm": 2.015625, "learning_rate": 0.00014628686027712805, "loss": 2.2296, "step": 294750 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.0001462852218939356, "loss": 2.0886, "step": 294755 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.0001462835834949313, "loss": 2.0967, "step": 294760 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014628194508011573, "loss": 1.9892, "step": 294765 }, { "epoch": 0.69, "grad_norm": 1.953125, "learning_rate": 0.00014628030664948937, "loss": 1.8561, "step": 294770 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014627866820305282, "loss": 2.0649, "step": 294775 }, { "epoch": 0.69, "grad_norm": 1.8125, "learning_rate": 0.00014627702974080666, "loss": 2.2404, "step": 294780 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.0001462753912627514, "loss": 2.0597, "step": 294785 }, { "epoch": 0.69, "grad_norm": 1.96875, "learning_rate": 0.00014627375276888763, "loss": 1.9256, "step": 294790 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.0001462721142592159, "loss": 1.9476, "step": 294795 }, { "epoch": 0.69, "grad_norm": 1.9296875, "learning_rate": 0.00014627047573373682, "loss": 2.0638, "step": 294800 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.00014626883719245088, "loss": 2.0248, "step": 294805 }, { "epoch": 0.69, "grad_norm": 1.859375, "learning_rate": 0.00014626719863535868, "loss": 2.0419, "step": 294810 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014626556006246076, "loss": 1.8024, "step": 294815 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014626392147375764, "loss": 2.0281, "step": 294820 }, { "epoch": 0.69, "grad_norm": 2.65625, "learning_rate": 0.00014626228286924997, "loss": 2.1412, "step": 294825 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014626064424893827, "loss": 2.0827, "step": 294830 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.0001462590056128231, "loss": 2.2078, "step": 294835 }, { "epoch": 0.69, "grad_norm": 1.5703125, "learning_rate": 0.000146257366960905, "loss": 1.9817, "step": 294840 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.0001462557282931845, "loss": 1.8911, "step": 294845 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.00014625408960966226, "loss": 2.038, "step": 294850 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014625245091033878, "loss": 2.2508, "step": 294855 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014625081219521462, "loss": 2.0599, "step": 294860 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014624917346429034, "loss": 2.0407, "step": 294865 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.00014624753471756646, "loss": 1.936, "step": 294870 }, { "epoch": 0.69, "grad_norm": 2.328125, "learning_rate": 0.00014624589595504364, "loss": 2.134, "step": 294875 }, { "epoch": 0.69, "grad_norm": 1.9609375, "learning_rate": 0.0001462442571767224, "loss": 2.0711, "step": 294880 }, { "epoch": 0.69, "grad_norm": 2.390625, "learning_rate": 0.00014624261838260325, "loss": 2.0659, "step": 294885 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014624097957268676, "loss": 1.9891, "step": 294890 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014623934074697355, "loss": 2.078, "step": 294895 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.0001462377019054641, "loss": 2.1341, "step": 294900 }, { "epoch": 0.69, "grad_norm": 2.421875, "learning_rate": 0.00014623606304815906, "loss": 2.2032, "step": 294905 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014623442417505892, "loss": 2.1354, "step": 294910 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014623278528616427, "loss": 2.1309, "step": 294915 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014623114638147566, "loss": 2.0614, "step": 294920 }, { "epoch": 0.69, "grad_norm": 1.8671875, "learning_rate": 0.00014622950746099364, "loss": 2.0148, "step": 294925 }, { "epoch": 0.69, "grad_norm": 1.6953125, "learning_rate": 0.0001462278685247188, "loss": 2.0138, "step": 294930 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014622622957265167, "loss": 2.0642, "step": 294935 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.00014622459060479283, "loss": 2.0793, "step": 294940 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.0001462229516211428, "loss": 2.1056, "step": 294945 }, { "epoch": 0.69, "grad_norm": 2.453125, "learning_rate": 0.00014622131262170222, "loss": 1.8472, "step": 294950 }, { "epoch": 0.69, "grad_norm": 1.8828125, "learning_rate": 0.00014621967360647158, "loss": 2.0083, "step": 294955 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014621803457545146, "loss": 1.8632, "step": 294960 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.0001462163955286424, "loss": 2.0457, "step": 294965 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014621475646604503, "loss": 2.018, "step": 294970 }, { "epoch": 0.69, "grad_norm": 1.828125, "learning_rate": 0.00014621311738765983, "loss": 2.0102, "step": 294975 }, { "epoch": 0.69, "grad_norm": 1.7890625, "learning_rate": 0.0001462114782934874, "loss": 1.9433, "step": 294980 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014620983918352829, "loss": 2.177, "step": 294985 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014620820005778305, "loss": 1.9279, "step": 294990 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014620656091625226, "loss": 1.9938, "step": 294995 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014620492175893647, "loss": 2.0342, "step": 295000 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.00014620328258583624, "loss": 1.9657, "step": 295005 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014620164339695214, "loss": 2.1787, "step": 295010 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014620000419228473, "loss": 2.0203, "step": 295015 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014619836497183452, "loss": 2.129, "step": 295020 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014619672573560211, "loss": 2.1426, "step": 295025 }, { "epoch": 0.69, "grad_norm": 3.140625, "learning_rate": 0.0001461950864835881, "loss": 1.9499, "step": 295030 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.000146193447215793, "loss": 1.8405, "step": 295035 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014619180793221735, "loss": 2.1169, "step": 295040 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014619016863286178, "loss": 2.0878, "step": 295045 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.0001461885293177268, "loss": 1.9847, "step": 295050 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014618688998681298, "loss": 2.2943, "step": 295055 }, { "epoch": 0.69, "grad_norm": 2.25, "learning_rate": 0.00014618525064012086, "loss": 2.0599, "step": 295060 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014618361127765106, "loss": 2.0795, "step": 295065 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.00014618197189940406, "loss": 2.0041, "step": 295070 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014618033250538047, "loss": 2.1644, "step": 295075 }, { "epoch": 0.69, "grad_norm": 2.515625, "learning_rate": 0.00014617869309558086, "loss": 2.1522, "step": 295080 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014617705367000577, "loss": 2.0522, "step": 295085 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014617541422865576, "loss": 2.0605, "step": 295090 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014617377477153137, "loss": 2.0009, "step": 295095 }, { "epoch": 0.69, "grad_norm": 2.34375, "learning_rate": 0.00014617213529863321, "loss": 1.9361, "step": 295100 }, { "epoch": 0.69, "grad_norm": 1.9453125, "learning_rate": 0.00014617049580996182, "loss": 1.9666, "step": 295105 }, { "epoch": 0.69, "grad_norm": 2.609375, "learning_rate": 0.0001461688563055177, "loss": 2.0356, "step": 295110 }, { "epoch": 0.69, "grad_norm": 1.9921875, "learning_rate": 0.0001461672167853015, "loss": 1.9689, "step": 295115 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.0001461655772493137, "loss": 2.0973, "step": 295120 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014616393769755496, "loss": 1.9105, "step": 295125 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014616229813002574, "loss": 2.2012, "step": 295130 }, { "epoch": 0.69, "grad_norm": 2.484375, "learning_rate": 0.00014616065854672664, "loss": 1.9342, "step": 295135 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014615901894765823, "loss": 1.9189, "step": 295140 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.00014615737933282107, "loss": 2.2029, "step": 295145 }, { "epoch": 0.69, "grad_norm": 2.78125, "learning_rate": 0.00014615573970221574, "loss": 2.1697, "step": 295150 }, { "epoch": 0.69, "grad_norm": 2.59375, "learning_rate": 0.00014615410005584274, "loss": 2.0951, "step": 295155 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.00014615246039370266, "loss": 2.1038, "step": 295160 }, { "epoch": 0.69, "grad_norm": 2.1875, "learning_rate": 0.00014615082071579605, "loss": 1.9351, "step": 295165 }, { "epoch": 0.69, "grad_norm": 2.140625, "learning_rate": 0.00014614918102212352, "loss": 1.8971, "step": 295170 }, { "epoch": 0.69, "grad_norm": 1.953125, "learning_rate": 0.00014614754131268558, "loss": 2.0205, "step": 295175 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.0001461459015874828, "loss": 1.9504, "step": 295180 }, { "epoch": 0.69, "grad_norm": 2.5, "learning_rate": 0.00014614426184651573, "loss": 2.0637, "step": 295185 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014614262208978494, "loss": 1.9554, "step": 295190 }, { "epoch": 0.69, "grad_norm": 2.234375, "learning_rate": 0.00014614098231729102, "loss": 1.9834, "step": 295195 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014613934252903448, "loss": 1.9065, "step": 295200 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.00014613770272501591, "loss": 2.1183, "step": 295205 }, { "epoch": 0.69, "grad_norm": 1.953125, "learning_rate": 0.00014613606290523587, "loss": 2.2045, "step": 295210 }, { "epoch": 0.69, "grad_norm": 2.09375, "learning_rate": 0.0001461344230696949, "loss": 1.9943, "step": 295215 }, { "epoch": 0.69, "grad_norm": 2.265625, "learning_rate": 0.00014613278321839359, "loss": 2.0215, "step": 295220 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.0001461311433513325, "loss": 1.9447, "step": 295225 }, { "epoch": 0.69, "grad_norm": 2.3125, "learning_rate": 0.00014612950346851213, "loss": 2.0307, "step": 295230 }, { "epoch": 0.69, "grad_norm": 1.9765625, "learning_rate": 0.0001461278635699331, "loss": 2.0791, "step": 295235 }, { "epoch": 0.69, "grad_norm": 2.203125, "learning_rate": 0.00014612622365559597, "loss": 2.1108, "step": 295240 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.00014612458372550128, "loss": 2.0905, "step": 295245 }, { "epoch": 0.69, "grad_norm": 2.078125, "learning_rate": 0.00014612294377964961, "loss": 2.1133, "step": 295250 }, { "epoch": 0.69, "grad_norm": 2.46875, "learning_rate": 0.0001461213038180415, "loss": 1.7809, "step": 295255 }, { "epoch": 0.69, "grad_norm": 2.21875, "learning_rate": 0.0001461196638406775, "loss": 2.1204, "step": 295260 }, { "epoch": 0.69, "grad_norm": 2.296875, "learning_rate": 0.00014611802384755822, "loss": 1.87, "step": 295265 }, { "epoch": 0.69, "grad_norm": 2.375, "learning_rate": 0.00014611638383868415, "loss": 1.8477, "step": 295270 }, { "epoch": 0.69, "grad_norm": 2.0, "learning_rate": 0.0001461147438140559, "loss": 2.1887, "step": 295275 }, { "epoch": 0.69, "grad_norm": 2.125, "learning_rate": 0.000146113103773674, "loss": 2.2084, "step": 295280 }, { "epoch": 0.69, "grad_norm": 2.03125, "learning_rate": 0.00014611146371753904, "loss": 1.9487, "step": 295285 }, { "epoch": 0.69, "grad_norm": 2.046875, "learning_rate": 0.0001461098236456516, "loss": 1.881, "step": 295290 }, { "epoch": 0.69, "grad_norm": 2.0625, "learning_rate": 0.00014610818355801218, "loss": 2.0552, "step": 295295 }, { "epoch": 0.69, "grad_norm": 1.7890625, "learning_rate": 0.00014610654345462132, "loss": 1.8144, "step": 295300 }, { "epoch": 0.69, "grad_norm": 2.359375, "learning_rate": 0.0001461049033354797, "loss": 2.0449, "step": 295305 }, { "epoch": 0.69, "grad_norm": 2.40625, "learning_rate": 0.0001461032632005878, "loss": 1.9471, "step": 295310 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.00014610162304994618, "loss": 2.0001, "step": 295315 }, { "epoch": 0.69, "grad_norm": 2.28125, "learning_rate": 0.0001460999828835554, "loss": 2.1622, "step": 295320 }, { "epoch": 0.69, "grad_norm": 2.109375, "learning_rate": 0.00014609834270141602, "loss": 2.1826, "step": 295325 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014609670250352865, "loss": 1.9573, "step": 295330 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014609506228989379, "loss": 2.1025, "step": 295335 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.000146093422060512, "loss": 2.0974, "step": 295340 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014609178181538388, "loss": 2.0447, "step": 295345 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014609014155450995, "loss": 2.1272, "step": 295350 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.0001460885012778908, "loss": 2.0973, "step": 295355 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014608686098552698, "loss": 2.2806, "step": 295360 }, { "epoch": 0.7, "grad_norm": 1.9453125, "learning_rate": 0.00014608522067741908, "loss": 1.8242, "step": 295365 }, { "epoch": 0.7, "grad_norm": 2.8125, "learning_rate": 0.00014608358035356762, "loss": 2.14, "step": 295370 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014608194001397318, "loss": 2.1072, "step": 295375 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001460802996586363, "loss": 2.1253, "step": 295380 }, { "epoch": 0.7, "grad_norm": 1.8046875, "learning_rate": 0.00014607865928755755, "loss": 1.977, "step": 295385 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.0001460770189007375, "loss": 2.1594, "step": 295390 }, { "epoch": 0.7, "grad_norm": 3.25, "learning_rate": 0.00014607537849817672, "loss": 2.3058, "step": 295395 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014607373807987574, "loss": 2.0789, "step": 295400 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014607209764583512, "loss": 2.0859, "step": 295405 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014607045719605544, "loss": 2.0071, "step": 295410 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001460688167305373, "loss": 2.0234, "step": 295415 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014606717624928117, "loss": 1.9266, "step": 295420 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014606553575228769, "loss": 2.0769, "step": 295425 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014606389523955736, "loss": 2.141, "step": 295430 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014606225471109076, "loss": 1.8745, "step": 295435 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.00014606061416688848, "loss": 2.0109, "step": 295440 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.0001460589736069511, "loss": 2.158, "step": 295445 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014605733303127907, "loss": 1.9667, "step": 295450 }, { "epoch": 0.7, "grad_norm": 1.7421875, "learning_rate": 0.00014605569243987304, "loss": 2.2008, "step": 295455 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014605405183273353, "loss": 2.0115, "step": 295460 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014605241120986117, "loss": 2.0261, "step": 295465 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014605077057125643, "loss": 1.9564, "step": 295470 }, { "epoch": 0.7, "grad_norm": 2.578125, "learning_rate": 0.00014604912991691995, "loss": 2.0964, "step": 295475 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014604748924685224, "loss": 2.1318, "step": 295480 }, { "epoch": 0.7, "grad_norm": 2.890625, "learning_rate": 0.00014604584856105384, "loss": 1.9976, "step": 295485 }, { "epoch": 0.7, "grad_norm": 3.625, "learning_rate": 0.00014604420785952537, "loss": 2.0669, "step": 295490 }, { "epoch": 0.7, "grad_norm": 2.71875, "learning_rate": 0.00014604256714226735, "loss": 2.2223, "step": 295495 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014604092640928037, "loss": 2.3097, "step": 295500 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014603928566056496, "loss": 2.0428, "step": 295505 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.0001460376448961217, "loss": 2.1708, "step": 295510 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014603600411595115, "loss": 2.0694, "step": 295515 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014603436332005386, "loss": 2.1306, "step": 295520 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.0001460327225084304, "loss": 2.2084, "step": 295525 }, { "epoch": 0.7, "grad_norm": 2.5, "learning_rate": 0.00014603108168108136, "loss": 1.9919, "step": 295530 }, { "epoch": 0.7, "grad_norm": 1.96875, "learning_rate": 0.00014602944083800723, "loss": 2.1738, "step": 295535 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.0001460277999792086, "loss": 2.1446, "step": 295540 }, { "epoch": 0.7, "grad_norm": 1.8046875, "learning_rate": 0.00014602615910468605, "loss": 2.1398, "step": 295545 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014602451821444013, "loss": 2.1362, "step": 295550 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014602287730847139, "loss": 2.0641, "step": 295555 }, { "epoch": 0.7, "grad_norm": 1.796875, "learning_rate": 0.00014602123638678043, "loss": 2.0692, "step": 295560 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.0001460195954493678, "loss": 2.0878, "step": 295565 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014601795449623397, "loss": 2.1627, "step": 295570 }, { "epoch": 0.7, "grad_norm": 1.921875, "learning_rate": 0.0001460163135273796, "loss": 2.0833, "step": 295575 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014601467254280525, "loss": 2.0062, "step": 295580 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014601303154251143, "loss": 1.8978, "step": 295585 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014601139052649873, "loss": 2.1559, "step": 295590 }, { "epoch": 0.7, "grad_norm": 1.9765625, "learning_rate": 0.00014600974949476769, "loss": 2.1073, "step": 295595 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.0001460081084473189, "loss": 2.0012, "step": 295600 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001460064673841529, "loss": 2.1144, "step": 295605 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014600482630527025, "loss": 2.0623, "step": 295610 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014600318521067152, "loss": 2.1243, "step": 295615 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014600154410035726, "loss": 2.1376, "step": 295620 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014599990297432808, "loss": 2.1643, "step": 295625 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014599826183258447, "loss": 2.0473, "step": 295630 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014599662067512703, "loss": 2.047, "step": 295635 }, { "epoch": 0.7, "grad_norm": 1.84375, "learning_rate": 0.00014599497950195628, "loss": 2.13, "step": 295640 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.00014599333831307284, "loss": 2.0592, "step": 295645 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014599169710847724, "loss": 2.1739, "step": 295650 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.00014599005588817002, "loss": 2.0227, "step": 295655 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014598841465215178, "loss": 2.0194, "step": 295660 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014598677340042307, "loss": 2.0916, "step": 295665 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014598513213298444, "loss": 2.1347, "step": 295670 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014598349084983645, "loss": 2.1066, "step": 295675 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014598184955097966, "loss": 2.0741, "step": 295680 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014598020823641464, "loss": 2.1416, "step": 295685 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014597856690614196, "loss": 2.1072, "step": 295690 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014597692556016215, "loss": 1.9601, "step": 295695 }, { "epoch": 0.7, "grad_norm": 2.65625, "learning_rate": 0.0001459752841984758, "loss": 2.0568, "step": 295700 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014597364282108342, "loss": 2.1031, "step": 295705 }, { "epoch": 0.7, "grad_norm": 2.53125, "learning_rate": 0.00014597200142798565, "loss": 2.1406, "step": 295710 }, { "epoch": 0.7, "grad_norm": 1.9296875, "learning_rate": 0.00014597036001918302, "loss": 2.107, "step": 295715 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001459687185946761, "loss": 1.9504, "step": 295720 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014596707715446537, "loss": 2.0776, "step": 295725 }, { "epoch": 0.7, "grad_norm": 1.6796875, "learning_rate": 0.00014596543569855147, "loss": 2.0535, "step": 295730 }, { "epoch": 0.7, "grad_norm": 2.53125, "learning_rate": 0.00014596379422693496, "loss": 2.1258, "step": 295735 }, { "epoch": 0.7, "grad_norm": 1.859375, "learning_rate": 0.0001459621527396164, "loss": 2.0313, "step": 295740 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014596051123659632, "loss": 2.0791, "step": 295745 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014595886971787526, "loss": 2.0589, "step": 295750 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014595722818345385, "loss": 2.0192, "step": 295755 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014595558663333264, "loss": 2.0418, "step": 295760 }, { "epoch": 0.7, "grad_norm": 3.078125, "learning_rate": 0.0001459539450675121, "loss": 2.0791, "step": 295765 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014595230348599293, "loss": 2.0435, "step": 295770 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.0001459506618887756, "loss": 2.212, "step": 295775 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014594902027586067, "loss": 2.0358, "step": 295780 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014594737864724873, "loss": 2.094, "step": 295785 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014594573700294034, "loss": 2.1328, "step": 295790 }, { "epoch": 0.7, "grad_norm": 1.890625, "learning_rate": 0.00014594409534293606, "loss": 2.0639, "step": 295795 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014594245366723643, "loss": 1.9395, "step": 295800 }, { "epoch": 0.7, "grad_norm": 1.984375, "learning_rate": 0.000145940811975842, "loss": 1.9681, "step": 295805 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.0001459391702687534, "loss": 2.0673, "step": 295810 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.0001459375285459711, "loss": 1.9904, "step": 295815 }, { "epoch": 0.7, "grad_norm": 2.59375, "learning_rate": 0.00014593588680749574, "loss": 1.9589, "step": 295820 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014593424505332785, "loss": 2.1849, "step": 295825 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014593260328346798, "loss": 2.1305, "step": 295830 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001459309614979167, "loss": 2.1338, "step": 295835 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001459293196966746, "loss": 2.1037, "step": 295840 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.00014592767787974215, "loss": 1.9331, "step": 295845 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014592603604712, "loss": 2.0264, "step": 295850 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014592439419880873, "loss": 1.9648, "step": 295855 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014592275233480878, "loss": 2.1536, "step": 295860 }, { "epoch": 0.7, "grad_norm": 2.984375, "learning_rate": 0.00014592111045512083, "loss": 1.9497, "step": 295865 }, { "epoch": 0.7, "grad_norm": 2.5, "learning_rate": 0.00014591946855974536, "loss": 2.0075, "step": 295870 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.000145917826648683, "loss": 1.9956, "step": 295875 }, { "epoch": 0.7, "grad_norm": 1.859375, "learning_rate": 0.00014591618472193429, "loss": 2.2832, "step": 295880 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014591454277949976, "loss": 1.9618, "step": 295885 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014591290082137997, "loss": 2.2115, "step": 295890 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014591125884757551, "loss": 2.1035, "step": 295895 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014590961685808694, "loss": 2.0993, "step": 295900 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014590797485291483, "loss": 2.1477, "step": 295905 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.0001459063328320597, "loss": 2.0592, "step": 295910 }, { "epoch": 0.7, "grad_norm": 2.625, "learning_rate": 0.0001459046907955221, "loss": 2.0419, "step": 295915 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.0001459030487433027, "loss": 1.9668, "step": 295920 }, { "epoch": 0.7, "grad_norm": 1.9765625, "learning_rate": 0.00014590140667540194, "loss": 2.1848, "step": 295925 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014589976459182045, "loss": 2.134, "step": 295930 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014589812249255873, "loss": 2.0543, "step": 295935 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014589648037761742, "loss": 1.8883, "step": 295940 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014589483824699703, "loss": 2.1033, "step": 295945 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.0001458931961006981, "loss": 2.0241, "step": 295950 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014589155393872124, "loss": 2.2195, "step": 295955 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014588991176106702, "loss": 1.7733, "step": 295960 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014588826956773594, "loss": 2.2185, "step": 295965 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001458866273587286, "loss": 2.1056, "step": 295970 }, { "epoch": 0.7, "grad_norm": 1.8125, "learning_rate": 0.00014588498513404556, "loss": 2.0914, "step": 295975 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.0001458833428936874, "loss": 2.033, "step": 295980 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014588170063765463, "loss": 1.9549, "step": 295985 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014588005836594788, "loss": 1.9232, "step": 295990 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.0001458784160785676, "loss": 2.0805, "step": 295995 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014587677377551444, "loss": 1.8755, "step": 296000 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014587513145678899, "loss": 1.9354, "step": 296005 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.0001458734891223917, "loss": 2.1416, "step": 296010 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014587184677232327, "loss": 2.0616, "step": 296015 }, { "epoch": 0.7, "grad_norm": 2.625, "learning_rate": 0.00014587020440658415, "loss": 2.1385, "step": 296020 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.0001458685620251749, "loss": 2.0017, "step": 296025 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014586691962809614, "loss": 1.9339, "step": 296030 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014586527721534843, "loss": 2.0235, "step": 296035 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.0001458636347869323, "loss": 2.0439, "step": 296040 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.0001458619923428483, "loss": 2.1259, "step": 296045 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.000145860349883097, "loss": 2.0977, "step": 296050 }, { "epoch": 0.7, "grad_norm": 2.671875, "learning_rate": 0.00014585870740767902, "loss": 2.1067, "step": 296055 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014585706491659486, "loss": 1.9511, "step": 296060 }, { "epoch": 0.7, "grad_norm": 2.59375, "learning_rate": 0.00014585542240984508, "loss": 1.9911, "step": 296065 }, { "epoch": 0.7, "grad_norm": 2.84375, "learning_rate": 0.00014585377988743028, "loss": 1.8659, "step": 296070 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014585213734935094, "loss": 2.025, "step": 296075 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014585049479560775, "loss": 2.1924, "step": 296080 }, { "epoch": 0.7, "grad_norm": 3.734375, "learning_rate": 0.00014584885222620117, "loss": 2.1188, "step": 296085 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.0001458472096411318, "loss": 2.1117, "step": 296090 }, { "epoch": 0.7, "grad_norm": 3.078125, "learning_rate": 0.00014584556704040015, "loss": 2.1463, "step": 296095 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014584392442400687, "loss": 2.0679, "step": 296100 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014584228179195246, "loss": 2.224, "step": 296105 }, { "epoch": 0.7, "grad_norm": 2.609375, "learning_rate": 0.00014584063914423748, "loss": 2.2083, "step": 296110 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014583899648086253, "loss": 2.1581, "step": 296115 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014583735380182812, "loss": 1.9882, "step": 296120 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014583571110713485, "loss": 2.019, "step": 296125 }, { "epoch": 0.7, "grad_norm": 1.890625, "learning_rate": 0.00014583406839678329, "loss": 1.9681, "step": 296130 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014583242567077395, "loss": 1.9932, "step": 296135 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014583078292910746, "loss": 1.8908, "step": 296140 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.0001458291401717843, "loss": 2.0751, "step": 296145 }, { "epoch": 0.7, "grad_norm": 2.65625, "learning_rate": 0.00014582749739880513, "loss": 2.0345, "step": 296150 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001458258546101704, "loss": 2.0798, "step": 296155 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014582421180588074, "loss": 2.077, "step": 296160 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014582256898593672, "loss": 1.9816, "step": 296165 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014582092615033888, "loss": 2.0776, "step": 296170 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014581928329908777, "loss": 2.1541, "step": 296175 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.00014581764043218395, "loss": 2.1665, "step": 296180 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014581599754962798, "loss": 2.0531, "step": 296185 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014581435465142045, "loss": 2.1888, "step": 296190 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014581271173756192, "loss": 1.8995, "step": 296195 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014581106880805295, "loss": 2.0393, "step": 296200 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014580942586289405, "loss": 2.1975, "step": 296205 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014580778290208582, "loss": 2.0546, "step": 296210 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014580613992562887, "loss": 2.0975, "step": 296215 }, { "epoch": 0.7, "grad_norm": 1.9453125, "learning_rate": 0.00014580449693352368, "loss": 1.9794, "step": 296220 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014580285392577083, "loss": 2.1517, "step": 296225 }, { "epoch": 0.7, "grad_norm": 1.921875, "learning_rate": 0.0001458012109023709, "loss": 2.1757, "step": 296230 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014579956786332448, "loss": 2.2011, "step": 296235 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.00014579792480863205, "loss": 2.1134, "step": 296240 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014579628173829422, "loss": 2.3213, "step": 296245 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014579463865231157, "loss": 2.0395, "step": 296250 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.00014579299555068464, "loss": 1.9562, "step": 296255 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.000145791352433414, "loss": 2.0121, "step": 296260 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001457897093005002, "loss": 2.1117, "step": 296265 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014578806615194378, "loss": 2.1087, "step": 296270 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014578642298774535, "loss": 2.1857, "step": 296275 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014578477980790544, "loss": 2.0252, "step": 296280 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014578313661242463, "loss": 1.9996, "step": 296285 }, { "epoch": 0.7, "grad_norm": 1.9296875, "learning_rate": 0.00014578149340130345, "loss": 2.0756, "step": 296290 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.0001457798501745425, "loss": 1.9639, "step": 296295 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.0001457782069321423, "loss": 2.1845, "step": 296300 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014577656367410345, "loss": 2.1855, "step": 296305 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014577492040042652, "loss": 2.0076, "step": 296310 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014577327711111203, "loss": 2.1103, "step": 296315 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014577163380616052, "loss": 2.1145, "step": 296320 }, { "epoch": 0.7, "grad_norm": 2.53125, "learning_rate": 0.00014576999048557262, "loss": 2.0087, "step": 296325 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014576834714934888, "loss": 2.1636, "step": 296330 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014576670379748982, "loss": 2.0928, "step": 296335 }, { "epoch": 0.7, "grad_norm": 2.75, "learning_rate": 0.00014576506042999603, "loss": 2.1204, "step": 296340 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014576341704686805, "loss": 1.8899, "step": 296345 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.0001457617736481065, "loss": 2.0201, "step": 296350 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014576013023371185, "loss": 2.021, "step": 296355 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014575848680368475, "loss": 2.0405, "step": 296360 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014575684335802567, "loss": 1.9987, "step": 296365 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.00014575519989673525, "loss": 2.1186, "step": 296370 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014575355641981402, "loss": 2.1357, "step": 296375 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014575191292726255, "loss": 2.1946, "step": 296380 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.0001457502694190814, "loss": 2.2856, "step": 296385 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014574862589527115, "loss": 1.9553, "step": 296390 }, { "epoch": 0.7, "grad_norm": 3.296875, "learning_rate": 0.0001457469823558323, "loss": 1.9781, "step": 296395 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014574533880076546, "loss": 2.0928, "step": 296400 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001457436952300712, "loss": 1.9424, "step": 296405 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014574205164375004, "loss": 2.1669, "step": 296410 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014574040804180257, "loss": 2.0538, "step": 296415 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014573876442422937, "loss": 2.144, "step": 296420 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014573712079103096, "loss": 2.0155, "step": 296425 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014573547714220793, "loss": 2.088, "step": 296430 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014573383347776084, "loss": 2.0076, "step": 296435 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.0001457321897976902, "loss": 2.1598, "step": 296440 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.0001457305461019967, "loss": 2.1877, "step": 296445 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.0001457289023906807, "loss": 2.038, "step": 296450 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014572725866374294, "loss": 2.2334, "step": 296455 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014572561492118394, "loss": 2.0104, "step": 296460 }, { "epoch": 0.7, "grad_norm": 1.984375, "learning_rate": 0.00014572397116300424, "loss": 2.0945, "step": 296465 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014572232738920438, "loss": 2.0891, "step": 296470 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001457206835997849, "loss": 2.1128, "step": 296475 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014571903979474646, "loss": 2.0557, "step": 296480 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014571739597408955, "loss": 2.1442, "step": 296485 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.0001457157521378148, "loss": 2.278, "step": 296490 }, { "epoch": 0.7, "grad_norm": 1.875, "learning_rate": 0.00014571410828592265, "loss": 2.0966, "step": 296495 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014571246441841376, "loss": 2.0683, "step": 296500 }, { "epoch": 0.7, "grad_norm": 1.8828125, "learning_rate": 0.00014571082053528863, "loss": 1.9146, "step": 296505 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014570917663654791, "loss": 2.0812, "step": 296510 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014570753272219206, "loss": 2.0878, "step": 296515 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.00014570588879222172, "loss": 2.0001, "step": 296520 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.0001457042448466374, "loss": 2.2266, "step": 296525 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014570260088543968, "loss": 2.1113, "step": 296530 }, { "epoch": 0.7, "grad_norm": 3.0625, "learning_rate": 0.00014570095690862912, "loss": 1.9704, "step": 296535 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014569931291620632, "loss": 2.017, "step": 296540 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014569766890817177, "loss": 2.1791, "step": 296545 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001456960248845261, "loss": 2.0614, "step": 296550 }, { "epoch": 0.7, "grad_norm": 2.734375, "learning_rate": 0.0001456943808452698, "loss": 2.0027, "step": 296555 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014569273679040348, "loss": 1.8993, "step": 296560 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014569109271992772, "loss": 2.0729, "step": 296565 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014568944863384303, "loss": 1.728, "step": 296570 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014568780453215, "loss": 2.2141, "step": 296575 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.0001456861604148492, "loss": 2.1806, "step": 296580 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014568451628194115, "loss": 2.1782, "step": 296585 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014568287213342646, "loss": 2.0128, "step": 296590 }, { "epoch": 0.7, "grad_norm": 1.96875, "learning_rate": 0.00014568122796930567, "loss": 2.0817, "step": 296595 }, { "epoch": 0.7, "grad_norm": 1.96875, "learning_rate": 0.00014567958378957933, "loss": 1.9612, "step": 296600 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014567793959424804, "loss": 2.1273, "step": 296605 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014567629538331234, "loss": 2.0434, "step": 296610 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.00014567465115677278, "loss": 1.8936, "step": 296615 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014567300691462992, "loss": 1.8743, "step": 296620 }, { "epoch": 0.7, "grad_norm": 1.8828125, "learning_rate": 0.00014567136265688434, "loss": 2.1582, "step": 296625 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014566971838353657, "loss": 2.0617, "step": 296630 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.00014566807409458724, "loss": 1.9436, "step": 296635 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014566642979003684, "loss": 2.0815, "step": 296640 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014566478546988599, "loss": 2.0467, "step": 296645 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001456631411341352, "loss": 2.0819, "step": 296650 }, { "epoch": 0.7, "grad_norm": 1.8125, "learning_rate": 0.00014566149678278505, "loss": 1.9648, "step": 296655 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014565985241583607, "loss": 2.1248, "step": 296660 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001456582080332889, "loss": 2.1674, "step": 296665 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014565656363514407, "loss": 2.0659, "step": 296670 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.00014565491922140212, "loss": 1.9888, "step": 296675 }, { "epoch": 0.7, "grad_norm": 2.65625, "learning_rate": 0.0001456532747920636, "loss": 2.087, "step": 296680 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001456516303471291, "loss": 2.1308, "step": 296685 }, { "epoch": 0.7, "grad_norm": 2.8125, "learning_rate": 0.00014564998588659917, "loss": 1.9874, "step": 296690 }, { "epoch": 0.7, "grad_norm": 2.71875, "learning_rate": 0.0001456483414104744, "loss": 2.14, "step": 296695 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.0001456466969187553, "loss": 2.2366, "step": 296700 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014564505241144247, "loss": 2.0297, "step": 296705 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014564340788853647, "loss": 2.1429, "step": 296710 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014564176335003784, "loss": 2.0403, "step": 296715 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.0001456401187959472, "loss": 2.0131, "step": 296720 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014563847422626502, "loss": 2.0994, "step": 296725 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014563682964099193, "loss": 2.1755, "step": 296730 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014563518504012845, "loss": 2.0147, "step": 296735 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.0001456335404236752, "loss": 2.0981, "step": 296740 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014563189579163267, "loss": 2.0178, "step": 296745 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014563025114400148, "loss": 1.9577, "step": 296750 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014562860648078215, "loss": 2.143, "step": 296755 }, { "epoch": 0.7, "grad_norm": 2.734375, "learning_rate": 0.0001456269618019753, "loss": 2.2327, "step": 296760 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.0001456253171075814, "loss": 2.1347, "step": 296765 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.0001456236723976011, "loss": 2.1135, "step": 296770 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001456220276720349, "loss": 2.1451, "step": 296775 }, { "epoch": 0.7, "grad_norm": 1.84375, "learning_rate": 0.0001456203829308834, "loss": 1.9833, "step": 296780 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.00014561873817414716, "loss": 2.1749, "step": 296785 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014561709340182673, "loss": 2.1615, "step": 296790 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014561544861392266, "loss": 1.9422, "step": 296795 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.0001456138038104355, "loss": 2.0258, "step": 296800 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014561215899136588, "loss": 2.0311, "step": 296805 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014561051415671434, "loss": 2.0369, "step": 296810 }, { "epoch": 0.7, "grad_norm": 1.8671875, "learning_rate": 0.00014560886930648138, "loss": 2.1256, "step": 296815 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014560722444066762, "loss": 2.1245, "step": 296820 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014560557955927358, "loss": 2.0654, "step": 296825 }, { "epoch": 0.7, "grad_norm": 2.578125, "learning_rate": 0.00014560393466229988, "loss": 2.0461, "step": 296830 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.000145602289749747, "loss": 2.2668, "step": 296835 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001456006448216156, "loss": 2.0072, "step": 296840 }, { "epoch": 0.7, "grad_norm": 1.9453125, "learning_rate": 0.0001455989998779062, "loss": 2.1399, "step": 296845 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001455973549186193, "loss": 2.2191, "step": 296850 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014559570994375558, "loss": 2.1066, "step": 296855 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.0001455940649533155, "loss": 1.958, "step": 296860 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014559241994729967, "loss": 1.9723, "step": 296865 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014559077492570864, "loss": 1.9401, "step": 296870 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.000145589129888543, "loss": 1.9823, "step": 296875 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014558748483580323, "loss": 2.1177, "step": 296880 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014558583976748997, "loss": 2.1002, "step": 296885 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014558419468360375, "loss": 2.2544, "step": 296890 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014558254958414518, "loss": 2.1169, "step": 296895 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014558090446911478, "loss": 2.0655, "step": 296900 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014557925933851308, "loss": 2.0452, "step": 296905 }, { "epoch": 0.7, "grad_norm": 1.9375, "learning_rate": 0.0001455776141923407, "loss": 2.212, "step": 296910 }, { "epoch": 0.7, "grad_norm": 2.578125, "learning_rate": 0.00014557596903059817, "loss": 2.1596, "step": 296915 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.0001455743238532861, "loss": 2.058, "step": 296920 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.000145572678660405, "loss": 2.0416, "step": 296925 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014557103345195542, "loss": 2.1068, "step": 296930 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014556938822793796, "loss": 1.9763, "step": 296935 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014556774298835317, "loss": 2.0935, "step": 296940 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014556609773320163, "loss": 2.24, "step": 296945 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014556445246248387, "loss": 2.0368, "step": 296950 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014556280717620049, "loss": 2.1727, "step": 296955 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014556116187435195, "loss": 2.0352, "step": 296960 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.00014555951655693898, "loss": 2.1017, "step": 296965 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014555787122396202, "loss": 2.0211, "step": 296970 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014555622587542167, "loss": 1.9721, "step": 296975 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014555458051131848, "loss": 1.959, "step": 296980 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.000145552935131653, "loss": 2.0779, "step": 296985 }, { "epoch": 0.7, "grad_norm": 1.8984375, "learning_rate": 0.00014555128973642585, "loss": 2.0733, "step": 296990 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014554964432563755, "loss": 1.9458, "step": 296995 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014554799889928864, "loss": 2.075, "step": 297000 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.00014554635345737972, "loss": 1.814, "step": 297005 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014554470799991133, "loss": 2.0921, "step": 297010 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014554306252688405, "loss": 1.9911, "step": 297015 }, { "epoch": 0.7, "grad_norm": 1.8671875, "learning_rate": 0.00014554141703829843, "loss": 2.1012, "step": 297020 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014553977153415504, "loss": 2.1295, "step": 297025 }, { "epoch": 0.7, "grad_norm": 2.578125, "learning_rate": 0.00014553812601445445, "loss": 2.1306, "step": 297030 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.0001455364804791972, "loss": 1.9968, "step": 297035 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014553483492838384, "loss": 2.0565, "step": 297040 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.000145533189362015, "loss": 2.1325, "step": 297045 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014553154378009115, "loss": 2.1441, "step": 297050 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014552989818261293, "loss": 2.0104, "step": 297055 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014552825256958086, "loss": 2.0524, "step": 297060 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014552660694099552, "loss": 1.8391, "step": 297065 }, { "epoch": 0.7, "grad_norm": 1.9375, "learning_rate": 0.00014552496129685745, "loss": 1.9407, "step": 297070 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014552331563716722, "loss": 1.9923, "step": 297075 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014552166996192547, "loss": 2.1553, "step": 297080 }, { "epoch": 0.7, "grad_norm": 1.8125, "learning_rate": 0.00014552002427113263, "loss": 2.0872, "step": 297085 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014551837856478935, "loss": 2.0989, "step": 297090 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.00014551673284289612, "loss": 2.3049, "step": 297095 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014551508710545358, "loss": 2.149, "step": 297100 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014551344135246228, "loss": 2.1366, "step": 297105 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014551179558392274, "loss": 2.0279, "step": 297110 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.00014551014979983552, "loss": 1.7897, "step": 297115 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014550850400020125, "loss": 1.9284, "step": 297120 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014550685818502044, "loss": 2.0764, "step": 297125 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014550521235429367, "loss": 2.022, "step": 297130 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014550356650802146, "loss": 2.0919, "step": 297135 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014550192064620443, "loss": 2.1956, "step": 297140 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.0001455002747688431, "loss": 2.1424, "step": 297145 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014549862887593807, "loss": 2.2304, "step": 297150 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014549698296748988, "loss": 1.88, "step": 297155 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014549533704349908, "loss": 2.1131, "step": 297160 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001454936911039663, "loss": 2.0737, "step": 297165 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.000145492045148892, "loss": 2.0593, "step": 297170 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.0001454903991782768, "loss": 2.0855, "step": 297175 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014548875319212126, "loss": 1.9413, "step": 297180 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014548710719042594, "loss": 2.1183, "step": 297185 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.0001454854611731914, "loss": 2.143, "step": 297190 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014548381514041823, "loss": 1.9071, "step": 297195 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.0001454821690921069, "loss": 2.0472, "step": 297200 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014548052302825807, "loss": 2.1333, "step": 297205 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.0001454788769488723, "loss": 2.0493, "step": 297210 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014547723085395005, "loss": 2.0004, "step": 297215 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014547558474349202, "loss": 2.0019, "step": 297220 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014547393861749866, "loss": 1.9532, "step": 297225 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014547229247597059, "loss": 2.12, "step": 297230 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.0001454706463189084, "loss": 2.081, "step": 297235 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014546900014631255, "loss": 1.9501, "step": 297240 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014546735395818368, "loss": 2.0462, "step": 297245 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014546570775452236, "loss": 1.8475, "step": 297250 }, { "epoch": 0.7, "grad_norm": 1.8828125, "learning_rate": 0.0001454640615353291, "loss": 2.0791, "step": 297255 }, { "epoch": 0.7, "grad_norm": 2.609375, "learning_rate": 0.00014546241530060455, "loss": 2.0866, "step": 297260 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014546076905034915, "loss": 2.0779, "step": 297265 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.00014545912278456354, "loss": 1.897, "step": 297270 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.0001454574765032483, "loss": 2.0447, "step": 297275 }, { "epoch": 0.7, "grad_norm": 1.9296875, "learning_rate": 0.00014545583020640393, "loss": 2.0786, "step": 297280 }, { "epoch": 0.7, "grad_norm": 2.71875, "learning_rate": 0.00014545418389403105, "loss": 1.8882, "step": 297285 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.0001454525375661302, "loss": 1.9337, "step": 297290 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.0001454508912227019, "loss": 2.1144, "step": 297295 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001454492448637468, "loss": 2.1493, "step": 297300 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014544759848926538, "loss": 2.2015, "step": 297305 }, { "epoch": 0.7, "grad_norm": 2.578125, "learning_rate": 0.00014544595209925823, "loss": 1.8696, "step": 297310 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014544430569372595, "loss": 2.1112, "step": 297315 }, { "epoch": 0.7, "grad_norm": 1.890625, "learning_rate": 0.00014544265927266903, "loss": 2.0737, "step": 297320 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014544101283608807, "loss": 2.0064, "step": 297325 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.0001454393663839837, "loss": 1.928, "step": 297330 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.00014543771991635637, "loss": 2.0366, "step": 297335 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.0001454360734332067, "loss": 2.0933, "step": 297340 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014543442693453523, "loss": 2.0525, "step": 297345 }, { "epoch": 0.7, "grad_norm": 1.9375, "learning_rate": 0.00014543278042034258, "loss": 2.0185, "step": 297350 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014543113389062923, "loss": 2.0145, "step": 297355 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014542948734539582, "loss": 2.0634, "step": 297360 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.0001454278407846428, "loss": 2.1114, "step": 297365 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014542619420837083, "loss": 1.8602, "step": 297370 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.0001454245476165805, "loss": 1.708, "step": 297375 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.0001454229010092723, "loss": 2.1208, "step": 297380 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.0001454212543864468, "loss": 2.0, "step": 297385 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014541960774810454, "loss": 2.0592, "step": 297390 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.0001454179610942462, "loss": 2.138, "step": 297395 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001454163144248722, "loss": 2.0995, "step": 297400 }, { "epoch": 0.7, "grad_norm": 1.9375, "learning_rate": 0.00014541466773998317, "loss": 2.1487, "step": 297405 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.0001454130210395797, "loss": 2.1297, "step": 297410 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014541137432366228, "loss": 1.9417, "step": 297415 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014540972759223152, "loss": 2.1688, "step": 297420 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014540808084528796, "loss": 2.2467, "step": 297425 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.0001454064340828322, "loss": 2.1935, "step": 297430 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014540478730486477, "loss": 2.0934, "step": 297435 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014540314051138625, "loss": 1.9375, "step": 297440 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.0001454014937023972, "loss": 2.0092, "step": 297445 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014539984687789813, "loss": 1.8989, "step": 297450 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014539820003788971, "loss": 2.1008, "step": 297455 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.0001453965531823724, "loss": 2.0244, "step": 297460 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014539490631134685, "loss": 1.8716, "step": 297465 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001453932594248135, "loss": 1.9663, "step": 297470 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014539161252277304, "loss": 2.1179, "step": 297475 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014538996560522599, "loss": 2.0977, "step": 297480 }, { "epoch": 0.7, "grad_norm": 1.7890625, "learning_rate": 0.0001453883186721729, "loss": 1.9906, "step": 297485 }, { "epoch": 0.7, "grad_norm": 1.8828125, "learning_rate": 0.00014538667172361432, "loss": 2.1927, "step": 297490 }, { "epoch": 0.7, "grad_norm": 1.6640625, "learning_rate": 0.00014538502475955082, "loss": 2.0448, "step": 297495 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014538337777998298, "loss": 1.9577, "step": 297500 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014538173078491138, "loss": 2.0173, "step": 297505 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014538008377433656, "loss": 1.9548, "step": 297510 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014537843674825905, "loss": 2.2366, "step": 297515 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014537678970667948, "loss": 1.9846, "step": 297520 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014537514264959832, "loss": 2.2309, "step": 297525 }, { "epoch": 0.7, "grad_norm": 1.796875, "learning_rate": 0.00014537349557701624, "loss": 2.1135, "step": 297530 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.00014537184848893375, "loss": 2.0057, "step": 297535 }, { "epoch": 0.7, "grad_norm": 3.96875, "learning_rate": 0.00014537020138535137, "loss": 2.1721, "step": 297540 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014536855426626973, "loss": 1.9848, "step": 297545 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014536690713168936, "loss": 2.1488, "step": 297550 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014536525998161087, "loss": 2.0696, "step": 297555 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014536361281603473, "loss": 2.1289, "step": 297560 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.0001453619656349616, "loss": 2.1702, "step": 297565 }, { "epoch": 0.7, "grad_norm": 1.8359375, "learning_rate": 0.000145360318438392, "loss": 2.2235, "step": 297570 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014535867122632645, "loss": 2.1356, "step": 297575 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001453570239987656, "loss": 2.139, "step": 297580 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.00014535537675570994, "loss": 2.2983, "step": 297585 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014535372949716008, "loss": 2.2345, "step": 297590 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014535208222311654, "loss": 2.1794, "step": 297595 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.0001453504349335799, "loss": 2.1574, "step": 297600 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014534878762855076, "loss": 2.0627, "step": 297605 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014534714030802964, "loss": 2.0695, "step": 297610 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.00014534549297201714, "loss": 2.1055, "step": 297615 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014534384562051375, "loss": 1.9986, "step": 297620 }, { "epoch": 0.7, "grad_norm": 2.59375, "learning_rate": 0.00014534219825352015, "loss": 2.0309, "step": 297625 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014534055087103676, "loss": 2.0732, "step": 297630 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014533890347306422, "loss": 2.0554, "step": 297635 }, { "epoch": 0.7, "grad_norm": 1.984375, "learning_rate": 0.00014533725605960312, "loss": 2.0152, "step": 297640 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014533560863065398, "loss": 1.9792, "step": 297645 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014533396118621738, "loss": 1.9572, "step": 297650 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014533231372629386, "loss": 2.1941, "step": 297655 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014533066625088402, "loss": 2.027, "step": 297660 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014532901875998836, "loss": 2.0722, "step": 297665 }, { "epoch": 0.7, "grad_norm": 2.5, "learning_rate": 0.00014532737125360755, "loss": 2.015, "step": 297670 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014532572373174207, "loss": 2.0486, "step": 297675 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001453240761943925, "loss": 1.9447, "step": 297680 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014532242864155934, "loss": 2.1624, "step": 297685 }, { "epoch": 0.7, "grad_norm": 1.703125, "learning_rate": 0.0001453207810732433, "loss": 1.8601, "step": 297690 }, { "epoch": 0.7, "grad_norm": 1.6875, "learning_rate": 0.00014531913348944482, "loss": 1.9567, "step": 297695 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001453174858901645, "loss": 2.0875, "step": 297700 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014531583827540292, "loss": 2.1132, "step": 297705 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.0001453141906451606, "loss": 2.0683, "step": 297710 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014531254299943815, "loss": 2.0685, "step": 297715 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014531089533823612, "loss": 2.0115, "step": 297720 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.00014530924766155507, "loss": 2.0733, "step": 297725 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014530759996939555, "loss": 2.1705, "step": 297730 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014530595226175814, "loss": 2.082, "step": 297735 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014530430453864337, "loss": 1.988, "step": 297740 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014530265680005183, "loss": 2.0552, "step": 297745 }, { "epoch": 0.7, "grad_norm": 2.71875, "learning_rate": 0.00014530100904598412, "loss": 2.0289, "step": 297750 }, { "epoch": 0.7, "grad_norm": 1.8671875, "learning_rate": 0.00014529936127644072, "loss": 1.9839, "step": 297755 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.0001452977134914223, "loss": 2.0812, "step": 297760 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014529606569092929, "loss": 2.0462, "step": 297765 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014529441787496233, "loss": 2.161, "step": 297770 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.000145292770043522, "loss": 2.0083, "step": 297775 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014529112219660882, "loss": 2.0775, "step": 297780 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014528947433422343, "loss": 2.0766, "step": 297785 }, { "epoch": 0.7, "grad_norm": 2.671875, "learning_rate": 0.00014528782645636625, "loss": 2.0067, "step": 297790 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.00014528617856303798, "loss": 1.8469, "step": 297795 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.0001452845306542391, "loss": 2.0257, "step": 297800 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014528288272997022, "loss": 1.9657, "step": 297805 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014528123479023191, "loss": 1.9827, "step": 297810 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.00014527958683502467, "loss": 1.9673, "step": 297815 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.0001452779388643491, "loss": 1.9928, "step": 297820 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.0001452762908782058, "loss": 1.9915, "step": 297825 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014527464287659528, "loss": 2.0399, "step": 297830 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014527299485951812, "loss": 2.0777, "step": 297835 }, { "epoch": 0.7, "grad_norm": 1.7890625, "learning_rate": 0.00014527134682697487, "loss": 2.1694, "step": 297840 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.0001452696987789661, "loss": 2.0508, "step": 297845 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014526805071549243, "loss": 2.1956, "step": 297850 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014526640263655432, "loss": 1.9472, "step": 297855 }, { "epoch": 0.7, "grad_norm": 1.8359375, "learning_rate": 0.00014526475454215243, "loss": 2.0123, "step": 297860 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014526310643228724, "loss": 2.0315, "step": 297865 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014526145830695938, "loss": 2.0375, "step": 297870 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014525981016616935, "loss": 1.9418, "step": 297875 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.0001452581620099178, "loss": 1.9762, "step": 297880 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.0001452565138382052, "loss": 2.23, "step": 297885 }, { "epoch": 0.7, "grad_norm": 1.8984375, "learning_rate": 0.00014525486565103218, "loss": 2.1272, "step": 297890 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014525321744839927, "loss": 2.1562, "step": 297895 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.000145251569230307, "loss": 2.1673, "step": 297900 }, { "epoch": 0.7, "grad_norm": 1.84375, "learning_rate": 0.00014524992099675604, "loss": 2.3556, "step": 297905 }, { "epoch": 0.7, "grad_norm": 2.59375, "learning_rate": 0.00014524827274774684, "loss": 2.0839, "step": 297910 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014524662448328003, "loss": 2.2099, "step": 297915 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014524497620335615, "loss": 2.054, "step": 297920 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014524332790797573, "loss": 2.031, "step": 297925 }, { "epoch": 0.7, "grad_norm": 3.1875, "learning_rate": 0.00014524167959713942, "loss": 2.1032, "step": 297930 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001452400312708477, "loss": 1.9968, "step": 297935 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014523838292910122, "loss": 1.9993, "step": 297940 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014523673457190043, "loss": 2.3065, "step": 297945 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014523508619924592, "loss": 2.027, "step": 297950 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014523343781113837, "loss": 2.1574, "step": 297955 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.0001452317894075782, "loss": 1.9224, "step": 297960 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014523014098856605, "loss": 1.964, "step": 297965 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014522849255410247, "loss": 2.0851, "step": 297970 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.000145226844104188, "loss": 2.0784, "step": 297975 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014522519563882322, "loss": 2.1749, "step": 297980 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014522354715800872, "loss": 1.9379, "step": 297985 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014522189866174502, "loss": 1.9814, "step": 297990 }, { "epoch": 0.7, "grad_norm": 1.8203125, "learning_rate": 0.00014522025015003268, "loss": 1.974, "step": 297995 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.0001452186016228723, "loss": 1.9096, "step": 298000 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.0001452169530802644, "loss": 2.1345, "step": 298005 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.0001452153045222096, "loss": 2.1092, "step": 298010 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014521365594870846, "loss": 2.1036, "step": 298015 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014521200735976146, "loss": 2.1038, "step": 298020 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014521035875536927, "loss": 2.0833, "step": 298025 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014520871013553235, "loss": 2.001, "step": 298030 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014520706150025135, "loss": 1.9279, "step": 298035 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014520541284952676, "loss": 2.0771, "step": 298040 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.0001452037641833592, "loss": 1.9995, "step": 298045 }, { "epoch": 0.7, "grad_norm": 1.9453125, "learning_rate": 0.00014520211550174923, "loss": 2.0737, "step": 298050 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.0001452004668046974, "loss": 2.271, "step": 298055 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014519881809220422, "loss": 2.1436, "step": 298060 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014519716936427034, "loss": 2.0504, "step": 298065 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001451955206208963, "loss": 2.0545, "step": 298070 }, { "epoch": 0.7, "grad_norm": 1.921875, "learning_rate": 0.00014519387186208262, "loss": 1.9303, "step": 298075 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014519222308782994, "loss": 2.2334, "step": 298080 }, { "epoch": 0.7, "grad_norm": 2.5, "learning_rate": 0.00014519057429813873, "loss": 2.1213, "step": 298085 }, { "epoch": 0.7, "grad_norm": 1.96875, "learning_rate": 0.00014518892549300963, "loss": 2.1447, "step": 298090 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014518727667244314, "loss": 2.0387, "step": 298095 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014518562783643991, "loss": 2.069, "step": 298100 }, { "epoch": 0.7, "grad_norm": 1.875, "learning_rate": 0.00014518397898500042, "loss": 1.9998, "step": 298105 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014518233011812527, "loss": 2.0952, "step": 298110 }, { "epoch": 0.7, "grad_norm": 2.75, "learning_rate": 0.00014518068123581498, "loss": 1.9472, "step": 298115 }, { "epoch": 0.7, "grad_norm": 1.8671875, "learning_rate": 0.0001451790323380702, "loss": 1.954, "step": 298120 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014517738342489142, "loss": 2.0335, "step": 298125 }, { "epoch": 0.7, "grad_norm": 1.9296875, "learning_rate": 0.00014517573449627925, "loss": 1.9098, "step": 298130 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.0001451740855522342, "loss": 1.9916, "step": 298135 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014517243659275685, "loss": 2.1127, "step": 298140 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.0001451707876178478, "loss": 1.9996, "step": 298145 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001451691386275076, "loss": 1.9985, "step": 298150 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.0001451674896217368, "loss": 1.9309, "step": 298155 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014516584060053593, "loss": 1.9276, "step": 298160 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014516419156390562, "loss": 2.1501, "step": 298165 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.0001451625425118464, "loss": 1.9115, "step": 298170 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014516089344435883, "loss": 2.116, "step": 298175 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.0001451592443614435, "loss": 2.0392, "step": 298180 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.00014515759526310094, "loss": 2.2641, "step": 298185 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.00014515594614933174, "loss": 2.0268, "step": 298190 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014515429702013642, "loss": 2.067, "step": 298195 }, { "epoch": 0.7, "grad_norm": 2.609375, "learning_rate": 0.0001451526478755156, "loss": 2.0023, "step": 298200 }, { "epoch": 0.7, "grad_norm": 2.59375, "learning_rate": 0.00014515099871546979, "loss": 1.9599, "step": 298205 }, { "epoch": 0.7, "grad_norm": 1.8984375, "learning_rate": 0.00014514934953999958, "loss": 2.0792, "step": 298210 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001451477003491056, "loss": 2.0655, "step": 298215 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.00014514605114278824, "loss": 2.1133, "step": 298220 }, { "epoch": 0.7, "grad_norm": 2.609375, "learning_rate": 0.00014514440192104825, "loss": 2.0932, "step": 298225 }, { "epoch": 0.7, "grad_norm": 2.609375, "learning_rate": 0.0001451427526838861, "loss": 1.8561, "step": 298230 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014514110343130237, "loss": 2.0509, "step": 298235 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001451394541632976, "loss": 1.9725, "step": 298240 }, { "epoch": 0.7, "grad_norm": 2.53125, "learning_rate": 0.00014513780487987235, "loss": 2.0164, "step": 298245 }, { "epoch": 0.7, "grad_norm": 1.90625, "learning_rate": 0.00014513615558102726, "loss": 2.1809, "step": 298250 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.0001451345062667628, "loss": 1.9367, "step": 298255 }, { "epoch": 0.7, "grad_norm": 2.6875, "learning_rate": 0.0001451328569370796, "loss": 2.1591, "step": 298260 }, { "epoch": 0.7, "grad_norm": 2.65625, "learning_rate": 0.0001451312075919782, "loss": 2.1099, "step": 298265 }, { "epoch": 0.7, "grad_norm": 1.859375, "learning_rate": 0.00014512955823145916, "loss": 2.0872, "step": 298270 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.000145127908855523, "loss": 2.0354, "step": 298275 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014512625946417037, "loss": 2.0524, "step": 298280 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.0001451246100574018, "loss": 2.0776, "step": 298285 }, { "epoch": 0.7, "grad_norm": 1.828125, "learning_rate": 0.00014512296063521785, "loss": 1.8152, "step": 298290 }, { "epoch": 0.7, "grad_norm": 1.90625, "learning_rate": 0.00014512131119761903, "loss": 2.0976, "step": 298295 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014511966174460595, "loss": 2.1479, "step": 298300 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014511801227617922, "loss": 2.3119, "step": 298305 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014511636279233937, "loss": 2.0432, "step": 298310 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014511471329308693, "loss": 2.1391, "step": 298315 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014511306377842248, "loss": 2.2375, "step": 298320 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014511141424834658, "loss": 1.9407, "step": 298325 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.00014510976470285983, "loss": 2.1626, "step": 298330 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014510811514196274, "loss": 2.0419, "step": 298335 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001451064655656559, "loss": 2.0384, "step": 298340 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014510481597393988, "loss": 2.1636, "step": 298345 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014510316636681526, "loss": 2.0464, "step": 298350 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014510151674428253, "loss": 2.1396, "step": 298355 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014509986710634234, "loss": 2.0816, "step": 298360 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.0001450982174529952, "loss": 1.9723, "step": 298365 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.0001450965677842417, "loss": 1.9865, "step": 298370 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.0001450949181000824, "loss": 2.3117, "step": 298375 }, { "epoch": 0.7, "grad_norm": 1.9453125, "learning_rate": 0.00014509326840051788, "loss": 2.2287, "step": 298380 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.00014509161868554867, "loss": 2.0456, "step": 298385 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.0001450899689551753, "loss": 2.0664, "step": 298390 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.00014508831920939845, "loss": 2.1221, "step": 298395 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.00014508666944821857, "loss": 2.046, "step": 298400 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014508501967163624, "loss": 2.1345, "step": 298405 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001450833698796521, "loss": 2.0835, "step": 298410 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014508172007226663, "loss": 2.3323, "step": 298415 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014508007024948045, "loss": 2.0294, "step": 298420 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014507842041129408, "loss": 2.0496, "step": 298425 }, { "epoch": 0.7, "grad_norm": 2.75, "learning_rate": 0.0001450767705577081, "loss": 2.2684, "step": 298430 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014507512068872308, "loss": 1.886, "step": 298435 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014507347080433962, "loss": 2.1476, "step": 298440 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014507182090455823, "loss": 2.0429, "step": 298445 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014507017098937947, "loss": 2.0224, "step": 298450 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014506852105880392, "loss": 2.1045, "step": 298455 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.00014506687111283216, "loss": 2.0155, "step": 298460 }, { "epoch": 0.7, "grad_norm": 2.59375, "learning_rate": 0.00014506522115146473, "loss": 2.0606, "step": 298465 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.0001450635711747022, "loss": 2.1065, "step": 298470 }, { "epoch": 0.7, "grad_norm": 1.90625, "learning_rate": 0.00014506192118254516, "loss": 2.0901, "step": 298475 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014506027117499413, "loss": 2.1304, "step": 298480 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.00014505862115204968, "loss": 1.956, "step": 298485 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.0001450569711137124, "loss": 2.2155, "step": 298490 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014505532105998285, "loss": 2.2412, "step": 298495 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.0001450536709908616, "loss": 2.0703, "step": 298500 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.00014505202090634917, "loss": 2.1965, "step": 298505 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014505037080644618, "loss": 2.1495, "step": 298510 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014504872069115313, "loss": 2.0461, "step": 298515 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014504707056047064, "loss": 2.0151, "step": 298520 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014504542041439926, "loss": 2.0071, "step": 298525 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014504377025293956, "loss": 2.2541, "step": 298530 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.0001450421200760921, "loss": 2.1205, "step": 298535 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014504046988385736, "loss": 1.9033, "step": 298540 }, { "epoch": 0.7, "grad_norm": 1.890625, "learning_rate": 0.00014503881967623601, "loss": 1.9185, "step": 298545 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014503716945322862, "loss": 2.0857, "step": 298550 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.00014503551921483571, "loss": 2.0636, "step": 298555 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014503386896105785, "loss": 1.9598, "step": 298560 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014503221869189558, "loss": 1.9984, "step": 298565 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014503056840734947, "loss": 2.1624, "step": 298570 }, { "epoch": 0.7, "grad_norm": 1.9140625, "learning_rate": 0.00014502891810742015, "loss": 1.962, "step": 298575 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014502726779210812, "loss": 2.0271, "step": 298580 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014502561746141394, "loss": 2.1528, "step": 298585 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001450239671153382, "loss": 2.1037, "step": 298590 }, { "epoch": 0.7, "grad_norm": 1.4765625, "learning_rate": 0.00014502231675388145, "loss": 2.0148, "step": 298595 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014502066637704427, "loss": 1.9646, "step": 298600 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001450190159848272, "loss": 1.8253, "step": 298605 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014501736557723084, "loss": 1.989, "step": 298610 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014501571515425574, "loss": 2.0589, "step": 298615 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014501406471590245, "loss": 2.1608, "step": 298620 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014501241426217152, "loss": 2.1393, "step": 298625 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014501076379306354, "loss": 2.1573, "step": 298630 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014500911330857905, "loss": 2.0094, "step": 298635 }, { "epoch": 0.7, "grad_norm": 2.8125, "learning_rate": 0.00014500746280871865, "loss": 1.9909, "step": 298640 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014500581229348288, "loss": 2.1627, "step": 298645 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.0001450041617628723, "loss": 1.9594, "step": 298650 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.0001450025112168875, "loss": 2.2511, "step": 298655 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014500086065552902, "loss": 2.1168, "step": 298660 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014499921007879741, "loss": 2.0833, "step": 298665 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.0001449975594866933, "loss": 2.0062, "step": 298670 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.00014499590887921717, "loss": 2.0754, "step": 298675 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014499425825636962, "loss": 2.1076, "step": 298680 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014499260761815123, "loss": 2.0818, "step": 298685 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014499095696456258, "loss": 1.9677, "step": 298690 }, { "epoch": 0.7, "grad_norm": 2.71875, "learning_rate": 0.00014498930629560416, "loss": 2.0665, "step": 298695 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014498765561127657, "loss": 2.1529, "step": 298700 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.0001449860049115804, "loss": 2.0391, "step": 298705 }, { "epoch": 0.7, "grad_norm": 1.953125, "learning_rate": 0.00014498435419651616, "loss": 2.0388, "step": 298710 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014498270346608453, "loss": 2.0304, "step": 298715 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014498105272028593, "loss": 2.1089, "step": 298720 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.000144979401959121, "loss": 2.1453, "step": 298725 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014497775118259027, "loss": 2.1986, "step": 298730 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014497610039069435, "loss": 2.1697, "step": 298735 }, { "epoch": 0.7, "grad_norm": 1.8046875, "learning_rate": 0.0001449744495834338, "loss": 2.021, "step": 298740 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014497279876080912, "loss": 1.9615, "step": 298745 }, { "epoch": 0.7, "grad_norm": 2.09375, "learning_rate": 0.00014497114792282094, "loss": 2.1615, "step": 298750 }, { "epoch": 0.7, "grad_norm": 2.046875, "learning_rate": 0.0001449694970694698, "loss": 2.0744, "step": 298755 }, { "epoch": 0.7, "grad_norm": 2.53125, "learning_rate": 0.00014496784620075624, "loss": 2.0175, "step": 298760 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014496619531668086, "loss": 1.846, "step": 298765 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014496454441724423, "loss": 2.0368, "step": 298770 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014496289350244686, "loss": 2.2012, "step": 298775 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001449612425722894, "loss": 2.0599, "step": 298780 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.00014495959162677232, "loss": 2.0472, "step": 298785 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014495794066589625, "loss": 2.0029, "step": 298790 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014495628968966173, "loss": 1.8681, "step": 298795 }, { "epoch": 0.7, "grad_norm": 2.734375, "learning_rate": 0.00014495463869806932, "loss": 1.8748, "step": 298800 }, { "epoch": 0.7, "grad_norm": 1.84375, "learning_rate": 0.0001449529876911196, "loss": 2.0012, "step": 298805 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.0001449513366688131, "loss": 2.1023, "step": 298810 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014494968563115044, "loss": 1.996, "step": 298815 }, { "epoch": 0.7, "grad_norm": 2.015625, "learning_rate": 0.00014494803457813213, "loss": 2.1008, "step": 298820 }, { "epoch": 0.7, "grad_norm": 1.984375, "learning_rate": 0.00014494638350975877, "loss": 2.0643, "step": 298825 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014494473242603092, "loss": 1.9117, "step": 298830 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.0001449430813269491, "loss": 1.9948, "step": 298835 }, { "epoch": 0.7, "grad_norm": 2.734375, "learning_rate": 0.0001449414302125139, "loss": 2.1229, "step": 298840 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014493977908272595, "loss": 2.0799, "step": 298845 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014493812793758574, "loss": 2.2356, "step": 298850 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014493647677709382, "loss": 2.0868, "step": 298855 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.00014493482560125082, "loss": 2.1923, "step": 298860 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014493317441005723, "loss": 2.2843, "step": 298865 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.0001449315232035137, "loss": 1.9697, "step": 298870 }, { "epoch": 0.7, "grad_norm": 1.8203125, "learning_rate": 0.0001449298719816207, "loss": 1.9665, "step": 298875 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014492822074437886, "loss": 2.1288, "step": 298880 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014492656949178874, "loss": 2.0615, "step": 298885 }, { "epoch": 0.7, "grad_norm": 1.984375, "learning_rate": 0.00014492491822385085, "loss": 2.1146, "step": 298890 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014492326694056582, "loss": 2.1673, "step": 298895 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.0001449216156419342, "loss": 2.0483, "step": 298900 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014491996432795652, "loss": 1.9971, "step": 298905 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014491831299863337, "loss": 2.0837, "step": 298910 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014491666165396532, "loss": 1.9118, "step": 298915 }, { "epoch": 0.7, "grad_norm": 1.921875, "learning_rate": 0.00014491501029395288, "loss": 1.9908, "step": 298920 }, { "epoch": 0.7, "grad_norm": 2.703125, "learning_rate": 0.0001449133589185967, "loss": 2.1858, "step": 298925 }, { "epoch": 0.7, "grad_norm": 2.765625, "learning_rate": 0.0001449117075278973, "loss": 1.9746, "step": 298930 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014491005612185523, "loss": 1.9842, "step": 298935 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014490840470047112, "loss": 2.0731, "step": 298940 }, { "epoch": 0.7, "grad_norm": 1.7734375, "learning_rate": 0.00014490675326374542, "loss": 2.0027, "step": 298945 }, { "epoch": 0.7, "grad_norm": 2.359375, "learning_rate": 0.00014490510181167877, "loss": 2.0706, "step": 298950 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014490345034427174, "loss": 2.0796, "step": 298955 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014490179886152488, "loss": 1.926, "step": 298960 }, { "epoch": 0.7, "grad_norm": 1.8046875, "learning_rate": 0.00014490014736343878, "loss": 1.9193, "step": 298965 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014489849585001393, "loss": 1.9301, "step": 298970 }, { "epoch": 0.7, "grad_norm": 1.9609375, "learning_rate": 0.00014489684432125093, "loss": 2.1613, "step": 298975 }, { "epoch": 0.7, "grad_norm": 1.9296875, "learning_rate": 0.0001448951927771504, "loss": 1.9359, "step": 298980 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014489354121771281, "loss": 2.1231, "step": 298985 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014489188964293882, "loss": 1.9272, "step": 298990 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001448902380528289, "loss": 1.9043, "step": 298995 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.00014488858644738368, "loss": 2.1137, "step": 299000 }, { "epoch": 0.7, "grad_norm": 1.8359375, "learning_rate": 0.00014488693482660368, "loss": 1.9206, "step": 299005 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014488528319048954, "loss": 2.1432, "step": 299010 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014488363153904177, "loss": 2.0609, "step": 299015 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.0001448819798722609, "loss": 2.0871, "step": 299020 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.00014488032819014754, "loss": 2.0023, "step": 299025 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014487867649270225, "loss": 2.0028, "step": 299030 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.0001448770247799256, "loss": 2.0857, "step": 299035 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014487537305181815, "loss": 2.0831, "step": 299040 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014487372130838044, "loss": 1.9797, "step": 299045 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014487206954961305, "loss": 2.0281, "step": 299050 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014487041777551656, "loss": 2.0931, "step": 299055 }, { "epoch": 0.7, "grad_norm": 2.765625, "learning_rate": 0.00014486876598609151, "loss": 2.0299, "step": 299060 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.0001448671141813385, "loss": 2.078, "step": 299065 }, { "epoch": 0.7, "grad_norm": 1.8671875, "learning_rate": 0.00014486546236125805, "loss": 2.1877, "step": 299070 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014486381052585074, "loss": 1.9425, "step": 299075 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014486215867511715, "loss": 2.0423, "step": 299080 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.00014486050680905782, "loss": 2.079, "step": 299085 }, { "epoch": 0.7, "grad_norm": 2.484375, "learning_rate": 0.00014485885492767335, "loss": 2.1085, "step": 299090 }, { "epoch": 0.7, "grad_norm": 2.5, "learning_rate": 0.00014485720303096427, "loss": 1.9846, "step": 299095 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014485555111893117, "loss": 2.0253, "step": 299100 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014485389919157457, "loss": 1.9491, "step": 299105 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014485224724889508, "loss": 2.1036, "step": 299110 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014485059529089324, "loss": 2.0262, "step": 299115 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.00014484894331756964, "loss": 2.1226, "step": 299120 }, { "epoch": 0.7, "grad_norm": 2.46875, "learning_rate": 0.00014484729132892483, "loss": 2.2178, "step": 299125 }, { "epoch": 0.7, "grad_norm": 2.640625, "learning_rate": 0.00014484563932495935, "loss": 2.0665, "step": 299130 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001448439873056738, "loss": 2.0613, "step": 299135 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014484233527106873, "loss": 1.8885, "step": 299140 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014484068322114473, "loss": 2.1955, "step": 299145 }, { "epoch": 0.7, "grad_norm": 2.546875, "learning_rate": 0.00014483903115590228, "loss": 2.0487, "step": 299150 }, { "epoch": 0.7, "grad_norm": 2.140625, "learning_rate": 0.00014483737907534203, "loss": 2.0187, "step": 299155 }, { "epoch": 0.7, "grad_norm": 2.421875, "learning_rate": 0.00014483572697946453, "loss": 1.9842, "step": 299160 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014483407486827035, "loss": 1.9986, "step": 299165 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.00014483242274176, "loss": 2.0834, "step": 299170 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.0001448307705999341, "loss": 2.1992, "step": 299175 }, { "epoch": 0.7, "grad_norm": 2.3125, "learning_rate": 0.0001448291184427932, "loss": 2.0221, "step": 299180 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001448274662703378, "loss": 2.1396, "step": 299185 }, { "epoch": 0.7, "grad_norm": 1.984375, "learning_rate": 0.00014482581408256862, "loss": 2.0231, "step": 299190 }, { "epoch": 0.7, "grad_norm": 1.890625, "learning_rate": 0.00014482416187948607, "loss": 2.1891, "step": 299195 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.0001448225096610908, "loss": 2.0914, "step": 299200 }, { "epoch": 0.7, "grad_norm": 1.9453125, "learning_rate": 0.00014482085742738334, "loss": 2.2452, "step": 299205 }, { "epoch": 0.7, "grad_norm": 1.8515625, "learning_rate": 0.00014481920517836426, "loss": 1.9875, "step": 299210 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.0001448175529140341, "loss": 2.1956, "step": 299215 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014481590063439347, "loss": 1.9685, "step": 299220 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014481424833944292, "loss": 1.9674, "step": 299225 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014481259602918304, "loss": 2.036, "step": 299230 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.00014481094370361435, "loss": 2.0804, "step": 299235 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.0001448092913627374, "loss": 1.9574, "step": 299240 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014480763900655277, "loss": 2.0445, "step": 299245 }, { "epoch": 0.7, "grad_norm": 2.078125, "learning_rate": 0.00014480598663506108, "loss": 1.9908, "step": 299250 }, { "epoch": 0.7, "grad_norm": 1.8125, "learning_rate": 0.00014480433424826283, "loss": 2.0168, "step": 299255 }, { "epoch": 0.7, "grad_norm": 2.5625, "learning_rate": 0.00014480268184615864, "loss": 2.1347, "step": 299260 }, { "epoch": 0.7, "grad_norm": 1.796875, "learning_rate": 0.000144801029428749, "loss": 2.0476, "step": 299265 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.00014479937699603455, "loss": 1.8589, "step": 299270 }, { "epoch": 0.7, "grad_norm": 1.8515625, "learning_rate": 0.0001447977245480158, "loss": 2.0451, "step": 299275 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014479607208469334, "loss": 2.0505, "step": 299280 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014479441960606773, "loss": 2.145, "step": 299285 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014479276711213952, "loss": 1.9151, "step": 299290 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.0001447911146029093, "loss": 2.1794, "step": 299295 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.00014478946207837764, "loss": 2.2047, "step": 299300 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014478780953854504, "loss": 2.3086, "step": 299305 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014478615698341216, "loss": 1.9557, "step": 299310 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.0001447845044129795, "loss": 1.9644, "step": 299315 }, { "epoch": 0.7, "grad_norm": 2.234375, "learning_rate": 0.0001447828518272476, "loss": 2.0081, "step": 299320 }, { "epoch": 0.7, "grad_norm": 2.734375, "learning_rate": 0.00014478119922621714, "loss": 2.1125, "step": 299325 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.00014477954660988858, "loss": 2.0288, "step": 299330 }, { "epoch": 0.7, "grad_norm": 2.328125, "learning_rate": 0.00014477789397826252, "loss": 1.9972, "step": 299335 }, { "epoch": 0.7, "grad_norm": 2.84375, "learning_rate": 0.00014477624133133949, "loss": 1.9067, "step": 299340 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014477458866912012, "loss": 2.1423, "step": 299345 }, { "epoch": 0.7, "grad_norm": 2.40625, "learning_rate": 0.0001447729359916049, "loss": 2.1295, "step": 299350 }, { "epoch": 0.7, "grad_norm": 2.390625, "learning_rate": 0.00014477128329879447, "loss": 2.104, "step": 299355 }, { "epoch": 0.7, "grad_norm": 2.578125, "learning_rate": 0.00014476963059068933, "loss": 1.9357, "step": 299360 }, { "epoch": 0.7, "grad_norm": 2.109375, "learning_rate": 0.0001447679778672901, "loss": 2.0187, "step": 299365 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.00014476632512859728, "loss": 1.896, "step": 299370 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001447646723746115, "loss": 1.9656, "step": 299375 }, { "epoch": 0.7, "grad_norm": 2.21875, "learning_rate": 0.0001447630196053333, "loss": 2.1551, "step": 299380 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014476136682076322, "loss": 2.0437, "step": 299385 }, { "epoch": 0.7, "grad_norm": 2.296875, "learning_rate": 0.00014475971402090184, "loss": 2.3061, "step": 299390 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014475806120574976, "loss": 2.0719, "step": 299395 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.0001447564083753075, "loss": 2.1862, "step": 299400 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014475475552957562, "loss": 1.9867, "step": 299405 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.00014475310266855472, "loss": 2.1259, "step": 299410 }, { "epoch": 0.7, "grad_norm": 2.453125, "learning_rate": 0.00014475144979224537, "loss": 2.0781, "step": 299415 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014474979690064812, "loss": 2.0649, "step": 299420 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014474814399376346, "loss": 1.9425, "step": 299425 }, { "epoch": 0.7, "grad_norm": 2.25, "learning_rate": 0.0001447464910715921, "loss": 1.9254, "step": 299430 }, { "epoch": 0.7, "grad_norm": 2.125, "learning_rate": 0.00014474483813413445, "loss": 2.1248, "step": 299435 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014474318518139124, "loss": 2.0268, "step": 299440 }, { "epoch": 0.7, "grad_norm": 2.0625, "learning_rate": 0.0001447415322133629, "loss": 2.0786, "step": 299445 }, { "epoch": 0.7, "grad_norm": 2.03125, "learning_rate": 0.00014473987923005002, "loss": 2.0847, "step": 299450 }, { "epoch": 0.7, "grad_norm": 2.28125, "learning_rate": 0.00014473822623145322, "loss": 2.1307, "step": 299455 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.000144736573217573, "loss": 2.3213, "step": 299460 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014473492018841, "loss": 2.2443, "step": 299465 }, { "epoch": 0.7, "grad_norm": 2.265625, "learning_rate": 0.0001447332671439647, "loss": 2.0808, "step": 299470 }, { "epoch": 0.7, "grad_norm": 2.515625, "learning_rate": 0.00014473161408423772, "loss": 1.9113, "step": 299475 }, { "epoch": 0.7, "grad_norm": 1.9765625, "learning_rate": 0.00014472996100922957, "loss": 2.1397, "step": 299480 }, { "epoch": 0.7, "grad_norm": 2.5, "learning_rate": 0.00014472830791894092, "loss": 1.88, "step": 299485 }, { "epoch": 0.7, "grad_norm": 2.4375, "learning_rate": 0.00014472665481337225, "loss": 2.0879, "step": 299490 }, { "epoch": 0.7, "grad_norm": 1.9921875, "learning_rate": 0.0001447250016925241, "loss": 2.0611, "step": 299495 }, { "epoch": 0.7, "grad_norm": 1.8984375, "learning_rate": 0.00014472334855639713, "loss": 1.8597, "step": 299500 }, { "epoch": 0.7, "grad_norm": 2.375, "learning_rate": 0.00014472169540499184, "loss": 1.9637, "step": 299505 }, { "epoch": 0.7, "grad_norm": 2.0, "learning_rate": 0.0001447200422383088, "loss": 1.8324, "step": 299510 }, { "epoch": 0.7, "grad_norm": 2.65625, "learning_rate": 0.00014471838905634859, "loss": 1.9766, "step": 299515 }, { "epoch": 0.7, "grad_norm": 2.734375, "learning_rate": 0.00014471673585911176, "loss": 2.1551, "step": 299520 }, { "epoch": 0.7, "grad_norm": 2.171875, "learning_rate": 0.0001447150826465989, "loss": 2.203, "step": 299525 }, { "epoch": 0.7, "grad_norm": 1.8515625, "learning_rate": 0.00014471342941881052, "loss": 2.0691, "step": 299530 }, { "epoch": 0.7, "grad_norm": 1.9375, "learning_rate": 0.00014471177617574724, "loss": 2.0737, "step": 299535 }, { "epoch": 0.7, "grad_norm": 2.1875, "learning_rate": 0.0001447101229174096, "loss": 2.1328, "step": 299540 }, { "epoch": 0.7, "grad_norm": 2.15625, "learning_rate": 0.00014470846964379817, "loss": 2.1536, "step": 299545 }, { "epoch": 0.7, "grad_norm": 4.34375, "learning_rate": 0.00014470681635491355, "loss": 1.9822, "step": 299550 }, { "epoch": 0.7, "grad_norm": 1.765625, "learning_rate": 0.00014470516305075626, "loss": 2.0299, "step": 299555 }, { "epoch": 0.7, "grad_norm": 2.203125, "learning_rate": 0.00014470350973132685, "loss": 2.1045, "step": 299560 }, { "epoch": 0.7, "grad_norm": 2.828125, "learning_rate": 0.0001447018563966259, "loss": 1.9548, "step": 299565 }, { "epoch": 0.7, "grad_norm": 2.90625, "learning_rate": 0.00014470020304665403, "loss": 2.1457, "step": 299570 }, { "epoch": 0.7, "grad_norm": 2.34375, "learning_rate": 0.00014469854968141173, "loss": 2.1927, "step": 299575 }, { "epoch": 0.71, "grad_norm": 1.7421875, "learning_rate": 0.00014469689630089961, "loss": 2.0548, "step": 299580 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001446952429051182, "loss": 1.9149, "step": 299585 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.0001446935894940681, "loss": 2.0819, "step": 299590 }, { "epoch": 0.71, "grad_norm": 2.65625, "learning_rate": 0.00014469193606774986, "loss": 1.9106, "step": 299595 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014469028262616406, "loss": 2.1119, "step": 299600 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001446886291693112, "loss": 1.976, "step": 299605 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014468697569719195, "loss": 2.03, "step": 299610 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014468532220980674, "loss": 1.9508, "step": 299615 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001446836687071563, "loss": 1.9776, "step": 299620 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014468201518924107, "loss": 2.1618, "step": 299625 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014468036165606166, "loss": 2.0153, "step": 299630 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014467870810761863, "loss": 2.1351, "step": 299635 }, { "epoch": 0.71, "grad_norm": 1.6796875, "learning_rate": 0.00014467705454391253, "loss": 2.0981, "step": 299640 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014467540096494395, "loss": 2.0544, "step": 299645 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.00014467374737071344, "loss": 1.9488, "step": 299650 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014467209376122157, "loss": 2.0518, "step": 299655 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.0001446704401364689, "loss": 2.0799, "step": 299660 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.000144668786496456, "loss": 2.2202, "step": 299665 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.0001446671328411834, "loss": 1.9648, "step": 299670 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014466547917065174, "loss": 1.9482, "step": 299675 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014466382548486152, "loss": 2.0776, "step": 299680 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014466217178381335, "loss": 2.0627, "step": 299685 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014466051806750777, "loss": 2.073, "step": 299690 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.0001446588643359453, "loss": 2.1171, "step": 299695 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.0001446572105891266, "loss": 1.9878, "step": 299700 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014465555682705218, "loss": 2.1329, "step": 299705 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001446539030497226, "loss": 2.0724, "step": 299710 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014465224925713844, "loss": 2.1735, "step": 299715 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014465059544930027, "loss": 2.1154, "step": 299720 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014464894162620863, "loss": 2.0246, "step": 299725 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014464728778786412, "loss": 2.1185, "step": 299730 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.0001446456339342673, "loss": 2.0966, "step": 299735 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014464398006541868, "loss": 1.9356, "step": 299740 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014464232618131888, "loss": 2.0709, "step": 299745 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014464067228196848, "loss": 2.1348, "step": 299750 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.000144639018367368, "loss": 2.0177, "step": 299755 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.000144637364437518, "loss": 2.1645, "step": 299760 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.0001446357104924191, "loss": 2.2044, "step": 299765 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014463405653207181, "loss": 2.1049, "step": 299770 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014463240255647669, "loss": 2.121, "step": 299775 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014463074856563442, "loss": 1.8944, "step": 299780 }, { "epoch": 0.71, "grad_norm": 2.734375, "learning_rate": 0.0001446290945595454, "loss": 2.1173, "step": 299785 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014462744053821028, "loss": 2.029, "step": 299790 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.00014462578650162964, "loss": 2.0914, "step": 299795 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014462413244980402, "loss": 1.8696, "step": 299800 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.000144622478382734, "loss": 1.913, "step": 299805 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001446208243004201, "loss": 1.9559, "step": 299810 }, { "epoch": 0.71, "grad_norm": 2.59375, "learning_rate": 0.00014461917020286292, "loss": 2.0538, "step": 299815 }, { "epoch": 0.71, "grad_norm": 2.90625, "learning_rate": 0.00014461751609006303, "loss": 2.1384, "step": 299820 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.000144615861962021, "loss": 2.1423, "step": 299825 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014461420781873737, "loss": 2.0934, "step": 299830 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014461255366021272, "loss": 2.0339, "step": 299835 }, { "epoch": 0.71, "grad_norm": 1.75, "learning_rate": 0.0001446108994864476, "loss": 2.1615, "step": 299840 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.0001446092452974426, "loss": 2.0511, "step": 299845 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014460759109319828, "loss": 1.8502, "step": 299850 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014460593687371518, "loss": 2.0598, "step": 299855 }, { "epoch": 0.71, "grad_norm": 1.828125, "learning_rate": 0.00014460428263899391, "loss": 2.0241, "step": 299860 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014460262838903498, "loss": 2.0507, "step": 299865 }, { "epoch": 0.71, "grad_norm": 1.9296875, "learning_rate": 0.00014460097412383898, "loss": 1.9429, "step": 299870 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014459931984340654, "loss": 2.0354, "step": 299875 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.0001445976655477381, "loss": 2.1628, "step": 299880 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014459601123683428, "loss": 1.9994, "step": 299885 }, { "epoch": 0.71, "grad_norm": 1.859375, "learning_rate": 0.00014459435691069566, "loss": 1.9021, "step": 299890 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014459270256932284, "loss": 2.0342, "step": 299895 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014459104821271633, "loss": 2.1744, "step": 299900 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.0001445893938408767, "loss": 2.1009, "step": 299905 }, { "epoch": 0.71, "grad_norm": 1.796875, "learning_rate": 0.0001445877394538045, "loss": 2.0459, "step": 299910 }, { "epoch": 0.71, "grad_norm": 2.609375, "learning_rate": 0.00014458608505150036, "loss": 2.0491, "step": 299915 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.0001445844306339648, "loss": 2.0571, "step": 299920 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014458277620119837, "loss": 2.0791, "step": 299925 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.0001445811217532017, "loss": 2.0729, "step": 299930 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014457946728997525, "loss": 2.0889, "step": 299935 }, { "epoch": 0.71, "grad_norm": 2.5625, "learning_rate": 0.00014457781281151968, "loss": 2.0496, "step": 299940 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.0001445761583178355, "loss": 2.0774, "step": 299945 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.0001445745038089233, "loss": 2.0697, "step": 299950 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.00014457284928478365, "loss": 1.9413, "step": 299955 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014457119474541708, "loss": 2.0892, "step": 299960 }, { "epoch": 0.71, "grad_norm": 2.53125, "learning_rate": 0.00014456954019082422, "loss": 2.1211, "step": 299965 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.00014456788562100558, "loss": 2.1096, "step": 299970 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014456623103596176, "loss": 2.0023, "step": 299975 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014456457643569328, "loss": 2.0739, "step": 299980 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014456292182020078, "loss": 2.1075, "step": 299985 }, { "epoch": 0.71, "grad_norm": 2.59375, "learning_rate": 0.0001445612671894847, "loss": 2.0263, "step": 299990 }, { "epoch": 0.71, "grad_norm": 1.7109375, "learning_rate": 0.00014455961254354572, "loss": 1.8767, "step": 299995 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.0001445579578823844, "loss": 2.103, "step": 300000 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014455630320600124, "loss": 2.0449, "step": 300005 }, { "epoch": 0.71, "grad_norm": 2.859375, "learning_rate": 0.00014455464851439686, "loss": 2.1119, "step": 300010 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014455299380757177, "loss": 2.0571, "step": 300015 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001445513390855266, "loss": 2.1286, "step": 300020 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014454968434826187, "loss": 2.1421, "step": 300025 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014454802959577818, "loss": 2.2596, "step": 300030 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014454637482807606, "loss": 2.0836, "step": 300035 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014454472004515604, "loss": 2.0405, "step": 300040 }, { "epoch": 0.71, "grad_norm": 1.8828125, "learning_rate": 0.0001445430652470188, "loss": 2.16, "step": 300045 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014454141043366485, "loss": 2.1224, "step": 300050 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014453975560509472, "loss": 1.9955, "step": 300055 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.000144538100761309, "loss": 2.1357, "step": 300060 }, { "epoch": 0.71, "grad_norm": 1.796875, "learning_rate": 0.00014453644590230825, "loss": 2.1594, "step": 300065 }, { "epoch": 0.71, "grad_norm": 1.890625, "learning_rate": 0.00014453479102809305, "loss": 2.133, "step": 300070 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014453313613866397, "loss": 2.1819, "step": 300075 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014453148123402156, "loss": 1.9979, "step": 300080 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014452982631416638, "loss": 1.9718, "step": 300085 }, { "epoch": 0.71, "grad_norm": 1.84375, "learning_rate": 0.000144528171379099, "loss": 2.099, "step": 300090 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014452651642882, "loss": 2.0763, "step": 300095 }, { "epoch": 0.71, "grad_norm": 1.8984375, "learning_rate": 0.00014452486146332994, "loss": 2.0255, "step": 300100 }, { "epoch": 0.71, "grad_norm": 1.6796875, "learning_rate": 0.00014452320648262936, "loss": 2.0161, "step": 300105 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.00014452155148671883, "loss": 2.1936, "step": 300110 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014451989647559899, "loss": 2.0552, "step": 300115 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014451824144927027, "loss": 2.0532, "step": 300120 }, { "epoch": 0.71, "grad_norm": 1.9609375, "learning_rate": 0.00014451658640773338, "loss": 1.9652, "step": 300125 }, { "epoch": 0.71, "grad_norm": 2.609375, "learning_rate": 0.00014451493135098878, "loss": 2.1065, "step": 300130 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.0001445132762790371, "loss": 1.9318, "step": 300135 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014451162119187883, "loss": 2.2012, "step": 300140 }, { "epoch": 0.71, "grad_norm": 1.5859375, "learning_rate": 0.00014450996608951464, "loss": 1.83, "step": 300145 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.000144508310971945, "loss": 2.0622, "step": 300150 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014450665583917052, "loss": 2.4116, "step": 300155 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014450500069119175, "loss": 2.0308, "step": 300160 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014450334552800932, "loss": 2.2322, "step": 300165 }, { "epoch": 0.71, "grad_norm": 2.65625, "learning_rate": 0.0001445016903496237, "loss": 2.0447, "step": 300170 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014450003515603546, "loss": 1.9991, "step": 300175 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014449837994724523, "loss": 1.9798, "step": 300180 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014449672472325356, "loss": 2.0868, "step": 300185 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.000144495069484061, "loss": 2.0178, "step": 300190 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.0001444934142296681, "loss": 1.8749, "step": 300195 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.00014449175896007543, "loss": 1.9573, "step": 300200 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.0001444901036752836, "loss": 2.0543, "step": 300205 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014448844837529316, "loss": 2.0754, "step": 300210 }, { "epoch": 0.71, "grad_norm": 1.9921875, "learning_rate": 0.00014448679306010463, "loss": 2.0995, "step": 300215 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.0001444851377297186, "loss": 2.1027, "step": 300220 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014448348238413564, "loss": 2.174, "step": 300225 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014448182702335633, "loss": 2.1119, "step": 300230 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014448017164738123, "loss": 2.1033, "step": 300235 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014447851625621087, "loss": 2.0106, "step": 300240 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014447686084984585, "loss": 2.0457, "step": 300245 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014447520542828674, "loss": 2.0358, "step": 300250 }, { "epoch": 0.71, "grad_norm": 1.8671875, "learning_rate": 0.00014447354999153408, "loss": 1.9872, "step": 300255 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014447189453958845, "loss": 1.9258, "step": 300260 }, { "epoch": 0.71, "grad_norm": 1.828125, "learning_rate": 0.00014447023907245043, "loss": 2.0827, "step": 300265 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014446858359012054, "loss": 2.1595, "step": 300270 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.0001444669280925994, "loss": 1.7923, "step": 300275 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014446527257988754, "loss": 2.156, "step": 300280 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014446361705198555, "loss": 2.2459, "step": 300285 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014446196150889396, "loss": 2.0784, "step": 300290 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014446030595061338, "loss": 2.0612, "step": 300295 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014445865037714434, "loss": 2.0014, "step": 300300 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014445699478848745, "loss": 1.7843, "step": 300305 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014445533918464317, "loss": 2.2314, "step": 300310 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.0001444536835656122, "loss": 1.9773, "step": 300315 }, { "epoch": 0.71, "grad_norm": 2.671875, "learning_rate": 0.00014445202793139504, "loss": 2.1626, "step": 300320 }, { "epoch": 0.71, "grad_norm": 1.8515625, "learning_rate": 0.00014445037228199228, "loss": 1.9309, "step": 300325 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014444871661740442, "loss": 2.0578, "step": 300330 }, { "epoch": 0.71, "grad_norm": 2.875, "learning_rate": 0.00014444706093763208, "loss": 2.283, "step": 300335 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014444540524267583, "loss": 2.0611, "step": 300340 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014444374953253623, "loss": 2.022, "step": 300345 }, { "epoch": 0.71, "grad_norm": 2.609375, "learning_rate": 0.00014444209380721384, "loss": 2.0422, "step": 300350 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014444043806670923, "loss": 2.1417, "step": 300355 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014443878231102292, "loss": 1.9965, "step": 300360 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014443712654015552, "loss": 2.0575, "step": 300365 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014443547075410765, "loss": 2.183, "step": 300370 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.0001444338149528798, "loss": 2.0179, "step": 300375 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014443215913647253, "loss": 1.9702, "step": 300380 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.0001444305033048864, "loss": 1.967, "step": 300385 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014442884745812206, "loss": 2.056, "step": 300390 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014442719159617998, "loss": 1.946, "step": 300395 }, { "epoch": 0.71, "grad_norm": 1.84375, "learning_rate": 0.0001444255357190608, "loss": 2.0061, "step": 300400 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014442387982676504, "loss": 2.0636, "step": 300405 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014442222391929327, "loss": 2.0977, "step": 300410 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014442056799664609, "loss": 2.1007, "step": 300415 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014441891205882397, "loss": 1.9834, "step": 300420 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014441725610582758, "loss": 1.9191, "step": 300425 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014441560013765748, "loss": 2.1495, "step": 300430 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014441394415431417, "loss": 2.0898, "step": 300435 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014441228815579828, "loss": 1.9713, "step": 300440 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.0001444106321421103, "loss": 1.9258, "step": 300445 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014440897611325091, "loss": 2.0601, "step": 300450 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014440732006922055, "loss": 2.1237, "step": 300455 }, { "epoch": 0.71, "grad_norm": 1.9296875, "learning_rate": 0.00014440566401001989, "loss": 2.1108, "step": 300460 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.00014440400793564942, "loss": 2.1109, "step": 300465 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014440235184610972, "loss": 2.1343, "step": 300470 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.0001444006957414014, "loss": 2.2314, "step": 300475 }, { "epoch": 0.71, "grad_norm": 1.953125, "learning_rate": 0.000144399039621525, "loss": 2.2577, "step": 300480 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.0001443973834864811, "loss": 2.2752, "step": 300485 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.0001443957273362702, "loss": 2.0226, "step": 300490 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.0001443940711708929, "loss": 2.0894, "step": 300495 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014439241499034983, "loss": 2.1599, "step": 300500 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.0001443907587946415, "loss": 2.0484, "step": 300505 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014438910258376847, "loss": 1.85, "step": 300510 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014438744635773135, "loss": 2.1406, "step": 300515 }, { "epoch": 0.71, "grad_norm": 1.78125, "learning_rate": 0.00014438579011653063, "loss": 2.0739, "step": 300520 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014438413386016693, "loss": 1.9739, "step": 300525 }, { "epoch": 0.71, "grad_norm": 1.71875, "learning_rate": 0.0001443824775886408, "loss": 1.9861, "step": 300530 }, { "epoch": 0.71, "grad_norm": 2.59375, "learning_rate": 0.00014438082130195282, "loss": 2.007, "step": 300535 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014437916500010357, "loss": 1.9348, "step": 300540 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014437750868309354, "loss": 1.8939, "step": 300545 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.0001443758523509234, "loss": 2.0668, "step": 300550 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014437419600359363, "loss": 2.3721, "step": 300555 }, { "epoch": 0.71, "grad_norm": 1.9609375, "learning_rate": 0.0001443725396411048, "loss": 1.8579, "step": 300560 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.00014437088326345757, "loss": 2.2251, "step": 300565 }, { "epoch": 0.71, "grad_norm": 1.984375, "learning_rate": 0.00014436922687065243, "loss": 2.1226, "step": 300570 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014436757046268993, "loss": 1.8587, "step": 300575 }, { "epoch": 0.71, "grad_norm": 2.609375, "learning_rate": 0.00014436591403957068, "loss": 2.0911, "step": 300580 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014436425760129524, "loss": 2.1358, "step": 300585 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.00014436260114786417, "loss": 2.0314, "step": 300590 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014436094467927798, "loss": 2.0811, "step": 300595 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014435928819553736, "loss": 2.042, "step": 300600 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.0001443576316966427, "loss": 2.0023, "step": 300605 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014435597518259476, "loss": 2.215, "step": 300610 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.000144354318653394, "loss": 2.0768, "step": 300615 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.000144352662109041, "loss": 2.266, "step": 300620 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014435100554953628, "loss": 2.0912, "step": 300625 }, { "epoch": 0.71, "grad_norm": 3.125, "learning_rate": 0.00014434934897488048, "loss": 2.3051, "step": 300630 }, { "epoch": 0.71, "grad_norm": 2.53125, "learning_rate": 0.00014434769238507415, "loss": 2.2578, "step": 300635 }, { "epoch": 0.71, "grad_norm": 1.859375, "learning_rate": 0.00014434603578011786, "loss": 1.9423, "step": 300640 }, { "epoch": 0.71, "grad_norm": 2.828125, "learning_rate": 0.00014434437916001214, "loss": 2.0769, "step": 300645 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014434272252475755, "loss": 2.0386, "step": 300650 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001443410658743547, "loss": 1.8368, "step": 300655 }, { "epoch": 0.71, "grad_norm": 2.53125, "learning_rate": 0.00014433940920880413, "loss": 2.0595, "step": 300660 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014433775252810645, "loss": 2.0333, "step": 300665 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014433609583226216, "loss": 2.0025, "step": 300670 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014433443912127185, "loss": 2.0951, "step": 300675 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.0001443327823951361, "loss": 1.9652, "step": 300680 }, { "epoch": 0.71, "grad_norm": 1.984375, "learning_rate": 0.0001443311256538555, "loss": 2.2131, "step": 300685 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014432946889743052, "loss": 2.0276, "step": 300690 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014432781212586184, "loss": 1.9575, "step": 300695 }, { "epoch": 0.71, "grad_norm": 1.9609375, "learning_rate": 0.00014432615533914995, "loss": 2.005, "step": 300700 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014432449853729544, "loss": 1.9209, "step": 300705 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.0001443228417202989, "loss": 2.0186, "step": 300710 }, { "epoch": 0.71, "grad_norm": 1.8046875, "learning_rate": 0.00014432118488816084, "loss": 1.9122, "step": 300715 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.0001443195280408819, "loss": 1.9964, "step": 300720 }, { "epoch": 0.71, "grad_norm": 1.9921875, "learning_rate": 0.00014431787117846257, "loss": 1.9687, "step": 300725 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014431621430090348, "loss": 2.1291, "step": 300730 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014431455740820517, "loss": 2.1433, "step": 300735 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014431290050036816, "loss": 2.0103, "step": 300740 }, { "epoch": 0.71, "grad_norm": 1.9296875, "learning_rate": 0.00014431124357739309, "loss": 2.0354, "step": 300745 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.0001443095866392805, "loss": 2.2623, "step": 300750 }, { "epoch": 0.71, "grad_norm": 1.9765625, "learning_rate": 0.00014430792968603094, "loss": 2.0442, "step": 300755 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.000144306272717645, "loss": 2.0482, "step": 300760 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.0001443046157341232, "loss": 2.0319, "step": 300765 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014430295873546617, "loss": 2.0152, "step": 300770 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014430130172167445, "loss": 2.1434, "step": 300775 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014429964469274862, "loss": 2.1734, "step": 300780 }, { "epoch": 0.71, "grad_norm": 1.828125, "learning_rate": 0.00014429798764868917, "loss": 2.1768, "step": 300785 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014429633058949675, "loss": 2.1774, "step": 300790 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.0001442946735151719, "loss": 2.1078, "step": 300795 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014429301642571524, "loss": 1.8448, "step": 300800 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014429135932112722, "loss": 2.0236, "step": 300805 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014428970220140848, "loss": 1.8759, "step": 300810 }, { "epoch": 0.71, "grad_norm": 1.9609375, "learning_rate": 0.00014428804506655957, "loss": 2.0707, "step": 300815 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014428638791658108, "loss": 1.9986, "step": 300820 }, { "epoch": 0.71, "grad_norm": 1.9609375, "learning_rate": 0.00014428473075147352, "loss": 2.0626, "step": 300825 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014428307357123755, "loss": 2.0861, "step": 300830 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014428141637587362, "loss": 2.1786, "step": 300835 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001442797591653824, "loss": 1.9346, "step": 300840 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014427810193976437, "loss": 2.128, "step": 300845 }, { "epoch": 0.71, "grad_norm": 1.8203125, "learning_rate": 0.00014427644469902016, "loss": 2.0568, "step": 300850 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001442747874431503, "loss": 2.1947, "step": 300855 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014427313017215538, "loss": 2.1062, "step": 300860 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014427147288603598, "loss": 2.2094, "step": 300865 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.0001442698155847926, "loss": 2.2155, "step": 300870 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.00014426815826842587, "loss": 2.2123, "step": 300875 }, { "epoch": 0.71, "grad_norm": 1.7109375, "learning_rate": 0.00014426650093693635, "loss": 2.0292, "step": 300880 }, { "epoch": 0.71, "grad_norm": 1.8125, "learning_rate": 0.00014426484359032456, "loss": 1.9195, "step": 300885 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001442631862285911, "loss": 2.0782, "step": 300890 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014426152885173657, "loss": 2.1853, "step": 300895 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014425987145976147, "loss": 2.0296, "step": 300900 }, { "epoch": 0.71, "grad_norm": 2.59375, "learning_rate": 0.00014425821405266638, "loss": 2.026, "step": 300905 }, { "epoch": 0.71, "grad_norm": 1.9140625, "learning_rate": 0.0001442565566304519, "loss": 2.0104, "step": 300910 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014425489919311858, "loss": 1.9648, "step": 300915 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014425324174066698, "loss": 2.1798, "step": 300920 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014425158427309768, "loss": 2.1827, "step": 300925 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001442499267904112, "loss": 1.9903, "step": 300930 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014424826929260818, "loss": 1.8987, "step": 300935 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014424661177968914, "loss": 2.0555, "step": 300940 }, { "epoch": 0.71, "grad_norm": 1.7109375, "learning_rate": 0.00014424495425165466, "loss": 2.087, "step": 300945 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014424329670850527, "loss": 1.9888, "step": 300950 }, { "epoch": 0.71, "grad_norm": 1.9140625, "learning_rate": 0.0001442416391502416, "loss": 1.9933, "step": 300955 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014423998157686417, "loss": 2.0851, "step": 300960 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014423832398837357, "loss": 2.1281, "step": 300965 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014423666638477037, "loss": 2.1318, "step": 300970 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.0001442350087660551, "loss": 2.1259, "step": 300975 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014423335113222834, "loss": 2.1176, "step": 300980 }, { "epoch": 0.71, "grad_norm": 2.84375, "learning_rate": 0.00014423169348329069, "loss": 2.2405, "step": 300985 }, { "epoch": 0.71, "grad_norm": 2.8125, "learning_rate": 0.0001442300358192427, "loss": 2.0744, "step": 300990 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.00014422837814008488, "loss": 2.0184, "step": 300995 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014422672044581787, "loss": 2.1289, "step": 301000 }, { "epoch": 0.71, "grad_norm": 1.8125, "learning_rate": 0.0001442250627364422, "loss": 2.0439, "step": 301005 }, { "epoch": 0.71, "grad_norm": 2.640625, "learning_rate": 0.00014422340501195845, "loss": 2.151, "step": 301010 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014422174727236718, "loss": 1.9269, "step": 301015 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.000144220089517669, "loss": 1.8726, "step": 301020 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014421843174786437, "loss": 2.1043, "step": 301025 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.000144216773962954, "loss": 2.0358, "step": 301030 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.0001442151161629383, "loss": 1.9509, "step": 301035 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014421345834781795, "loss": 2.0321, "step": 301040 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014421180051759348, "loss": 1.9753, "step": 301045 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014421014267226547, "loss": 1.9949, "step": 301050 }, { "epoch": 0.71, "grad_norm": 1.8984375, "learning_rate": 0.00014420848481183448, "loss": 2.1222, "step": 301055 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.000144206826936301, "loss": 2.0445, "step": 301060 }, { "epoch": 0.71, "grad_norm": 1.9296875, "learning_rate": 0.0001442051690456657, "loss": 2.0871, "step": 301065 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014420351113992916, "loss": 1.8495, "step": 301070 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014420185321909187, "loss": 2.1453, "step": 301075 }, { "epoch": 0.71, "grad_norm": 2.84375, "learning_rate": 0.00014420019528315443, "loss": 2.136, "step": 301080 }, { "epoch": 0.71, "grad_norm": 1.65625, "learning_rate": 0.0001441985373321174, "loss": 1.7832, "step": 301085 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.00014419687936598132, "loss": 2.0646, "step": 301090 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014419522138474684, "loss": 2.2157, "step": 301095 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014419356338841443, "loss": 2.1467, "step": 301100 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014419190537698468, "loss": 2.0413, "step": 301105 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014419024735045822, "loss": 1.9568, "step": 301110 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014418858930883554, "loss": 1.9542, "step": 301115 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014418693125211725, "loss": 2.107, "step": 301120 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014418527318030392, "loss": 2.2047, "step": 301125 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014418361509339604, "loss": 2.2423, "step": 301130 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014418195699139429, "loss": 1.9168, "step": 301135 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014418029887429916, "loss": 2.345, "step": 301140 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014417864074211124, "loss": 2.0382, "step": 301145 }, { "epoch": 0.71, "grad_norm": 1.890625, "learning_rate": 0.0001441769825948311, "loss": 2.0158, "step": 301150 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001441753244324593, "loss": 2.1389, "step": 301155 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.0001441736662549964, "loss": 2.155, "step": 301160 }, { "epoch": 0.71, "grad_norm": 2.78125, "learning_rate": 0.000144172008062443, "loss": 2.0454, "step": 301165 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014417034985479961, "loss": 2.1115, "step": 301170 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014416869163206682, "loss": 2.2007, "step": 301175 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.00014416703339424525, "loss": 2.0752, "step": 301180 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.0001441653751413354, "loss": 2.3633, "step": 301185 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014416371687333784, "loss": 1.9864, "step": 301190 }, { "epoch": 0.71, "grad_norm": 1.8671875, "learning_rate": 0.00014416205859025315, "loss": 2.2019, "step": 301195 }, { "epoch": 0.71, "grad_norm": 1.8828125, "learning_rate": 0.00014416040029208193, "loss": 1.9476, "step": 301200 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.0001441587419788247, "loss": 2.1225, "step": 301205 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.00014415708365048206, "loss": 2.1395, "step": 301210 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014415542530705455, "loss": 1.9568, "step": 301215 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014415376694854273, "loss": 2.1086, "step": 301220 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001441521085749472, "loss": 2.0689, "step": 301225 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001441504501862685, "loss": 2.0082, "step": 301230 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.0001441487917825072, "loss": 2.2367, "step": 301235 }, { "epoch": 0.71, "grad_norm": 1.9140625, "learning_rate": 0.00014414713336366388, "loss": 1.9865, "step": 301240 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.0001441454749297391, "loss": 1.9583, "step": 301245 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.0001441438164807334, "loss": 2.1798, "step": 301250 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014414215801664742, "loss": 2.0835, "step": 301255 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014414049953748165, "loss": 1.8509, "step": 301260 }, { "epoch": 0.71, "grad_norm": 2.890625, "learning_rate": 0.0001441388410432367, "loss": 2.1083, "step": 301265 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.0001441371825339131, "loss": 2.0361, "step": 301270 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014413552400951144, "loss": 2.1772, "step": 301275 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.0001441338654700323, "loss": 2.2128, "step": 301280 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014413220691547623, "loss": 1.9457, "step": 301285 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.00014413054834584377, "loss": 2.0555, "step": 301290 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014412888976113555, "loss": 1.9185, "step": 301295 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.00014412723116135207, "loss": 2.0425, "step": 301300 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.00014412557254649396, "loss": 2.1298, "step": 301305 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014412391391656172, "loss": 2.3018, "step": 301310 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014412225527155596, "loss": 1.9313, "step": 301315 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014412059661147726, "loss": 2.0149, "step": 301320 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014411893793632615, "loss": 1.8797, "step": 301325 }, { "epoch": 0.71, "grad_norm": 1.65625, "learning_rate": 0.0001441172792461032, "loss": 1.899, "step": 301330 }, { "epoch": 0.71, "grad_norm": 1.8671875, "learning_rate": 0.000144115620540809, "loss": 2.1946, "step": 301335 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.0001441139618204441, "loss": 1.8966, "step": 301340 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014411230308500908, "loss": 2.1031, "step": 301345 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.0001441106443345045, "loss": 2.014, "step": 301350 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014410898556893088, "loss": 2.0455, "step": 301355 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014410732678828887, "loss": 2.0711, "step": 301360 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.000144105667992579, "loss": 1.912, "step": 301365 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014410400918180185, "loss": 2.1989, "step": 301370 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014410235035595793, "loss": 1.8502, "step": 301375 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014410069151504784, "loss": 2.0739, "step": 301380 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001440990326590722, "loss": 2.0024, "step": 301385 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014409737378803152, "loss": 2.058, "step": 301390 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014409571490192635, "loss": 1.9073, "step": 301395 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014409405600075733, "loss": 2.0158, "step": 301400 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014409239708452494, "loss": 1.9665, "step": 301405 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014409073815322977, "loss": 2.0461, "step": 301410 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.00014408907920687245, "loss": 1.8106, "step": 301415 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014408742024545347, "loss": 2.0835, "step": 301420 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014408576126897344, "loss": 2.061, "step": 301425 }, { "epoch": 0.71, "grad_norm": 1.8671875, "learning_rate": 0.0001440841022774329, "loss": 2.1628, "step": 301430 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014408244327083247, "loss": 2.0484, "step": 301435 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014408078424917265, "loss": 1.9129, "step": 301440 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014407912521245404, "loss": 2.1182, "step": 301445 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.0001440774661606772, "loss": 2.1229, "step": 301450 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001440758070938427, "loss": 2.0544, "step": 301455 }, { "epoch": 0.71, "grad_norm": 1.875, "learning_rate": 0.0001440741480119511, "loss": 2.1041, "step": 301460 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014407248891500296, "loss": 2.0991, "step": 301465 }, { "epoch": 0.71, "grad_norm": 2.703125, "learning_rate": 0.00014407082980299885, "loss": 2.081, "step": 301470 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.0001440691706759394, "loss": 2.0357, "step": 301475 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.00014406751153382507, "loss": 2.0606, "step": 301480 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014406585237665653, "loss": 2.155, "step": 301485 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014406419320443426, "loss": 2.1374, "step": 301490 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014406253401715887, "loss": 2.0649, "step": 301495 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001440608748148309, "loss": 1.919, "step": 301500 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014405921559745098, "loss": 1.9968, "step": 301505 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014405755636501957, "loss": 2.1487, "step": 301510 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014405589711753732, "loss": 1.8352, "step": 301515 }, { "epoch": 0.71, "grad_norm": 2.59375, "learning_rate": 0.0001440542378550048, "loss": 1.9179, "step": 301520 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014405257857742258, "loss": 2.0721, "step": 301525 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014405091928479114, "loss": 2.0242, "step": 301530 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.00014404925997711114, "loss": 2.1447, "step": 301535 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.0001440476006543831, "loss": 2.2048, "step": 301540 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.0001440459413166076, "loss": 2.2165, "step": 301545 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014404428196378524, "loss": 1.7796, "step": 301550 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014404262259591651, "loss": 1.9915, "step": 301555 }, { "epoch": 0.71, "grad_norm": 1.8671875, "learning_rate": 0.00014404096321300204, "loss": 1.7891, "step": 301560 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.0001440393038150424, "loss": 1.9355, "step": 301565 }, { "epoch": 0.71, "grad_norm": 1.78125, "learning_rate": 0.0001440376444020381, "loss": 2.0312, "step": 301570 }, { "epoch": 0.71, "grad_norm": 2.65625, "learning_rate": 0.00014403598497398975, "loss": 2.264, "step": 301575 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014403432553089792, "loss": 2.1406, "step": 301580 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014403266607276315, "loss": 1.9937, "step": 301585 }, { "epoch": 0.71, "grad_norm": 1.8828125, "learning_rate": 0.00014403100659958603, "loss": 1.9978, "step": 301590 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014402934711136713, "loss": 1.9976, "step": 301595 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.000144027687608107, "loss": 2.0678, "step": 301600 }, { "epoch": 0.71, "grad_norm": 2.5625, "learning_rate": 0.0001440260280898062, "loss": 2.03, "step": 301605 }, { "epoch": 0.71, "grad_norm": 1.828125, "learning_rate": 0.00014402436855646532, "loss": 2.0382, "step": 301610 }, { "epoch": 0.71, "grad_norm": 1.71875, "learning_rate": 0.00014402270900808493, "loss": 2.1582, "step": 301615 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.0001440210494446656, "loss": 2.133, "step": 301620 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014401938986620784, "loss": 2.0192, "step": 301625 }, { "epoch": 0.71, "grad_norm": 3.09375, "learning_rate": 0.00014401773027271228, "loss": 2.1008, "step": 301630 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014401607066417946, "loss": 2.1108, "step": 301635 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014401441104060995, "loss": 2.1706, "step": 301640 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014401275140200434, "loss": 1.9741, "step": 301645 }, { "epoch": 0.71, "grad_norm": 1.8984375, "learning_rate": 0.00014401109174836313, "loss": 1.8733, "step": 301650 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014400943207968695, "loss": 1.901, "step": 301655 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014400777239597637, "loss": 2.08, "step": 301660 }, { "epoch": 0.71, "grad_norm": 1.984375, "learning_rate": 0.00014400611269723193, "loss": 2.0839, "step": 301665 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.0001440044529834542, "loss": 2.187, "step": 301670 }, { "epoch": 0.71, "grad_norm": 1.8515625, "learning_rate": 0.00014400279325464373, "loss": 2.0271, "step": 301675 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014400113351080112, "loss": 2.1577, "step": 301680 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014399947375192697, "loss": 2.0907, "step": 301685 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014399781397802177, "loss": 2.0273, "step": 301690 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.0001439961541890861, "loss": 2.0307, "step": 301695 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.00014399449438512053, "loss": 2.1877, "step": 301700 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014399283456612572, "loss": 1.9484, "step": 301705 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014399117473210208, "loss": 2.0127, "step": 301710 }, { "epoch": 0.71, "grad_norm": 2.53125, "learning_rate": 0.0001439895148830503, "loss": 2.1457, "step": 301715 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.0001439878550189709, "loss": 2.2387, "step": 301720 }, { "epoch": 0.71, "grad_norm": 2.5625, "learning_rate": 0.00014398619513986445, "loss": 1.9363, "step": 301725 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001439845352457315, "loss": 2.2575, "step": 301730 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014398287533657264, "loss": 2.1217, "step": 301735 }, { "epoch": 0.71, "grad_norm": 2.703125, "learning_rate": 0.00014398121541238842, "loss": 1.9685, "step": 301740 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014397955547317944, "loss": 2.0363, "step": 301745 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014397789551894624, "loss": 2.1828, "step": 301750 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014397623554968943, "loss": 2.0133, "step": 301755 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001439745755654095, "loss": 2.2465, "step": 301760 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014397291556610706, "loss": 2.1341, "step": 301765 }, { "epoch": 0.71, "grad_norm": 2.53125, "learning_rate": 0.00014397125555178268, "loss": 2.054, "step": 301770 }, { "epoch": 0.71, "grad_norm": 1.7734375, "learning_rate": 0.00014396959552243692, "loss": 2.0408, "step": 301775 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014396793547807036, "loss": 2.0411, "step": 301780 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014396627541868353, "loss": 1.9034, "step": 301785 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014396461534427703, "loss": 2.2003, "step": 301790 }, { "epoch": 0.71, "grad_norm": 1.921875, "learning_rate": 0.00014396295525485143, "loss": 1.9581, "step": 301795 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.0001439612951504073, "loss": 2.2419, "step": 301800 }, { "epoch": 0.71, "grad_norm": 1.9765625, "learning_rate": 0.00014395963503094518, "loss": 2.077, "step": 301805 }, { "epoch": 0.71, "grad_norm": 1.8125, "learning_rate": 0.00014395797489646564, "loss": 2.0756, "step": 301810 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014395631474696928, "loss": 1.9681, "step": 301815 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014395465458245664, "loss": 1.9243, "step": 301820 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014395299440292828, "loss": 2.0553, "step": 301825 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.0001439513342083848, "loss": 2.144, "step": 301830 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.00014394967399882673, "loss": 1.9469, "step": 301835 }, { "epoch": 0.71, "grad_norm": 1.84375, "learning_rate": 0.00014394801377425463, "loss": 1.8691, "step": 301840 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.00014394635353466917, "loss": 1.8837, "step": 301845 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001439446932800708, "loss": 1.9405, "step": 301850 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.0001439430330104601, "loss": 2.221, "step": 301855 }, { "epoch": 0.71, "grad_norm": 1.9921875, "learning_rate": 0.00014394137272583768, "loss": 2.0295, "step": 301860 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014393971242620407, "loss": 2.2162, "step": 301865 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.0001439380521115599, "loss": 2.0541, "step": 301870 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.00014393639178190566, "loss": 2.0235, "step": 301875 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014393473143724196, "loss": 2.2723, "step": 301880 }, { "epoch": 0.71, "grad_norm": 2.515625, "learning_rate": 0.00014393307107756935, "loss": 1.9482, "step": 301885 }, { "epoch": 0.71, "grad_norm": 2.625, "learning_rate": 0.00014393141070288842, "loss": 2.1, "step": 301890 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014392975031319972, "loss": 2.1082, "step": 301895 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014392808990850383, "loss": 2.0883, "step": 301900 }, { "epoch": 0.71, "grad_norm": 1.9140625, "learning_rate": 0.0001439264294888013, "loss": 1.7879, "step": 301905 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.0001439247690540927, "loss": 2.066, "step": 301910 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014392310860437862, "loss": 1.9178, "step": 301915 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014392144813965958, "loss": 2.0361, "step": 301920 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014391978765993622, "loss": 1.9713, "step": 301925 }, { "epoch": 0.71, "grad_norm": 2.875, "learning_rate": 0.000143918127165209, "loss": 2.1217, "step": 301930 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014391646665547862, "loss": 2.1753, "step": 301935 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014391480613074555, "loss": 2.0778, "step": 301940 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014391314559101037, "loss": 2.022, "step": 301945 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014391148503627366, "loss": 2.1964, "step": 301950 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.000143909824466536, "loss": 2.0532, "step": 301955 }, { "epoch": 0.71, "grad_norm": 2.625, "learning_rate": 0.00014390816388179796, "loss": 2.205, "step": 301960 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.0001439065032820601, "loss": 1.9712, "step": 301965 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014390484266732295, "loss": 2.0562, "step": 301970 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014390318203758715, "loss": 2.052, "step": 301975 }, { "epoch": 0.71, "grad_norm": 1.9765625, "learning_rate": 0.00014390152139285323, "loss": 1.9835, "step": 301980 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014389986073312173, "loss": 2.1629, "step": 301985 }, { "epoch": 0.71, "grad_norm": 1.8046875, "learning_rate": 0.00014389820005839322, "loss": 1.9969, "step": 301990 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001438965393686683, "loss": 2.1605, "step": 301995 }, { "epoch": 0.71, "grad_norm": 1.8984375, "learning_rate": 0.00014389487866394758, "loss": 2.1004, "step": 302000 }, { "epoch": 0.71, "grad_norm": 1.953125, "learning_rate": 0.0001438932179442315, "loss": 2.0142, "step": 302005 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014389155720952073, "loss": 1.7588, "step": 302010 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014388989645981582, "loss": 1.8952, "step": 302015 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014388823569511732, "loss": 2.0815, "step": 302020 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014388657491542578, "loss": 2.1646, "step": 302025 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014388491412074182, "loss": 2.2651, "step": 302030 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014388325331106598, "loss": 2.0262, "step": 302035 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001438815924863988, "loss": 2.0862, "step": 302040 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014387993164674087, "loss": 2.0698, "step": 302045 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014387827079209278, "loss": 2.0148, "step": 302050 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014387660992245507, "loss": 1.9991, "step": 302055 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.0001438749490378283, "loss": 2.0727, "step": 302060 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.0001438732881382131, "loss": 2.042, "step": 302065 }, { "epoch": 0.71, "grad_norm": 1.9921875, "learning_rate": 0.00014387162722360994, "loss": 2.1082, "step": 302070 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014386996629401944, "loss": 1.8999, "step": 302075 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014386830534944217, "loss": 2.0707, "step": 302080 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.0001438666443898787, "loss": 2.0118, "step": 302085 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.00014386498341532956, "loss": 2.1941, "step": 302090 }, { "epoch": 0.71, "grad_norm": 1.984375, "learning_rate": 0.00014386332242579541, "loss": 2.0455, "step": 302095 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.0001438616614212767, "loss": 2.1688, "step": 302100 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014386000040177404, "loss": 2.0871, "step": 302105 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014385833936728806, "loss": 2.1593, "step": 302110 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014385667831781927, "loss": 2.1521, "step": 302115 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014385501725336822, "loss": 2.0375, "step": 302120 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.0001438533561739355, "loss": 2.0265, "step": 302125 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014385169507952165, "loss": 1.9954, "step": 302130 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.0001438500339701273, "loss": 2.1545, "step": 302135 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014384837284575302, "loss": 2.175, "step": 302140 }, { "epoch": 0.71, "grad_norm": 1.8359375, "learning_rate": 0.00014384671170639926, "loss": 2.1286, "step": 302145 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.0001438450505520667, "loss": 2.2242, "step": 302150 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.0001438433893827559, "loss": 2.1277, "step": 302155 }, { "epoch": 0.71, "grad_norm": 1.9921875, "learning_rate": 0.0001438417281984674, "loss": 2.0232, "step": 302160 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014384006699920176, "loss": 1.9535, "step": 302165 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014383840578495956, "loss": 2.111, "step": 302170 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014383674455574136, "loss": 2.2384, "step": 302175 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014383508331154775, "loss": 1.9745, "step": 302180 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014383342205237924, "loss": 1.9801, "step": 302185 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014383176077823647, "loss": 1.9629, "step": 302190 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014383009948911997, "loss": 1.9855, "step": 302195 }, { "epoch": 0.71, "grad_norm": 2.78125, "learning_rate": 0.00014382843818503029, "loss": 2.054, "step": 302200 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014382677686596807, "loss": 2.072, "step": 302205 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014382511553193378, "loss": 2.0798, "step": 302210 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014382345418292806, "loss": 2.0473, "step": 302215 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014382179281895144, "loss": 2.0555, "step": 302220 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001438201314400045, "loss": 2.1463, "step": 302225 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014381847004608787, "loss": 2.2112, "step": 302230 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.000143816808637202, "loss": 1.9447, "step": 302235 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001438151472133475, "loss": 1.9712, "step": 302240 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014381348577452495, "loss": 2.1419, "step": 302245 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014381182432073494, "loss": 2.0318, "step": 302250 }, { "epoch": 0.71, "grad_norm": 1.7265625, "learning_rate": 0.00014381016285197804, "loss": 2.1076, "step": 302255 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014380850136825476, "loss": 2.0375, "step": 302260 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.0001438068398695657, "loss": 1.9888, "step": 302265 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014380517835591142, "loss": 2.1547, "step": 302270 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014380351682729257, "loss": 1.9662, "step": 302275 }, { "epoch": 0.71, "grad_norm": 3.28125, "learning_rate": 0.00014380185528370957, "loss": 2.1602, "step": 302280 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014380019372516308, "loss": 2.2227, "step": 302285 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014379853215165363, "loss": 2.0515, "step": 302290 }, { "epoch": 0.71, "grad_norm": 1.84375, "learning_rate": 0.00014379687056318186, "loss": 2.1324, "step": 302295 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014379520895974824, "loss": 2.1468, "step": 302300 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.0001437935473413534, "loss": 2.0577, "step": 302305 }, { "epoch": 0.71, "grad_norm": 1.953125, "learning_rate": 0.00014379188570799788, "loss": 1.9009, "step": 302310 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014379022405968224, "loss": 2.0827, "step": 302315 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.0001437885623964071, "loss": 2.0028, "step": 302320 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.000143786900718173, "loss": 2.282, "step": 302325 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014378523902498048, "loss": 2.0882, "step": 302330 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001437835773168301, "loss": 1.8877, "step": 302335 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014378191559372253, "loss": 1.9561, "step": 302340 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014378025385565818, "loss": 1.8603, "step": 302345 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014377859210263776, "loss": 2.1111, "step": 302350 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.00014377693033466174, "loss": 2.1495, "step": 302355 }, { "epoch": 0.71, "grad_norm": 2.59375, "learning_rate": 0.00014377526855173075, "loss": 1.9495, "step": 302360 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014377360675384534, "loss": 1.8594, "step": 302365 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014377194494100606, "loss": 1.9872, "step": 302370 }, { "epoch": 0.71, "grad_norm": 1.9140625, "learning_rate": 0.0001437702831132135, "loss": 1.827, "step": 302375 }, { "epoch": 0.71, "grad_norm": 1.625, "learning_rate": 0.00014376862127046819, "loss": 2.1503, "step": 302380 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014376695941277075, "loss": 2.1664, "step": 302385 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014376529754012174, "loss": 1.8226, "step": 302390 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014376363565252168, "loss": 2.0194, "step": 302395 }, { "epoch": 0.71, "grad_norm": 1.6875, "learning_rate": 0.00014376197374997115, "loss": 2.0306, "step": 302400 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.00014376031183247078, "loss": 2.316, "step": 302405 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.0001437586499000211, "loss": 2.0858, "step": 302410 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014375698795262263, "loss": 1.9992, "step": 302415 }, { "epoch": 0.71, "grad_norm": 1.9921875, "learning_rate": 0.000143755325990276, "loss": 2.1278, "step": 302420 }, { "epoch": 0.71, "grad_norm": 2.875, "learning_rate": 0.00014375366401298174, "loss": 2.0156, "step": 302425 }, { "epoch": 0.71, "grad_norm": 1.6640625, "learning_rate": 0.00014375200202074043, "loss": 2.0484, "step": 302430 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.0001437503400135527, "loss": 1.9304, "step": 302435 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014374867799141902, "loss": 2.1502, "step": 302440 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014374701595434, "loss": 2.1406, "step": 302445 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.00014374535390231617, "loss": 2.0337, "step": 302450 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.00014374369183534823, "loss": 2.0669, "step": 302455 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.0001437420297534366, "loss": 2.1975, "step": 302460 }, { "epoch": 0.71, "grad_norm": 1.9140625, "learning_rate": 0.00014374036765658189, "loss": 2.0933, "step": 302465 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014373870554478465, "loss": 2.0059, "step": 302470 }, { "epoch": 0.71, "grad_norm": 1.8515625, "learning_rate": 0.0001437370434180455, "loss": 2.1001, "step": 302475 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014373538127636498, "loss": 2.0659, "step": 302480 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014373371911974368, "loss": 2.2427, "step": 302485 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.00014373205694818213, "loss": 2.2461, "step": 302490 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.00014373039476168093, "loss": 2.1043, "step": 302495 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001437287325602406, "loss": 1.8936, "step": 302500 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.0001437270703438618, "loss": 2.1955, "step": 302505 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.000143725408112545, "loss": 1.9919, "step": 302510 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.0001437237458662908, "loss": 2.1238, "step": 302515 }, { "epoch": 0.71, "grad_norm": 1.8828125, "learning_rate": 0.0001437220836050998, "loss": 1.8788, "step": 302520 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.00014372042132897254, "loss": 2.0906, "step": 302525 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.0001437187590379096, "loss": 2.1057, "step": 302530 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014371709673191148, "loss": 2.2165, "step": 302535 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.00014371543441097888, "loss": 2.1617, "step": 302540 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.00014371377207511224, "loss": 2.1244, "step": 302545 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014371210972431223, "loss": 2.0533, "step": 302550 }, { "epoch": 0.71, "grad_norm": 2.734375, "learning_rate": 0.00014371044735857934, "loss": 1.9455, "step": 302555 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014370878497791417, "loss": 1.96, "step": 302560 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.0001437071225823173, "loss": 2.0425, "step": 302565 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014370546017178929, "loss": 1.7775, "step": 302570 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014370379774633068, "loss": 2.0507, "step": 302575 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.00014370213530594207, "loss": 2.1747, "step": 302580 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.000143700472850624, "loss": 1.9506, "step": 302585 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014369881038037708, "loss": 2.0291, "step": 302590 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014369714789520187, "loss": 2.192, "step": 302595 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.0001436954853950989, "loss": 2.0311, "step": 302600 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014369382288006877, "loss": 2.041, "step": 302605 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014369216035011202, "loss": 1.9548, "step": 302610 }, { "epoch": 0.71, "grad_norm": 1.9296875, "learning_rate": 0.00014369049780522922, "loss": 2.0882, "step": 302615 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014368883524542097, "loss": 1.8838, "step": 302620 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014368717267068784, "loss": 2.2452, "step": 302625 }, { "epoch": 0.71, "grad_norm": 1.7421875, "learning_rate": 0.00014368551008103037, "loss": 1.9331, "step": 302630 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014368384747644915, "loss": 1.9802, "step": 302635 }, { "epoch": 0.71, "grad_norm": 2.734375, "learning_rate": 0.0001436821848569447, "loss": 1.9898, "step": 302640 }, { "epoch": 0.71, "grad_norm": 1.859375, "learning_rate": 0.00014368052222251765, "loss": 2.1218, "step": 302645 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014367885957316853, "loss": 2.0826, "step": 302650 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014367719690889792, "loss": 2.0775, "step": 302655 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014367553422970638, "loss": 2.043, "step": 302660 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014367387153559454, "loss": 1.9526, "step": 302665 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014367220882656286, "loss": 1.9605, "step": 302670 }, { "epoch": 0.71, "grad_norm": 8.0, "learning_rate": 0.00014367054610261196, "loss": 2.121, "step": 302675 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014366888336374243, "loss": 1.9296, "step": 302680 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.0001436672206099548, "loss": 1.9865, "step": 302685 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001436655578412497, "loss": 1.9863, "step": 302690 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014366389505762758, "loss": 2.0307, "step": 302695 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014366223225908913, "loss": 1.9064, "step": 302700 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014366056944563487, "loss": 2.0478, "step": 302705 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014365890661726535, "loss": 1.9755, "step": 302710 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014365724377398114, "loss": 2.0112, "step": 302715 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014365558091578286, "loss": 2.0028, "step": 302720 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.000143653918042671, "loss": 1.9888, "step": 302725 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014365225515464624, "loss": 2.0216, "step": 302730 }, { "epoch": 0.71, "grad_norm": 2.78125, "learning_rate": 0.00014365059225170901, "loss": 2.1933, "step": 302735 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014364892933386, "loss": 2.0808, "step": 302740 }, { "epoch": 0.71, "grad_norm": 2.71875, "learning_rate": 0.00014364726640109967, "loss": 1.9521, "step": 302745 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.0001436456034534287, "loss": 2.1219, "step": 302750 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014364394049084755, "loss": 1.9927, "step": 302755 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.00014364227751335686, "loss": 1.7838, "step": 302760 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014364061452095718, "loss": 1.9626, "step": 302765 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014363895151364906, "loss": 1.9932, "step": 302770 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.0001436372884914331, "loss": 2.0343, "step": 302775 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014363562545430982, "loss": 2.0765, "step": 302780 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014363396240227983, "loss": 1.9189, "step": 302785 }, { "epoch": 0.71, "grad_norm": 1.921875, "learning_rate": 0.0001436322993353437, "loss": 2.0661, "step": 302790 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014363063625350198, "loss": 2.1437, "step": 302795 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014362897315675527, "loss": 1.9171, "step": 302800 }, { "epoch": 0.71, "grad_norm": 1.8359375, "learning_rate": 0.00014362731004510408, "loss": 2.1012, "step": 302805 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014362564691854904, "loss": 1.993, "step": 302810 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014362398377709066, "loss": 2.0525, "step": 302815 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014362232062072953, "loss": 2.0836, "step": 302820 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014362065744946622, "loss": 2.0775, "step": 302825 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001436189942633013, "loss": 2.1824, "step": 302830 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.0001436173310622354, "loss": 2.0118, "step": 302835 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014361566784626897, "loss": 1.9457, "step": 302840 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014361400461540268, "loss": 2.0286, "step": 302845 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.000143612341369637, "loss": 1.9052, "step": 302850 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.0001436106781089726, "loss": 2.1098, "step": 302855 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014360901483340997, "loss": 2.1136, "step": 302860 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014360735154294972, "loss": 2.0461, "step": 302865 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014360568823759242, "loss": 2.0413, "step": 302870 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014360402491733862, "loss": 2.1111, "step": 302875 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014360236158218888, "loss": 1.8854, "step": 302880 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.0001436006982321438, "loss": 2.0745, "step": 302885 }, { "epoch": 0.71, "grad_norm": 2.625, "learning_rate": 0.00014359903486720394, "loss": 2.0851, "step": 302890 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014359737148736984, "loss": 1.9438, "step": 302895 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.00014359570809264208, "loss": 1.9517, "step": 302900 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014359404468302123, "loss": 1.9254, "step": 302905 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014359238125850792, "loss": 2.1105, "step": 302910 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.0001435907178191026, "loss": 1.9546, "step": 302915 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.0001435890543648059, "loss": 1.9142, "step": 302920 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014358739089561842, "loss": 2.2005, "step": 302925 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.0001435857274115407, "loss": 2.2002, "step": 302930 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001435840639125733, "loss": 2.166, "step": 302935 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014358240039871677, "loss": 2.0232, "step": 302940 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014358073686997173, "loss": 2.0923, "step": 302945 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.0001435790733263387, "loss": 2.0389, "step": 302950 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.0001435774097678183, "loss": 2.1296, "step": 302955 }, { "epoch": 0.71, "grad_norm": 2.921875, "learning_rate": 0.00014357574619441104, "loss": 2.0698, "step": 302960 }, { "epoch": 0.71, "grad_norm": 1.9453125, "learning_rate": 0.00014357408260611752, "loss": 2.1724, "step": 302965 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014357241900293826, "loss": 2.0685, "step": 302970 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001435707553848739, "loss": 2.0319, "step": 302975 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014356909175192503, "loss": 2.016, "step": 302980 }, { "epoch": 0.71, "grad_norm": 2.625, "learning_rate": 0.00014356742810409215, "loss": 2.0806, "step": 302985 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.0001435657644413758, "loss": 2.0574, "step": 302990 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.0001435641007637766, "loss": 1.9891, "step": 302995 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014356243707129516, "loss": 2.0251, "step": 303000 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014356077336393196, "loss": 2.1148, "step": 303005 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014355910964168763, "loss": 2.2145, "step": 303010 }, { "epoch": 0.71, "grad_norm": 1.8671875, "learning_rate": 0.0001435574459045627, "loss": 2.0412, "step": 303015 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014355578215255777, "loss": 2.1226, "step": 303020 }, { "epoch": 0.71, "grad_norm": 1.75, "learning_rate": 0.00014355411838567342, "loss": 2.1287, "step": 303025 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014355245460391015, "loss": 2.0067, "step": 303030 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014355079080726862, "loss": 2.0366, "step": 303035 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014354912699574926, "loss": 2.1851, "step": 303040 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014354746316935283, "loss": 2.0904, "step": 303045 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.00014354579932807977, "loss": 2.2849, "step": 303050 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014354413547193064, "loss": 2.1591, "step": 303055 }, { "epoch": 0.71, "grad_norm": 2.703125, "learning_rate": 0.00014354247160090606, "loss": 2.1389, "step": 303060 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014354080771500658, "loss": 1.918, "step": 303065 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.0001435391438142328, "loss": 2.0214, "step": 303070 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.0001435374798985852, "loss": 2.2916, "step": 303075 }, { "epoch": 0.71, "grad_norm": 2.71875, "learning_rate": 0.00014353581596806447, "loss": 2.0588, "step": 303080 }, { "epoch": 0.71, "grad_norm": 2.703125, "learning_rate": 0.00014353415202267107, "loss": 2.086, "step": 303085 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.00014353248806240566, "loss": 2.1723, "step": 303090 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014353082408726872, "loss": 2.225, "step": 303095 }, { "epoch": 0.71, "grad_norm": 2.640625, "learning_rate": 0.00014352916009726088, "loss": 1.9879, "step": 303100 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.00014352749609238268, "loss": 1.9254, "step": 303105 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.0001435258320726347, "loss": 2.1503, "step": 303110 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014352416803801755, "loss": 2.1737, "step": 303115 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.00014352250398853167, "loss": 2.1057, "step": 303120 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014352083992417774, "loss": 2.2102, "step": 303125 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014351917584495635, "loss": 2.102, "step": 303130 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.000143517511750868, "loss": 1.9824, "step": 303135 }, { "epoch": 0.71, "grad_norm": 1.796875, "learning_rate": 0.00014351584764191327, "loss": 2.2433, "step": 303140 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014351418351809273, "loss": 2.2658, "step": 303145 }, { "epoch": 0.71, "grad_norm": 2.8125, "learning_rate": 0.00014351251937940693, "loss": 1.8642, "step": 303150 }, { "epoch": 0.71, "grad_norm": 1.7265625, "learning_rate": 0.0001435108552258565, "loss": 1.9965, "step": 303155 }, { "epoch": 0.71, "grad_norm": 2.53125, "learning_rate": 0.00014350919105744197, "loss": 2.2188, "step": 303160 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014350752687416392, "loss": 1.9619, "step": 303165 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014350586267602292, "loss": 2.0202, "step": 303170 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014350419846301948, "loss": 2.0207, "step": 303175 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014350253423515424, "loss": 2.1142, "step": 303180 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014350086999242775, "loss": 2.2183, "step": 303185 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.0001434992057348406, "loss": 2.1468, "step": 303190 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014349754146239329, "loss": 2.0229, "step": 303195 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001434958771750864, "loss": 1.875, "step": 303200 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014349421287292063, "loss": 2.0456, "step": 303205 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014349254855589637, "loss": 2.0092, "step": 303210 }, { "epoch": 0.71, "grad_norm": 1.8515625, "learning_rate": 0.00014349088422401432, "loss": 2.1809, "step": 303215 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014348921987727495, "loss": 2.0563, "step": 303220 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001434875555156789, "loss": 2.0745, "step": 303225 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.00014348589113922667, "loss": 2.0272, "step": 303230 }, { "epoch": 0.71, "grad_norm": 1.765625, "learning_rate": 0.00014348422674791893, "loss": 2.139, "step": 303235 }, { "epoch": 0.71, "grad_norm": 2.65625, "learning_rate": 0.00014348256234175617, "loss": 2.0095, "step": 303240 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014348089792073896, "loss": 2.2463, "step": 303245 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.0001434792334848679, "loss": 2.0426, "step": 303250 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014347756903414354, "loss": 2.1813, "step": 303255 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.00014347590456856646, "loss": 2.1211, "step": 303260 }, { "epoch": 0.71, "grad_norm": 3.0, "learning_rate": 0.0001434742400881372, "loss": 2.0479, "step": 303265 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014347257559285638, "loss": 2.0549, "step": 303270 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014347091108272454, "loss": 2.0295, "step": 303275 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014346924655774226, "loss": 2.0042, "step": 303280 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014346758201791004, "loss": 2.0735, "step": 303285 }, { "epoch": 0.71, "grad_norm": 2.671875, "learning_rate": 0.00014346591746322855, "loss": 2.262, "step": 303290 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.00014346425289369833, "loss": 1.9951, "step": 303295 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014346258830931992, "loss": 2.1666, "step": 303300 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014346092371009389, "loss": 1.9428, "step": 303305 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014345925909602078, "loss": 2.0147, "step": 303310 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014345759446710124, "loss": 1.9458, "step": 303315 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.00014345592982333583, "loss": 1.8953, "step": 303320 }, { "epoch": 0.71, "grad_norm": 1.8828125, "learning_rate": 0.00014345426516472506, "loss": 2.0758, "step": 303325 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.0001434526004912695, "loss": 2.1369, "step": 303330 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014345093580296975, "loss": 2.124, "step": 303335 }, { "epoch": 0.71, "grad_norm": 2.421875, "learning_rate": 0.0001434492710998264, "loss": 1.9712, "step": 303340 }, { "epoch": 0.71, "grad_norm": 2.5, "learning_rate": 0.00014344760638183997, "loss": 2.1404, "step": 303345 }, { "epoch": 0.71, "grad_norm": 2.46875, "learning_rate": 0.00014344594164901104, "loss": 1.9894, "step": 303350 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.0001434442769013402, "loss": 1.9768, "step": 303355 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014344261213882802, "loss": 2.1925, "step": 303360 }, { "epoch": 0.71, "grad_norm": 1.953125, "learning_rate": 0.00014344094736147504, "loss": 1.9375, "step": 303365 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.00014343928256928186, "loss": 2.203, "step": 303370 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014343761776224902, "loss": 2.1978, "step": 303375 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001434359529403771, "loss": 2.0539, "step": 303380 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001434342881036667, "loss": 1.8214, "step": 303385 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014343262325211832, "loss": 1.8581, "step": 303390 }, { "epoch": 0.71, "grad_norm": 1.953125, "learning_rate": 0.00014343095838573256, "loss": 1.955, "step": 303395 }, { "epoch": 0.71, "grad_norm": 1.828125, "learning_rate": 0.00014342929350451004, "loss": 1.8323, "step": 303400 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014342762860845127, "loss": 2.1686, "step": 303405 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014342596369755684, "loss": 2.0235, "step": 303410 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.00014342429877182728, "loss": 2.0445, "step": 303415 }, { "epoch": 0.71, "grad_norm": 2.796875, "learning_rate": 0.00014342263383126323, "loss": 2.134, "step": 303420 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.0001434209688758652, "loss": 2.1552, "step": 303425 }, { "epoch": 0.71, "grad_norm": 2.5625, "learning_rate": 0.00014341930390563378, "loss": 1.9434, "step": 303430 }, { "epoch": 0.71, "grad_norm": 1.875, "learning_rate": 0.00014341763892056957, "loss": 1.7728, "step": 303435 }, { "epoch": 0.71, "grad_norm": 2.3125, "learning_rate": 0.00014341597392067307, "loss": 1.9572, "step": 303440 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.0001434143089059449, "loss": 2.1185, "step": 303445 }, { "epoch": 0.71, "grad_norm": 1.6171875, "learning_rate": 0.0001434126438763856, "loss": 2.2908, "step": 303450 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.0001434109788319958, "loss": 1.9933, "step": 303455 }, { "epoch": 0.71, "grad_norm": 1.84375, "learning_rate": 0.00014340931377277599, "loss": 2.2393, "step": 303460 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.00014340764869872675, "loss": 1.9079, "step": 303465 }, { "epoch": 0.71, "grad_norm": 1.921875, "learning_rate": 0.00014340598360984867, "loss": 2.1444, "step": 303470 }, { "epoch": 0.71, "grad_norm": 2.125, "learning_rate": 0.00014340431850614236, "loss": 2.0699, "step": 303475 }, { "epoch": 0.71, "grad_norm": 4.71875, "learning_rate": 0.00014340265338760833, "loss": 2.118, "step": 303480 }, { "epoch": 0.71, "grad_norm": 2.015625, "learning_rate": 0.00014340098825424717, "loss": 2.1737, "step": 303485 }, { "epoch": 0.71, "grad_norm": 1.796875, "learning_rate": 0.00014339932310605943, "loss": 1.8828, "step": 303490 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.0001433976579430457, "loss": 2.0485, "step": 303495 }, { "epoch": 0.71, "grad_norm": 1.953125, "learning_rate": 0.00014339599276520654, "loss": 2.132, "step": 303500 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014339432757254255, "loss": 2.0095, "step": 303505 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014339266236505424, "loss": 1.952, "step": 303510 }, { "epoch": 0.71, "grad_norm": 2.234375, "learning_rate": 0.0001433909971427422, "loss": 2.0598, "step": 303515 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014338933190560703, "loss": 2.216, "step": 303520 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.00014338766665364928, "loss": 1.9016, "step": 303525 }, { "epoch": 0.71, "grad_norm": 2.171875, "learning_rate": 0.0001433860013868695, "loss": 2.1628, "step": 303530 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.0001433843361052683, "loss": 2.0023, "step": 303535 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.0001433826708088462, "loss": 2.0137, "step": 303540 }, { "epoch": 0.71, "grad_norm": 2.078125, "learning_rate": 0.0001433810054976038, "loss": 1.9752, "step": 303545 }, { "epoch": 0.71, "grad_norm": 2.203125, "learning_rate": 0.00014337934017154166, "loss": 2.261, "step": 303550 }, { "epoch": 0.71, "grad_norm": 1.90625, "learning_rate": 0.00014337767483066035, "loss": 2.0735, "step": 303555 }, { "epoch": 0.71, "grad_norm": 1.8828125, "learning_rate": 0.00014337600947496044, "loss": 2.1116, "step": 303560 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014337434410444252, "loss": 2.1812, "step": 303565 }, { "epoch": 0.71, "grad_norm": 2.359375, "learning_rate": 0.0001433726787191071, "loss": 1.9873, "step": 303570 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014337101331895482, "loss": 2.2001, "step": 303575 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.0001433693479039862, "loss": 2.0976, "step": 303580 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001433676824742018, "loss": 1.9449, "step": 303585 }, { "epoch": 0.71, "grad_norm": 2.1875, "learning_rate": 0.00014336601702960228, "loss": 2.0853, "step": 303590 }, { "epoch": 0.71, "grad_norm": 2.40625, "learning_rate": 0.00014336435157018807, "loss": 2.0873, "step": 303595 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014336268609595987, "loss": 2.0017, "step": 303600 }, { "epoch": 0.71, "grad_norm": 2.4375, "learning_rate": 0.00014336102060691813, "loss": 1.9999, "step": 303605 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.0001433593551030635, "loss": 2.2478, "step": 303610 }, { "epoch": 0.71, "grad_norm": 2.375, "learning_rate": 0.00014335768958439657, "loss": 1.8364, "step": 303615 }, { "epoch": 0.71, "grad_norm": 2.6875, "learning_rate": 0.00014335602405091783, "loss": 1.949, "step": 303620 }, { "epoch": 0.71, "grad_norm": 1.890625, "learning_rate": 0.0001433543585026279, "loss": 1.8906, "step": 303625 }, { "epoch": 0.71, "grad_norm": 1.8359375, "learning_rate": 0.00014335269293952728, "loss": 1.9855, "step": 303630 }, { "epoch": 0.71, "grad_norm": 1.7734375, "learning_rate": 0.00014335102736161667, "loss": 2.0574, "step": 303635 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014334936176889652, "loss": 2.1376, "step": 303640 }, { "epoch": 0.71, "grad_norm": 2.0625, "learning_rate": 0.00014334769616136748, "loss": 1.9932, "step": 303645 }, { "epoch": 0.71, "grad_norm": 2.265625, "learning_rate": 0.00014334603053903005, "loss": 1.9805, "step": 303650 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.00014334436490188483, "loss": 1.9983, "step": 303655 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.0001433426992499324, "loss": 2.0983, "step": 303660 }, { "epoch": 0.71, "grad_norm": 2.390625, "learning_rate": 0.0001433410335831733, "loss": 2.1676, "step": 303665 }, { "epoch": 0.71, "grad_norm": 2.875, "learning_rate": 0.00014333936790160814, "loss": 2.0734, "step": 303670 }, { "epoch": 0.71, "grad_norm": 2.34375, "learning_rate": 0.00014333770220523745, "loss": 1.9731, "step": 303675 }, { "epoch": 0.71, "grad_norm": 1.828125, "learning_rate": 0.00014333603649406183, "loss": 2.2596, "step": 303680 }, { "epoch": 0.71, "grad_norm": 2.328125, "learning_rate": 0.0001433343707680818, "loss": 2.0149, "step": 303685 }, { "epoch": 0.71, "grad_norm": 2.28125, "learning_rate": 0.000143332705027298, "loss": 2.0909, "step": 303690 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014333103927171095, "loss": 2.2844, "step": 303695 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014332937350132122, "loss": 1.9932, "step": 303700 }, { "epoch": 0.71, "grad_norm": 2.546875, "learning_rate": 0.0001433277077161294, "loss": 2.0298, "step": 303705 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014332604191613605, "loss": 2.0068, "step": 303710 }, { "epoch": 0.71, "grad_norm": 1.96875, "learning_rate": 0.00014332437610134173, "loss": 2.172, "step": 303715 }, { "epoch": 0.71, "grad_norm": 2.0, "learning_rate": 0.00014332271027174705, "loss": 2.0211, "step": 303720 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.0001433210444273525, "loss": 2.114, "step": 303725 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014331937856815874, "loss": 2.1477, "step": 303730 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.00014331771269416628, "loss": 2.0107, "step": 303735 }, { "epoch": 0.71, "grad_norm": 2.03125, "learning_rate": 0.00014331604680537568, "loss": 2.0468, "step": 303740 }, { "epoch": 0.71, "grad_norm": 1.9609375, "learning_rate": 0.00014331438090178753, "loss": 2.0531, "step": 303745 }, { "epoch": 0.71, "grad_norm": 1.9296875, "learning_rate": 0.00014331271498340244, "loss": 1.9449, "step": 303750 }, { "epoch": 0.71, "grad_norm": 2.046875, "learning_rate": 0.0001433110490502209, "loss": 2.1722, "step": 303755 }, { "epoch": 0.71, "grad_norm": 2.578125, "learning_rate": 0.00014330938310224358, "loss": 2.0206, "step": 303760 }, { "epoch": 0.71, "grad_norm": 2.484375, "learning_rate": 0.0001433077171394709, "loss": 2.2092, "step": 303765 }, { "epoch": 0.71, "grad_norm": 2.09375, "learning_rate": 0.0001433060511619036, "loss": 1.9884, "step": 303770 }, { "epoch": 0.71, "grad_norm": 2.140625, "learning_rate": 0.00014330438516954214, "loss": 2.014, "step": 303775 }, { "epoch": 0.71, "grad_norm": 1.9765625, "learning_rate": 0.0001433027191623871, "loss": 2.05, "step": 303780 }, { "epoch": 0.71, "grad_norm": 2.453125, "learning_rate": 0.0001433010531404391, "loss": 1.9961, "step": 303785 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014329938710369862, "loss": 2.1853, "step": 303790 }, { "epoch": 0.71, "grad_norm": 2.21875, "learning_rate": 0.00014329772105216635, "loss": 2.1806, "step": 303795 }, { "epoch": 0.71, "grad_norm": 1.9375, "learning_rate": 0.00014329605498584276, "loss": 2.141, "step": 303800 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014329438890472844, "loss": 2.2535, "step": 303805 }, { "epoch": 0.71, "grad_norm": 2.15625, "learning_rate": 0.000143292722808824, "loss": 2.0436, "step": 303810 }, { "epoch": 0.71, "grad_norm": 2.296875, "learning_rate": 0.00014329105669812995, "loss": 2.2039, "step": 303815 }, { "epoch": 0.71, "grad_norm": 2.109375, "learning_rate": 0.0001432893905726469, "loss": 2.0445, "step": 303820 }, { "epoch": 0.71, "grad_norm": 2.25, "learning_rate": 0.00014328772443237541, "loss": 2.3044, "step": 303825 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014328605827731606, "loss": 2.0296, "step": 303830 }, { "epoch": 0.72, "grad_norm": 2.8125, "learning_rate": 0.0001432843921074694, "loss": 2.1582, "step": 303835 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014328272592283603, "loss": 2.0215, "step": 303840 }, { "epoch": 0.72, "grad_norm": 1.796875, "learning_rate": 0.00014328105972341646, "loss": 2.1022, "step": 303845 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001432793935092113, "loss": 2.11, "step": 303850 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014327772728022113, "loss": 2.0909, "step": 303855 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014327606103644647, "loss": 1.9843, "step": 303860 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.000143274394777888, "loss": 1.9786, "step": 303865 }, { "epoch": 0.72, "grad_norm": 2.703125, "learning_rate": 0.00014327272850454614, "loss": 2.0468, "step": 303870 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014327106221642154, "loss": 1.898, "step": 303875 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014326939591351477, "loss": 2.0834, "step": 303880 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014326772959582642, "loss": 2.1002, "step": 303885 }, { "epoch": 0.72, "grad_norm": 1.9765625, "learning_rate": 0.00014326606326335698, "loss": 2.0994, "step": 303890 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014326439691610712, "loss": 2.1374, "step": 303895 }, { "epoch": 0.72, "grad_norm": 2.703125, "learning_rate": 0.0001432627305540773, "loss": 2.0181, "step": 303900 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014326106417726818, "loss": 2.0875, "step": 303905 }, { "epoch": 0.72, "grad_norm": 2.9375, "learning_rate": 0.00014325939778568028, "loss": 2.0466, "step": 303910 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.0001432577313793142, "loss": 2.3566, "step": 303915 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.0001432560649581705, "loss": 1.9995, "step": 303920 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.0001432543985222497, "loss": 2.1571, "step": 303925 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014325273207155248, "loss": 2.0677, "step": 303930 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.0001432510656060793, "loss": 2.1883, "step": 303935 }, { "epoch": 0.72, "grad_norm": 1.8984375, "learning_rate": 0.00014324939912583077, "loss": 2.2263, "step": 303940 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014324773263080747, "loss": 2.1445, "step": 303945 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014324606612100996, "loss": 1.8759, "step": 303950 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.0001432443995964388, "loss": 1.925, "step": 303955 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.0001432427330570946, "loss": 2.074, "step": 303960 }, { "epoch": 0.72, "grad_norm": 2.828125, "learning_rate": 0.00014324106650297785, "loss": 2.1232, "step": 303965 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.0001432393999340892, "loss": 1.9404, "step": 303970 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014323773335042918, "loss": 2.0177, "step": 303975 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014323606675199836, "loss": 2.1004, "step": 303980 }, { "epoch": 0.72, "grad_norm": 2.671875, "learning_rate": 0.00014323440013879734, "loss": 1.9848, "step": 303985 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014323273351082665, "loss": 2.2051, "step": 303990 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014323106686808687, "loss": 2.0672, "step": 303995 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014322940021057858, "loss": 2.2707, "step": 304000 }, { "epoch": 0.72, "grad_norm": 1.96875, "learning_rate": 0.00014322773353830231, "loss": 2.0675, "step": 304005 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001432260668512587, "loss": 2.1738, "step": 304010 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014322440014944828, "loss": 1.9911, "step": 304015 }, { "epoch": 0.72, "grad_norm": 1.921875, "learning_rate": 0.0001432227334328716, "loss": 2.0263, "step": 304020 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014322106670152927, "loss": 1.8386, "step": 304025 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014321939995542185, "loss": 1.9097, "step": 304030 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.00014321773319454984, "loss": 2.1332, "step": 304035 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014321606641891393, "loss": 2.1626, "step": 304040 }, { "epoch": 0.72, "grad_norm": 1.7578125, "learning_rate": 0.00014321439962851462, "loss": 1.9062, "step": 304045 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014321273282335247, "loss": 2.1051, "step": 304050 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001432110660034281, "loss": 2.0232, "step": 304055 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.000143209399168742, "loss": 2.1878, "step": 304060 }, { "epoch": 0.72, "grad_norm": 2.84375, "learning_rate": 0.0001432077323192948, "loss": 2.0063, "step": 304065 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014320606545508707, "loss": 1.9283, "step": 304070 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014320439857611934, "loss": 1.9346, "step": 304075 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.00014320273168239223, "loss": 1.9911, "step": 304080 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014320106477390624, "loss": 2.1609, "step": 304085 }, { "epoch": 0.72, "grad_norm": 2.859375, "learning_rate": 0.00014319939785066202, "loss": 1.8027, "step": 304090 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.0001431977309126601, "loss": 1.8936, "step": 304095 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014319606395990107, "loss": 2.1335, "step": 304100 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014319439699238544, "loss": 2.1104, "step": 304105 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014319273001011385, "loss": 2.1344, "step": 304110 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014319106301308683, "loss": 1.9648, "step": 304115 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014318939600130494, "loss": 1.9399, "step": 304120 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001431877289747688, "loss": 1.8413, "step": 304125 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014318606193347892, "loss": 1.9813, "step": 304130 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014318439487743593, "loss": 2.0324, "step": 304135 }, { "epoch": 0.72, "grad_norm": 2.921875, "learning_rate": 0.00014318272780664034, "loss": 2.0843, "step": 304140 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014318106072109277, "loss": 2.2884, "step": 304145 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014317939362079376, "loss": 2.0579, "step": 304150 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014317772650574389, "loss": 1.9667, "step": 304155 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.0001431760593759437, "loss": 2.113, "step": 304160 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014317439223139383, "loss": 2.1486, "step": 304165 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014317272507209476, "loss": 2.0995, "step": 304170 }, { "epoch": 0.72, "grad_norm": 1.8515625, "learning_rate": 0.0001431710578980471, "loss": 1.8435, "step": 304175 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014316939070925145, "loss": 2.1553, "step": 304180 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.00014316772350570837, "loss": 2.0621, "step": 304185 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.0001431660562874184, "loss": 1.9746, "step": 304190 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.00014316438905438207, "loss": 2.1889, "step": 304195 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.00014316272180660007, "loss": 2.1378, "step": 304200 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.00014316105454407287, "loss": 2.217, "step": 304205 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.00014315938726680108, "loss": 2.1669, "step": 304210 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014315771997478526, "loss": 2.2743, "step": 304215 }, { "epoch": 0.72, "grad_norm": 1.9765625, "learning_rate": 0.00014315605266802597, "loss": 2.1374, "step": 304220 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001431543853465238, "loss": 1.9643, "step": 304225 }, { "epoch": 0.72, "grad_norm": 2.65625, "learning_rate": 0.0001431527180102793, "loss": 2.1674, "step": 304230 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014315105065929306, "loss": 1.9656, "step": 304235 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014314938329356564, "loss": 2.1254, "step": 304240 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.0001431477159130976, "loss": 2.07, "step": 304245 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014314604851788952, "loss": 2.1308, "step": 304250 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014314438110794197, "loss": 2.2004, "step": 304255 }, { "epoch": 0.72, "grad_norm": 1.7578125, "learning_rate": 0.00014314271368325553, "loss": 2.0972, "step": 304260 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014314104624383074, "loss": 2.0829, "step": 304265 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014313937878966817, "loss": 2.053, "step": 304270 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014313771132076842, "loss": 2.0005, "step": 304275 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.00014313604383713206, "loss": 2.1288, "step": 304280 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014313437633875964, "loss": 1.8792, "step": 304285 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014313270882565174, "loss": 2.0933, "step": 304290 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014313104129780893, "loss": 2.1801, "step": 304295 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.0001431293737552317, "loss": 2.2623, "step": 304300 }, { "epoch": 0.72, "grad_norm": 1.9375, "learning_rate": 0.00014312770619792078, "loss": 2.0246, "step": 304305 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014312603862587662, "loss": 2.0999, "step": 304310 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014312437103909982, "loss": 2.1738, "step": 304315 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014312270343759098, "loss": 1.9748, "step": 304320 }, { "epoch": 0.72, "grad_norm": 1.9453125, "learning_rate": 0.00014312103582135062, "loss": 1.9437, "step": 304325 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014311936819037932, "loss": 2.0798, "step": 304330 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.0001431177005446777, "loss": 1.8808, "step": 304335 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014311603288424626, "loss": 2.1885, "step": 304340 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014311436520908561, "loss": 2.0243, "step": 304345 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014311269751919633, "loss": 1.9265, "step": 304350 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.0001431110298145789, "loss": 2.0052, "step": 304355 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014310936209523402, "loss": 2.0764, "step": 304360 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.0001431076943611622, "loss": 1.9999, "step": 304365 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.000143106026612364, "loss": 2.1506, "step": 304370 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014310435884884, "loss": 2.0238, "step": 304375 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014310269107059077, "loss": 1.9471, "step": 304380 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.00014310102327761686, "loss": 2.2502, "step": 304385 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.0001430993554699189, "loss": 2.0177, "step": 304390 }, { "epoch": 0.72, "grad_norm": 2.578125, "learning_rate": 0.0001430976876474974, "loss": 2.2974, "step": 304395 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014309601981035296, "loss": 2.1106, "step": 304400 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014309435195848611, "loss": 2.1441, "step": 304405 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014309268409189743, "loss": 2.1681, "step": 304410 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014309101621058756, "loss": 2.198, "step": 304415 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.000143089348314557, "loss": 1.9463, "step": 304420 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014308768040380634, "loss": 2.1842, "step": 304425 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014308601247833613, "loss": 2.119, "step": 304430 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014308434453814696, "loss": 2.2007, "step": 304435 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014308267658323938, "loss": 2.1789, "step": 304440 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.000143081008613614, "loss": 1.9052, "step": 304445 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014307934062927137, "loss": 2.0705, "step": 304450 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014307767263021208, "loss": 1.8782, "step": 304455 }, { "epoch": 0.72, "grad_norm": 1.7578125, "learning_rate": 0.00014307600461643664, "loss": 1.9987, "step": 304460 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014307433658794565, "loss": 1.8904, "step": 304465 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.0001430726685447397, "loss": 1.932, "step": 304470 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014307100048681937, "loss": 2.0982, "step": 304475 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014306933241418516, "loss": 2.0138, "step": 304480 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.0001430676643268377, "loss": 2.0082, "step": 304485 }, { "epoch": 0.72, "grad_norm": 2.5, "learning_rate": 0.00014306599622477753, "loss": 2.0378, "step": 304490 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014306432810800524, "loss": 2.0569, "step": 304495 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.00014306265997652144, "loss": 2.0449, "step": 304500 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014306099183032664, "loss": 2.0991, "step": 304505 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014305932366942138, "loss": 2.0694, "step": 304510 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.0001430576554938063, "loss": 1.9928, "step": 304515 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014305598730348195, "loss": 2.0836, "step": 304520 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014305431909844888, "loss": 2.0593, "step": 304525 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.0001430526508787077, "loss": 2.0441, "step": 304530 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014305098264425894, "loss": 1.8469, "step": 304535 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.00014304931439510315, "loss": 1.9566, "step": 304540 }, { "epoch": 0.72, "grad_norm": 1.9296875, "learning_rate": 0.00014304764613124097, "loss": 1.9755, "step": 304545 }, { "epoch": 0.72, "grad_norm": 1.703125, "learning_rate": 0.00014304597785267292, "loss": 2.1876, "step": 304550 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.0001430443095593996, "loss": 2.1827, "step": 304555 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014304264125142155, "loss": 1.9064, "step": 304560 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014304097292873935, "loss": 2.1059, "step": 304565 }, { "epoch": 0.72, "grad_norm": 2.5625, "learning_rate": 0.00014303930459135357, "loss": 1.9911, "step": 304570 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.0001430376362392648, "loss": 1.9724, "step": 304575 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014303596787247356, "loss": 1.9517, "step": 304580 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014303429949098052, "loss": 2.2353, "step": 304585 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014303263109478613, "loss": 2.084, "step": 304590 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.000143030962683891, "loss": 2.0283, "step": 304595 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014302929425829574, "loss": 2.1915, "step": 304600 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014302762581800088, "loss": 2.1898, "step": 304605 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.000143025957363007, "loss": 2.2147, "step": 304610 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.00014302428889331474, "loss": 2.1922, "step": 304615 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.0001430226204089245, "loss": 2.0287, "step": 304620 }, { "epoch": 0.72, "grad_norm": 2.5, "learning_rate": 0.00014302095190983703, "loss": 2.0247, "step": 304625 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014301928339605278, "loss": 2.3038, "step": 304630 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.0001430176148675724, "loss": 2.063, "step": 304635 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.0001430159463243964, "loss": 2.1162, "step": 304640 }, { "epoch": 0.72, "grad_norm": 1.65625, "learning_rate": 0.00014301427776652534, "loss": 1.8824, "step": 304645 }, { "epoch": 0.72, "grad_norm": 1.875, "learning_rate": 0.00014301260919395985, "loss": 1.8499, "step": 304650 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014301094060670047, "loss": 1.8817, "step": 304655 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.00014300927200474779, "loss": 2.0574, "step": 304660 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014300760338810235, "loss": 1.8891, "step": 304665 }, { "epoch": 0.72, "grad_norm": 1.8515625, "learning_rate": 0.00014300593475676473, "loss": 1.9338, "step": 304670 }, { "epoch": 0.72, "grad_norm": 2.703125, "learning_rate": 0.00014300426611073549, "loss": 2.0774, "step": 304675 }, { "epoch": 0.72, "grad_norm": 1.8359375, "learning_rate": 0.00014300259745001523, "loss": 2.0321, "step": 304680 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.0001430009287746045, "loss": 2.098, "step": 304685 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.0001429992600845039, "loss": 2.0476, "step": 304690 }, { "epoch": 0.72, "grad_norm": 3.375, "learning_rate": 0.00014299759137971393, "loss": 2.0167, "step": 304695 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014299592266023522, "loss": 2.3073, "step": 304700 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014299425392606832, "loss": 2.061, "step": 304705 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.0001429925851772138, "loss": 1.9975, "step": 304710 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014299091641367223, "loss": 1.9167, "step": 304715 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014298924763544418, "loss": 2.1343, "step": 304720 }, { "epoch": 0.72, "grad_norm": 1.9296875, "learning_rate": 0.00014298757884253024, "loss": 2.1778, "step": 304725 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014298591003493097, "loss": 2.033, "step": 304730 }, { "epoch": 0.72, "grad_norm": 2.703125, "learning_rate": 0.00014298424121264694, "loss": 2.025, "step": 304735 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014298257237567867, "loss": 1.9926, "step": 304740 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014298090352402683, "loss": 1.9044, "step": 304745 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001429792346576919, "loss": 1.9574, "step": 304750 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014297756577667448, "loss": 2.1493, "step": 304755 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014297589688097513, "loss": 1.9625, "step": 304760 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014297422797059447, "loss": 2.0777, "step": 304765 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.000142972559045533, "loss": 2.0199, "step": 304770 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.0001429708901057914, "loss": 1.9284, "step": 304775 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.0001429692211513701, "loss": 2.1889, "step": 304780 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014296755218226972, "loss": 2.0549, "step": 304785 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014296588319849085, "loss": 2.2451, "step": 304790 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001429642142000341, "loss": 1.9775, "step": 304795 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014296254518689997, "loss": 1.8866, "step": 304800 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014296087615908905, "loss": 2.0408, "step": 304805 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.0001429592071166019, "loss": 2.0228, "step": 304810 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014295753805943913, "loss": 2.0833, "step": 304815 }, { "epoch": 0.72, "grad_norm": 2.5, "learning_rate": 0.00014295586898760128, "loss": 2.2544, "step": 304820 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.00014295419990108897, "loss": 2.1203, "step": 304825 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.00014295253079990268, "loss": 2.0007, "step": 304830 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.000142950861684043, "loss": 2.1935, "step": 304835 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014294919255351056, "loss": 2.0185, "step": 304840 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.0001429475234083059, "loss": 1.9652, "step": 304845 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001429458542484296, "loss": 1.9285, "step": 304850 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014294418507388218, "loss": 2.1764, "step": 304855 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014294251588466427, "loss": 2.1113, "step": 304860 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014294084668077642, "loss": 2.0657, "step": 304865 }, { "epoch": 0.72, "grad_norm": 1.96875, "learning_rate": 0.00014293917746221918, "loss": 2.2615, "step": 304870 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014293750822899314, "loss": 2.0575, "step": 304875 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.0001429358389810989, "loss": 1.8993, "step": 304880 }, { "epoch": 0.72, "grad_norm": 1.9375, "learning_rate": 0.000142934169718537, "loss": 2.0791, "step": 304885 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014293250044130796, "loss": 2.0526, "step": 304890 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.0001429308311494124, "loss": 2.0555, "step": 304895 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014292916184285092, "loss": 2.0618, "step": 304900 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014292749252162404, "loss": 1.8578, "step": 304905 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.0001429258231857324, "loss": 2.2018, "step": 304910 }, { "epoch": 0.72, "grad_norm": 2.546875, "learning_rate": 0.00014292415383517648, "loss": 2.0685, "step": 304915 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014292248446995688, "loss": 2.0399, "step": 304920 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.0001429208150900742, "loss": 2.1109, "step": 304925 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.000142919145695529, "loss": 2.0897, "step": 304930 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.00014291747628632185, "loss": 2.1197, "step": 304935 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014291580686245328, "loss": 2.0543, "step": 304940 }, { "epoch": 0.72, "grad_norm": 2.828125, "learning_rate": 0.00014291413742392386, "loss": 2.1713, "step": 304945 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014291246797073424, "loss": 2.075, "step": 304950 }, { "epoch": 0.72, "grad_norm": 1.84375, "learning_rate": 0.000142910798502885, "loss": 2.0614, "step": 304955 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014290912902037656, "loss": 1.9064, "step": 304960 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.0001429074595232096, "loss": 1.9285, "step": 304965 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014290579001138468, "loss": 2.0665, "step": 304970 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.0001429041204849024, "loss": 2.0989, "step": 304975 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014290245094376327, "loss": 1.885, "step": 304980 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014290078138796787, "loss": 1.921, "step": 304985 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.0001428991118175168, "loss": 2.0689, "step": 304990 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014289744223241061, "loss": 1.9754, "step": 304995 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.0001428957726326499, "loss": 1.7303, "step": 305000 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014289410301823518, "loss": 2.0573, "step": 305005 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014289243338916707, "loss": 2.0512, "step": 305010 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014289076374544613, "loss": 2.1155, "step": 305015 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014288909408707292, "loss": 2.08, "step": 305020 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.000142887424414048, "loss": 1.9861, "step": 305025 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014288575472637201, "loss": 2.1788, "step": 305030 }, { "epoch": 0.72, "grad_norm": 1.9453125, "learning_rate": 0.00014288408502404542, "loss": 2.1183, "step": 305035 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014288241530706886, "loss": 2.1404, "step": 305040 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014288074557544292, "loss": 1.9056, "step": 305045 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014287907582916808, "loss": 2.0771, "step": 305050 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014287740606824502, "loss": 2.0497, "step": 305055 }, { "epoch": 0.72, "grad_norm": 2.765625, "learning_rate": 0.00014287573629267425, "loss": 2.2853, "step": 305060 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014287406650245635, "loss": 2.1943, "step": 305065 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001428723966975919, "loss": 2.0439, "step": 305070 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014287072687808145, "loss": 2.1368, "step": 305075 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014286905704392555, "loss": 1.8581, "step": 305080 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014286738719512484, "loss": 2.0301, "step": 305085 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014286571733167986, "loss": 2.1314, "step": 305090 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.00014286404745359117, "loss": 2.1, "step": 305095 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014286237756085934, "loss": 2.0653, "step": 305100 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001428607076534849, "loss": 2.143, "step": 305105 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014285903773146854, "loss": 2.1858, "step": 305110 }, { "epoch": 0.72, "grad_norm": 1.7109375, "learning_rate": 0.00014285736779481073, "loss": 2.0581, "step": 305115 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014285569784351206, "loss": 2.1275, "step": 305120 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.0001428540278775731, "loss": 2.0709, "step": 305125 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001428523578969944, "loss": 1.9986, "step": 305130 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014285068790177661, "loss": 1.9354, "step": 305135 }, { "epoch": 0.72, "grad_norm": 2.84375, "learning_rate": 0.00014284901789192022, "loss": 2.0215, "step": 305140 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014284734786742585, "loss": 1.8991, "step": 305145 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014284567782829401, "loss": 1.9935, "step": 305150 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014284400777452534, "loss": 2.0718, "step": 305155 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014284233770612038, "loss": 1.8293, "step": 305160 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014284066762307968, "loss": 1.8396, "step": 305165 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014283899752540383, "loss": 1.9917, "step": 305170 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014283732741309342, "loss": 1.8675, "step": 305175 }, { "epoch": 0.72, "grad_norm": 1.8515625, "learning_rate": 0.00014283565728614902, "loss": 2.0443, "step": 305180 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014283398714457114, "loss": 2.1443, "step": 305185 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.0001428323169883604, "loss": 2.0559, "step": 305190 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.0001428306468175174, "loss": 2.1528, "step": 305195 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014282897663204263, "loss": 1.9936, "step": 305200 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014282730643193676, "loss": 2.2637, "step": 305205 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014282563621720025, "loss": 2.0981, "step": 305210 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014282396598783373, "loss": 2.1883, "step": 305215 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014282229574383778, "loss": 1.9366, "step": 305220 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014282062548521299, "loss": 2.0208, "step": 305225 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014281895521195986, "loss": 1.848, "step": 305230 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.000142817284924079, "loss": 2.1894, "step": 305235 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014281561462157097, "loss": 2.034, "step": 305240 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014281394430443637, "loss": 2.0276, "step": 305245 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.00014281227397267576, "loss": 1.9916, "step": 305250 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014281060362628968, "loss": 2.0603, "step": 305255 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014280893326527874, "loss": 2.0501, "step": 305260 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014280726288964346, "loss": 2.2883, "step": 305265 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.0001428055924993845, "loss": 1.961, "step": 305270 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.0001428039220945023, "loss": 2.1463, "step": 305275 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014280225167499755, "loss": 2.0295, "step": 305280 }, { "epoch": 0.72, "grad_norm": 2.625, "learning_rate": 0.00014280058124087076, "loss": 2.1542, "step": 305285 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014279891079212252, "loss": 2.0429, "step": 305290 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014279724032875338, "loss": 2.0286, "step": 305295 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014279556985076394, "loss": 2.2127, "step": 305300 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014279389935815476, "loss": 2.0455, "step": 305305 }, { "epoch": 0.72, "grad_norm": 2.5625, "learning_rate": 0.00014279222885092642, "loss": 2.1161, "step": 305310 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014279055832907946, "loss": 2.0209, "step": 305315 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014278888779261446, "loss": 2.0598, "step": 305320 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014278721724153201, "loss": 1.7997, "step": 305325 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014278554667583268, "loss": 2.0749, "step": 305330 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014278387609551705, "loss": 1.9958, "step": 305335 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014278220550058564, "loss": 1.8407, "step": 305340 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014278053489103905, "loss": 2.0234, "step": 305345 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014277886426687788, "loss": 1.9844, "step": 305350 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014277719362810264, "loss": 2.2593, "step": 305355 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014277552297471395, "loss": 2.1204, "step": 305360 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014277385230671239, "loss": 2.0554, "step": 305365 }, { "epoch": 0.72, "grad_norm": 1.9296875, "learning_rate": 0.00014277218162409846, "loss": 2.0188, "step": 305370 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014277051092687282, "loss": 2.0113, "step": 305375 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014276884021503596, "loss": 1.9837, "step": 305380 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014276716948858853, "loss": 2.0445, "step": 305385 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014276549874753103, "loss": 2.0035, "step": 305390 }, { "epoch": 0.72, "grad_norm": 2.671875, "learning_rate": 0.00014276382799186408, "loss": 1.9279, "step": 305395 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014276215722158818, "loss": 2.1856, "step": 305400 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014276048643670404, "loss": 2.0384, "step": 305405 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014275881563721209, "loss": 1.9465, "step": 305410 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014275714482311292, "loss": 1.8414, "step": 305415 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.0001427554739944072, "loss": 2.1596, "step": 305420 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014275380315109537, "loss": 1.9489, "step": 305425 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.00014275213229317813, "loss": 2.0721, "step": 305430 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014275046142065594, "loss": 2.0342, "step": 305435 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.00014274879053352942, "loss": 2.0073, "step": 305440 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.00014274711963179916, "loss": 2.1736, "step": 305445 }, { "epoch": 0.72, "grad_norm": 1.9140625, "learning_rate": 0.00014274544871546572, "loss": 1.9776, "step": 305450 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.0001427437777845296, "loss": 1.8193, "step": 305455 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001427421068389915, "loss": 2.0315, "step": 305460 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001427404358788519, "loss": 1.9829, "step": 305465 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014273876490411135, "loss": 2.0701, "step": 305470 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.0001427370939147705, "loss": 1.9032, "step": 305475 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001427354229108299, "loss": 2.0902, "step": 305480 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014273375189229005, "loss": 2.0386, "step": 305485 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.0001427320808591516, "loss": 1.9379, "step": 305490 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014273040981141512, "loss": 2.0708, "step": 305495 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014272873874908117, "loss": 1.9508, "step": 305500 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014272706767215024, "loss": 2.2197, "step": 305505 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014272539658062304, "loss": 2.0039, "step": 305510 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.0001427237254745, "loss": 2.0224, "step": 305515 }, { "epoch": 0.72, "grad_norm": 1.9453125, "learning_rate": 0.00014272205435378183, "loss": 2.0896, "step": 305520 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014272038321846902, "loss": 2.0003, "step": 305525 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001427187120685621, "loss": 1.8718, "step": 305530 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014271704090406174, "loss": 2.1247, "step": 305535 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014271536972496843, "loss": 2.0178, "step": 305540 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014271369853128283, "loss": 1.976, "step": 305545 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001427120273230054, "loss": 2.0434, "step": 305550 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014271035610013679, "loss": 2.0453, "step": 305555 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014270868486267752, "loss": 1.9549, "step": 305560 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.00014270701361062825, "loss": 2.0223, "step": 305565 }, { "epoch": 0.72, "grad_norm": 2.578125, "learning_rate": 0.00014270534234398944, "loss": 2.0031, "step": 305570 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014270367106276172, "loss": 2.2809, "step": 305575 }, { "epoch": 0.72, "grad_norm": 3.328125, "learning_rate": 0.00014270199976694565, "loss": 2.0019, "step": 305580 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014270032845654183, "loss": 2.1415, "step": 305585 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014269865713155077, "loss": 1.9583, "step": 305590 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014269698579197308, "loss": 2.1183, "step": 305595 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014269531443780932, "loss": 2.1609, "step": 305600 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001426936430690601, "loss": 2.1395, "step": 305605 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014269197168572593, "loss": 1.9055, "step": 305610 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001426903002878074, "loss": 1.8918, "step": 305615 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.0001426886288753051, "loss": 2.1959, "step": 305620 }, { "epoch": 0.72, "grad_norm": 2.84375, "learning_rate": 0.0001426869574482196, "loss": 2.026, "step": 305625 }, { "epoch": 0.72, "grad_norm": 2.6875, "learning_rate": 0.00014268528600655146, "loss": 2.3067, "step": 305630 }, { "epoch": 0.72, "grad_norm": 3.0625, "learning_rate": 0.00014268361455030125, "loss": 2.2303, "step": 305635 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014268194307946952, "loss": 2.0329, "step": 305640 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014268027159405688, "loss": 2.0766, "step": 305645 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014267860009406388, "loss": 1.9874, "step": 305650 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001426769285794911, "loss": 1.8967, "step": 305655 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014267525705033914, "loss": 2.1365, "step": 305660 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.0001426735855066085, "loss": 2.1902, "step": 305665 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.0001426719139482998, "loss": 1.9084, "step": 305670 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.0001426702423754136, "loss": 1.9825, "step": 305675 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014266857078795047, "loss": 1.8793, "step": 305680 }, { "epoch": 0.72, "grad_norm": 2.703125, "learning_rate": 0.00014266689918591098, "loss": 2.0058, "step": 305685 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014266522756929573, "loss": 2.1004, "step": 305690 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014266355593810522, "loss": 1.9713, "step": 305695 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.0001426618842923401, "loss": 2.1484, "step": 305700 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.0001426602126320009, "loss": 1.8532, "step": 305705 }, { "epoch": 0.72, "grad_norm": 1.796875, "learning_rate": 0.00014265854095708817, "loss": 2.205, "step": 305710 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014265686926760254, "loss": 1.9625, "step": 305715 }, { "epoch": 0.72, "grad_norm": 2.84375, "learning_rate": 0.0001426551975635445, "loss": 2.1501, "step": 305720 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014265352584491475, "loss": 1.9473, "step": 305725 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014265185411171374, "loss": 2.1919, "step": 305730 }, { "epoch": 0.72, "grad_norm": 1.9765625, "learning_rate": 0.0001426501823639421, "loss": 2.0877, "step": 305735 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014264851060160033, "loss": 2.1246, "step": 305740 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.0001426468388246891, "loss": 2.0364, "step": 305745 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014264516703320893, "loss": 1.7748, "step": 305750 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.0001426434952271604, "loss": 2.168, "step": 305755 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014264182340654408, "loss": 2.009, "step": 305760 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014264015157136055, "loss": 2.0931, "step": 305765 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014263847972161036, "loss": 2.0359, "step": 305770 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014263680785729408, "loss": 1.9563, "step": 305775 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.0001426351359784123, "loss": 2.0761, "step": 305780 }, { "epoch": 0.72, "grad_norm": 3.203125, "learning_rate": 0.00014263346408496558, "loss": 2.09, "step": 305785 }, { "epoch": 0.72, "grad_norm": 1.96875, "learning_rate": 0.0001426317921769545, "loss": 2.0959, "step": 305790 }, { "epoch": 0.72, "grad_norm": 1.9609375, "learning_rate": 0.00014263012025437965, "loss": 2.1594, "step": 305795 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014262844831724154, "loss": 2.0904, "step": 305800 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.0001426267763655408, "loss": 1.8305, "step": 305805 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.000142625104399278, "loss": 1.9154, "step": 305810 }, { "epoch": 0.72, "grad_norm": 1.9375, "learning_rate": 0.00014262343241845367, "loss": 2.095, "step": 305815 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014262176042306837, "loss": 2.0738, "step": 305820 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014262008841312276, "loss": 2.1114, "step": 305825 }, { "epoch": 0.72, "grad_norm": 1.859375, "learning_rate": 0.0001426184163886173, "loss": 2.0326, "step": 305830 }, { "epoch": 0.72, "grad_norm": 1.8984375, "learning_rate": 0.00014261674434955265, "loss": 1.9979, "step": 305835 }, { "epoch": 0.72, "grad_norm": 1.8515625, "learning_rate": 0.00014261507229592938, "loss": 1.9829, "step": 305840 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014261340022774795, "loss": 1.8016, "step": 305845 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014261172814500908, "loss": 2.1739, "step": 305850 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.0001426100560477132, "loss": 2.0814, "step": 305855 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.000142608383935861, "loss": 2.0181, "step": 305860 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014260671180945302, "loss": 2.0295, "step": 305865 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.0001426050396684898, "loss": 2.1096, "step": 305870 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.0001426033675129719, "loss": 2.0045, "step": 305875 }, { "epoch": 0.72, "grad_norm": 2.75, "learning_rate": 0.00014260169534289994, "loss": 2.0676, "step": 305880 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014260002315827445, "loss": 2.0804, "step": 305885 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.00014259835095909603, "loss": 2.1243, "step": 305890 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.00014259667874536524, "loss": 2.1184, "step": 305895 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014259500651708266, "loss": 2.0611, "step": 305900 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014259333427424885, "loss": 2.0786, "step": 305905 }, { "epoch": 0.72, "grad_norm": 1.7734375, "learning_rate": 0.0001425916620168644, "loss": 2.1187, "step": 305910 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.00014258998974492983, "loss": 2.1257, "step": 305915 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014258831745844576, "loss": 1.8331, "step": 305920 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014258664515741276, "loss": 2.0179, "step": 305925 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.0001425849728418314, "loss": 1.9512, "step": 305930 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.0001425833005117022, "loss": 2.1009, "step": 305935 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.0001425816281670258, "loss": 2.057, "step": 305940 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014257995580780272, "loss": 2.0407, "step": 305945 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.0001425782834340336, "loss": 2.1009, "step": 305950 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.00014257661104571896, "loss": 2.1147, "step": 305955 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014257493864285934, "loss": 2.0533, "step": 305960 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014257326622545534, "loss": 1.9176, "step": 305965 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014257159379350758, "loss": 2.1908, "step": 305970 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014256992134701658, "loss": 1.9391, "step": 305975 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014256824888598293, "loss": 1.9269, "step": 305980 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014256657641040717, "loss": 2.0502, "step": 305985 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.0001425649039202899, "loss": 2.0766, "step": 305990 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014256323141563173, "loss": 2.0318, "step": 305995 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014256155889643315, "loss": 2.1487, "step": 306000 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014255988636269478, "loss": 2.0505, "step": 306005 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014255821381441717, "loss": 1.9849, "step": 306010 }, { "epoch": 0.72, "grad_norm": 2.78125, "learning_rate": 0.0001425565412516009, "loss": 1.9156, "step": 306015 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014255486867424658, "loss": 2.1229, "step": 306020 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.0001425531960823547, "loss": 2.214, "step": 306025 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.0001425515234759259, "loss": 1.7641, "step": 306030 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.0001425498508549607, "loss": 1.9728, "step": 306035 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014254817821945974, "loss": 1.984, "step": 306040 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014254650556942352, "loss": 2.0935, "step": 306045 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014254483290485265, "loss": 2.2011, "step": 306050 }, { "epoch": 0.72, "grad_norm": 1.8984375, "learning_rate": 0.0001425431602257477, "loss": 2.0504, "step": 306055 }, { "epoch": 0.72, "grad_norm": 1.875, "learning_rate": 0.00014254148753210924, "loss": 1.9915, "step": 306060 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014253981482393783, "loss": 2.0682, "step": 306065 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014253814210123404, "loss": 2.034, "step": 306070 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014253646936399848, "loss": 2.1686, "step": 306075 }, { "epoch": 0.72, "grad_norm": 2.625, "learning_rate": 0.00014253479661223162, "loss": 2.0676, "step": 306080 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014253312384593416, "loss": 2.032, "step": 306085 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014253145106510665, "loss": 2.3853, "step": 306090 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014252977826974953, "loss": 1.8601, "step": 306095 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014252810545986353, "loss": 2.1498, "step": 306100 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014252643263544914, "loss": 2.0231, "step": 306105 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014252475979650698, "loss": 2.0369, "step": 306110 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014252308694303754, "loss": 2.0741, "step": 306115 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014252141407504145, "loss": 2.2675, "step": 306120 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001425197411925193, "loss": 2.1687, "step": 306125 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.0001425180682954716, "loss": 2.0113, "step": 306130 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014251639538389902, "loss": 1.9988, "step": 306135 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014251472245780202, "loss": 2.2544, "step": 306140 }, { "epoch": 0.72, "grad_norm": 1.859375, "learning_rate": 0.00014251304951718122, "loss": 1.9769, "step": 306145 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014251137656203717, "loss": 2.1715, "step": 306150 }, { "epoch": 0.72, "grad_norm": 1.96875, "learning_rate": 0.00014250970359237054, "loss": 1.8032, "step": 306155 }, { "epoch": 0.72, "grad_norm": 3.046875, "learning_rate": 0.00014250803060818177, "loss": 1.9579, "step": 306160 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014250635760947148, "loss": 2.2319, "step": 306165 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.00014250468459624024, "loss": 2.0749, "step": 306170 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014250301156848867, "loss": 2.1534, "step": 306175 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.00014250133852621727, "loss": 1.969, "step": 306180 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014249966546942667, "loss": 2.1639, "step": 306185 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014249799239811735, "loss": 1.9573, "step": 306190 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014249631931229003, "loss": 2.0881, "step": 306195 }, { "epoch": 0.72, "grad_norm": 2.6875, "learning_rate": 0.00014249464621194513, "loss": 2.201, "step": 306200 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001424929730970833, "loss": 2.1951, "step": 306205 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014249129996770515, "loss": 2.0685, "step": 306210 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014248962682381116, "loss": 2.1371, "step": 306215 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014248795366540192, "loss": 2.0096, "step": 306220 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014248628049247808, "loss": 1.92, "step": 306225 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.0001424846073050401, "loss": 2.083, "step": 306230 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014248293410308864, "loss": 1.8994, "step": 306235 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014248126088662423, "loss": 2.2307, "step": 306240 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014247958765564747, "loss": 2.1603, "step": 306245 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.00014247791441015893, "loss": 2.094, "step": 306250 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014247624115015908, "loss": 2.1525, "step": 306255 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014247456787564862, "loss": 1.9755, "step": 306260 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.0001424728945866281, "loss": 2.0953, "step": 306265 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.0001424712212830981, "loss": 2.2256, "step": 306270 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.0001424695479650591, "loss": 2.1783, "step": 306275 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014246787463251175, "loss": 1.9831, "step": 306280 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014246620128545657, "loss": 2.0407, "step": 306285 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.0001424645279238942, "loss": 1.9447, "step": 306290 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014246285454782518, "loss": 2.243, "step": 306295 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.0001424611811572501, "loss": 2.0215, "step": 306300 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014245950775216946, "loss": 2.2021, "step": 306305 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.0001424578343325839, "loss": 2.0898, "step": 306310 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.000142456160898494, "loss": 2.0362, "step": 306315 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014245448744990027, "loss": 2.246, "step": 306320 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014245281398680333, "loss": 1.9747, "step": 306325 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014245114050920373, "loss": 2.0885, "step": 306330 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014244946701710208, "loss": 2.3433, "step": 306335 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014244779351049887, "loss": 1.9501, "step": 306340 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014244611998939476, "loss": 2.0771, "step": 306345 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014244444645379029, "loss": 2.0419, "step": 306350 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.00014244277290368604, "loss": 2.0653, "step": 306355 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014244109933908255, "loss": 2.0489, "step": 306360 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.0001424394257599804, "loss": 1.9689, "step": 306365 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014243775216638018, "loss": 2.0661, "step": 306370 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014243607855828245, "loss": 1.9177, "step": 306375 }, { "epoch": 0.72, "grad_norm": 4.0625, "learning_rate": 0.00014243440493568778, "loss": 2.0416, "step": 306380 }, { "epoch": 0.72, "grad_norm": 1.6484375, "learning_rate": 0.00014243273129859677, "loss": 2.114, "step": 306385 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014243105764700993, "loss": 2.1021, "step": 306390 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014242938398092793, "loss": 2.0092, "step": 306395 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014242771030035123, "loss": 2.0144, "step": 306400 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014242603660528051, "loss": 2.1447, "step": 306405 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014242436289571626, "loss": 2.0395, "step": 306410 }, { "epoch": 0.72, "grad_norm": 1.84375, "learning_rate": 0.00014242268917165904, "loss": 1.9939, "step": 306415 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001424210154331095, "loss": 1.8705, "step": 306420 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014241934168006816, "loss": 2.0268, "step": 306425 }, { "epoch": 0.72, "grad_norm": 2.671875, "learning_rate": 0.00014241766791253562, "loss": 2.266, "step": 306430 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014241599413051243, "loss": 2.1171, "step": 306435 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014241432033399918, "loss": 2.0286, "step": 306440 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014241264652299637, "loss": 2.1976, "step": 306445 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.0001424109726975047, "loss": 2.027, "step": 306450 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014240929885752464, "loss": 2.105, "step": 306455 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.0001424076250030568, "loss": 2.1266, "step": 306460 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014240595113410173, "loss": 2.0246, "step": 306465 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014240427725066004, "loss": 2.0327, "step": 306470 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014240260335273227, "loss": 2.0333, "step": 306475 }, { "epoch": 0.72, "grad_norm": 1.8828125, "learning_rate": 0.000142400929440319, "loss": 2.0971, "step": 306480 }, { "epoch": 0.72, "grad_norm": 1.765625, "learning_rate": 0.0001423992555134208, "loss": 1.9831, "step": 306485 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014239758157203827, "loss": 2.15, "step": 306490 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.0001423959076161719, "loss": 1.9907, "step": 306495 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.0001423942336458224, "loss": 2.1017, "step": 306500 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014239255966099021, "loss": 2.1811, "step": 306505 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.00014239088566167597, "loss": 2.1168, "step": 306510 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014238921164788022, "loss": 2.0894, "step": 306515 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014238753761960354, "loss": 1.9981, "step": 306520 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.0001423858635768465, "loss": 2.1288, "step": 306525 }, { "epoch": 0.72, "grad_norm": 1.84375, "learning_rate": 0.00014238418951960973, "loss": 2.2083, "step": 306530 }, { "epoch": 0.72, "grad_norm": 1.921875, "learning_rate": 0.0001423825154478937, "loss": 2.0736, "step": 306535 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014238084136169906, "loss": 2.0542, "step": 306540 }, { "epoch": 0.72, "grad_norm": 2.53125, "learning_rate": 0.00014237916726102639, "loss": 2.0983, "step": 306545 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014237749314587617, "loss": 1.9974, "step": 306550 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014237581901624906, "loss": 1.9806, "step": 306555 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014237414487214557, "loss": 1.8767, "step": 306560 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014237247071356636, "loss": 2.1457, "step": 306565 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014237079654051191, "loss": 2.1272, "step": 306570 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014236912235298283, "loss": 2.0126, "step": 306575 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014236744815097967, "loss": 2.2385, "step": 306580 }, { "epoch": 0.72, "grad_norm": 2.828125, "learning_rate": 0.00014236577393450306, "loss": 2.2125, "step": 306585 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001423640997035535, "loss": 2.179, "step": 306590 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001423624254581316, "loss": 2.0254, "step": 306595 }, { "epoch": 0.72, "grad_norm": 1.84375, "learning_rate": 0.00014236075119823796, "loss": 2.2306, "step": 306600 }, { "epoch": 0.72, "grad_norm": 2.40625, "learning_rate": 0.00014235907692387306, "loss": 2.0234, "step": 306605 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014235740263503758, "loss": 2.168, "step": 306610 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.00014235572833173204, "loss": 1.9963, "step": 306615 }, { "epoch": 0.72, "grad_norm": 3.171875, "learning_rate": 0.00014235405401395702, "loss": 2.0895, "step": 306620 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014235237968171304, "loss": 2.0755, "step": 306625 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014235070533500076, "loss": 1.9801, "step": 306630 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.0001423490309738207, "loss": 2.017, "step": 306635 }, { "epoch": 0.72, "grad_norm": 2.578125, "learning_rate": 0.00014234735659817343, "loss": 2.0778, "step": 306640 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014234568220805957, "loss": 2.1082, "step": 306645 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014234400780347963, "loss": 2.098, "step": 306650 }, { "epoch": 0.72, "grad_norm": 1.9609375, "learning_rate": 0.0001423423333844342, "loss": 2.1073, "step": 306655 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.0001423406589509239, "loss": 2.0155, "step": 306660 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014233898450294923, "loss": 2.0212, "step": 306665 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014233731004051082, "loss": 1.9977, "step": 306670 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.00014233563556360917, "loss": 2.0275, "step": 306675 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014233396107224497, "loss": 2.0744, "step": 306680 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014233228656641866, "loss": 2.1551, "step": 306685 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001423306120461309, "loss": 2.1631, "step": 306690 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014232893751138224, "loss": 1.9037, "step": 306695 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.00014232726296217326, "loss": 2.0947, "step": 306700 }, { "epoch": 0.72, "grad_norm": 7.09375, "learning_rate": 0.00014232558839850448, "loss": 2.0989, "step": 306705 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014232391382037652, "loss": 1.9526, "step": 306710 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014232223922778993, "loss": 2.094, "step": 306715 }, { "epoch": 0.72, "grad_norm": 1.7265625, "learning_rate": 0.00014232056462074532, "loss": 2.1423, "step": 306720 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014231888999924325, "loss": 1.8558, "step": 306725 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014231721536328427, "loss": 2.2498, "step": 306730 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014231554071286894, "loss": 2.2636, "step": 306735 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.00014231386604799786, "loss": 2.2013, "step": 306740 }, { "epoch": 0.72, "grad_norm": 2.5, "learning_rate": 0.00014231219136867163, "loss": 1.9799, "step": 306745 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014231051667489076, "loss": 2.0728, "step": 306750 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014230884196665585, "loss": 2.0707, "step": 306755 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001423071672439675, "loss": 2.1145, "step": 306760 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014230549250682622, "loss": 2.2162, "step": 306765 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014230381775523263, "loss": 2.1509, "step": 306770 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014230214298918727, "loss": 2.2256, "step": 306775 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014230046820869077, "loss": 1.8277, "step": 306780 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014229879341374363, "loss": 1.838, "step": 306785 }, { "epoch": 0.72, "grad_norm": 2.71875, "learning_rate": 0.00014229711860434647, "loss": 2.0238, "step": 306790 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014229544378049983, "loss": 1.9516, "step": 306795 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014229376894220433, "loss": 2.1837, "step": 306800 }, { "epoch": 0.72, "grad_norm": 2.84375, "learning_rate": 0.00014229209408946048, "loss": 1.9971, "step": 306805 }, { "epoch": 0.72, "grad_norm": 1.8046875, "learning_rate": 0.0001422904192222689, "loss": 2.0864, "step": 306810 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014228874434063017, "loss": 2.0827, "step": 306815 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014228706944454476, "loss": 1.9223, "step": 306820 }, { "epoch": 0.72, "grad_norm": 1.875, "learning_rate": 0.00014228539453401337, "loss": 2.1777, "step": 306825 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014228371960903652, "loss": 2.112, "step": 306830 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.0001422820446696148, "loss": 2.1983, "step": 306835 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014228036971574877, "loss": 2.0763, "step": 306840 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014227869474743896, "loss": 1.9672, "step": 306845 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014227701976468602, "loss": 2.0481, "step": 306850 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014227534476749044, "loss": 2.1471, "step": 306855 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.0001422736697558529, "loss": 2.1801, "step": 306860 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014227199472977386, "loss": 2.0111, "step": 306865 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014227031968925392, "loss": 2.0648, "step": 306870 }, { "epoch": 0.72, "grad_norm": 2.703125, "learning_rate": 0.0001422686446342937, "loss": 1.9948, "step": 306875 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014226696956489378, "loss": 2.0115, "step": 306880 }, { "epoch": 0.72, "grad_norm": 1.8046875, "learning_rate": 0.00014226529448105465, "loss": 1.9614, "step": 306885 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014226361938277698, "loss": 2.0787, "step": 306890 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014226194427006123, "loss": 2.0385, "step": 306895 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014226026914290803, "loss": 2.1556, "step": 306900 }, { "epoch": 0.72, "grad_norm": 3.375, "learning_rate": 0.000142258594001318, "loss": 2.1414, "step": 306905 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.00014225691884529166, "loss": 2.0507, "step": 306910 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.0001422552436748296, "loss": 1.9775, "step": 306915 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014225356848993236, "loss": 1.93, "step": 306920 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.0001422518932906005, "loss": 2.0978, "step": 306925 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014225021807683468, "loss": 1.9915, "step": 306930 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.0001422485428486354, "loss": 1.9093, "step": 306935 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014224686760600324, "loss": 1.9428, "step": 306940 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014224519234893882, "loss": 2.1301, "step": 306945 }, { "epoch": 0.72, "grad_norm": 2.484375, "learning_rate": 0.00014224351707744264, "loss": 2.0916, "step": 306950 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014224184179151533, "loss": 2.1727, "step": 306955 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014224016649115744, "loss": 2.1369, "step": 306960 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.00014223849117636953, "loss": 2.2109, "step": 306965 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014223681584715222, "loss": 2.0362, "step": 306970 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014223514050350602, "loss": 1.9996, "step": 306975 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014223346514543152, "loss": 2.1, "step": 306980 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001422317897729293, "loss": 2.2105, "step": 306985 }, { "epoch": 0.72, "grad_norm": 1.796875, "learning_rate": 0.00014223011438599997, "loss": 2.0259, "step": 306990 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014222843898464404, "loss": 2.2608, "step": 306995 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.0001422267635688621, "loss": 2.0546, "step": 307000 }, { "epoch": 0.72, "grad_norm": 2.78125, "learning_rate": 0.00014222508813865474, "loss": 2.0183, "step": 307005 }, { "epoch": 0.72, "grad_norm": 2.671875, "learning_rate": 0.00014222341269402256, "loss": 1.7579, "step": 307010 }, { "epoch": 0.72, "grad_norm": 1.8828125, "learning_rate": 0.00014222173723496605, "loss": 1.9995, "step": 307015 }, { "epoch": 0.72, "grad_norm": 1.6796875, "learning_rate": 0.00014222006176148585, "loss": 1.7573, "step": 307020 }, { "epoch": 0.72, "grad_norm": 1.875, "learning_rate": 0.0001422183862735825, "loss": 2.0603, "step": 307025 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001422167107712566, "loss": 2.0378, "step": 307030 }, { "epoch": 0.72, "grad_norm": 1.9453125, "learning_rate": 0.00014221503525450868, "loss": 2.1051, "step": 307035 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.0001422133597233394, "loss": 1.8501, "step": 307040 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.0001422116841777492, "loss": 2.045, "step": 307045 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014221000861773875, "loss": 2.1019, "step": 307050 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.0001422083330433086, "loss": 2.0943, "step": 307055 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014220665745445928, "loss": 1.9273, "step": 307060 }, { "epoch": 0.72, "grad_norm": 1.9765625, "learning_rate": 0.00014220498185119147, "loss": 2.14, "step": 307065 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014220330623350563, "loss": 2.0034, "step": 307070 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014220163060140237, "loss": 2.074, "step": 307075 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014219995495488229, "loss": 2.1715, "step": 307080 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014219827929394592, "loss": 2.1297, "step": 307085 }, { "epoch": 0.72, "grad_norm": 1.9140625, "learning_rate": 0.00014219660361859386, "loss": 2.0782, "step": 307090 }, { "epoch": 0.72, "grad_norm": 1.875, "learning_rate": 0.00014219492792882666, "loss": 2.1358, "step": 307095 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014219325222464494, "loss": 2.1588, "step": 307100 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.0001421915765060492, "loss": 2.0829, "step": 307105 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.00014218990077304012, "loss": 1.9811, "step": 307110 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014218822502561814, "loss": 2.0916, "step": 307115 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001421865492637839, "loss": 2.2201, "step": 307120 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.000142184873487538, "loss": 1.8683, "step": 307125 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014218319769688096, "loss": 1.9774, "step": 307130 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.0001421815218918134, "loss": 2.0426, "step": 307135 }, { "epoch": 0.72, "grad_norm": 2.65625, "learning_rate": 0.00014217984607233584, "loss": 2.1482, "step": 307140 }, { "epoch": 0.72, "grad_norm": 2.578125, "learning_rate": 0.00014217817023844887, "loss": 1.8868, "step": 307145 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001421764943901531, "loss": 2.1178, "step": 307150 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.0001421748185274491, "loss": 2.1465, "step": 307155 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014217314265033737, "loss": 2.0034, "step": 307160 }, { "epoch": 0.72, "grad_norm": 1.9375, "learning_rate": 0.00014217146675881852, "loss": 2.2005, "step": 307165 }, { "epoch": 0.72, "grad_norm": 2.734375, "learning_rate": 0.00014216979085289317, "loss": 1.9612, "step": 307170 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014216811493256187, "loss": 1.9639, "step": 307175 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014216643899782515, "loss": 2.0272, "step": 307180 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014216476304868358, "loss": 2.1207, "step": 307185 }, { "epoch": 0.72, "grad_norm": 2.578125, "learning_rate": 0.0001421630870851378, "loss": 1.9147, "step": 307190 }, { "epoch": 0.72, "grad_norm": 1.8359375, "learning_rate": 0.0001421614111071883, "loss": 2.076, "step": 307195 }, { "epoch": 0.72, "grad_norm": 3.046875, "learning_rate": 0.00014215973511483575, "loss": 1.9494, "step": 307200 }, { "epoch": 0.72, "grad_norm": 1.9140625, "learning_rate": 0.00014215805910808066, "loss": 2.1219, "step": 307205 }, { "epoch": 0.72, "grad_norm": 1.828125, "learning_rate": 0.00014215638308692362, "loss": 1.9933, "step": 307210 }, { "epoch": 0.72, "grad_norm": 2.890625, "learning_rate": 0.00014215470705136515, "loss": 2.1863, "step": 307215 }, { "epoch": 0.72, "grad_norm": 1.4921875, "learning_rate": 0.0001421530310014059, "loss": 1.8774, "step": 307220 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014215135493704642, "loss": 2.1705, "step": 307225 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014214967885828726, "loss": 1.9952, "step": 307230 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014214800276512898, "loss": 2.0042, "step": 307235 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014214632665757223, "loss": 2.1847, "step": 307240 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014214465053561748, "loss": 2.079, "step": 307245 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014214297439926537, "loss": 1.992, "step": 307250 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.00014214129824851647, "loss": 2.0311, "step": 307255 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014213962208337134, "loss": 1.9889, "step": 307260 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014213794590383055, "loss": 2.1466, "step": 307265 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014213626970989467, "loss": 1.8897, "step": 307270 }, { "epoch": 0.72, "grad_norm": 1.8203125, "learning_rate": 0.00014213459350156425, "loss": 2.1372, "step": 307275 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.0001421329172788399, "loss": 2.2553, "step": 307280 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014213124104172218, "loss": 2.0687, "step": 307285 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.0001421295647902117, "loss": 2.0037, "step": 307290 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014212788852430898, "loss": 2.0936, "step": 307295 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014212621224401456, "loss": 2.0266, "step": 307300 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.00014212453594932912, "loss": 2.0477, "step": 307305 }, { "epoch": 0.72, "grad_norm": 1.765625, "learning_rate": 0.00014212285964025317, "loss": 1.9398, "step": 307310 }, { "epoch": 0.72, "grad_norm": 1.9140625, "learning_rate": 0.00014212118331678728, "loss": 2.1865, "step": 307315 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014211950697893202, "loss": 1.9747, "step": 307320 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014211783062668796, "loss": 2.1192, "step": 307325 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014211615426005568, "loss": 2.0284, "step": 307330 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.0001421144778790358, "loss": 2.0242, "step": 307335 }, { "epoch": 0.72, "grad_norm": 1.890625, "learning_rate": 0.00014211280148362885, "loss": 2.0037, "step": 307340 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014211112507383537, "loss": 1.9769, "step": 307345 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.000142109448649656, "loss": 2.0693, "step": 307350 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014210777221109123, "loss": 2.0508, "step": 307355 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.0001421060957581417, "loss": 2.158, "step": 307360 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.000142104419290808, "loss": 2.0911, "step": 307365 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014210274280909065, "loss": 2.0367, "step": 307370 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.0001421010663129902, "loss": 2.0421, "step": 307375 }, { "epoch": 0.72, "grad_norm": 1.9375, "learning_rate": 0.0001420993898025073, "loss": 1.9567, "step": 307380 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014209771327764248, "loss": 2.1819, "step": 307385 }, { "epoch": 0.72, "grad_norm": 1.859375, "learning_rate": 0.00014209603673839632, "loss": 1.9078, "step": 307390 }, { "epoch": 0.72, "grad_norm": 2.75, "learning_rate": 0.00014209436018476938, "loss": 2.0479, "step": 307395 }, { "epoch": 0.72, "grad_norm": 2.71875, "learning_rate": 0.00014209268361676223, "loss": 2.1354, "step": 307400 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001420910070343755, "loss": 2.0707, "step": 307405 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.00014208933043760968, "loss": 2.1169, "step": 307410 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014208765382646538, "loss": 2.0005, "step": 307415 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.00014208597720094318, "loss": 2.0352, "step": 307420 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014208430056104367, "loss": 1.8952, "step": 307425 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014208262390676742, "loss": 2.1481, "step": 307430 }, { "epoch": 0.72, "grad_norm": 1.734375, "learning_rate": 0.00014208094723811493, "loss": 2.0451, "step": 307435 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.0001420792705550868, "loss": 1.9457, "step": 307440 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014207759385768368, "loss": 1.9524, "step": 307445 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014207591714590612, "loss": 1.9931, "step": 307450 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014207424041975462, "loss": 2.197, "step": 307455 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014207256367922976, "loss": 2.1122, "step": 307460 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001420708869243322, "loss": 1.9753, "step": 307465 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014206921015506247, "loss": 2.058, "step": 307470 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014206753337142112, "loss": 2.0225, "step": 307475 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014206585657340875, "loss": 2.1496, "step": 307480 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014206417976102588, "loss": 2.076, "step": 307485 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014206250293427312, "loss": 2.1654, "step": 307490 }, { "epoch": 0.72, "grad_norm": 2.109375, "learning_rate": 0.0001420608260931511, "loss": 2.1877, "step": 307495 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.00014205914923766033, "loss": 2.1117, "step": 307500 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014205747236780138, "loss": 2.0772, "step": 307505 }, { "epoch": 0.72, "grad_norm": 2.640625, "learning_rate": 0.00014205579548357483, "loss": 1.8994, "step": 307510 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014205411858498126, "loss": 2.0576, "step": 307515 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.0001420524416720212, "loss": 2.1224, "step": 307520 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.00014205076474469531, "loss": 2.1177, "step": 307525 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014204908780300412, "loss": 2.0071, "step": 307530 }, { "epoch": 0.72, "grad_norm": 2.421875, "learning_rate": 0.0001420474108469482, "loss": 2.0652, "step": 307535 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014204573387652808, "loss": 1.9956, "step": 307540 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.0001420440568917444, "loss": 2.0481, "step": 307545 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014204237989259773, "loss": 2.0868, "step": 307550 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001420407028790886, "loss": 2.0504, "step": 307555 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.0001420390258512176, "loss": 2.0278, "step": 307560 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014203734880898533, "loss": 2.0046, "step": 307565 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.0001420356717523923, "loss": 2.1482, "step": 307570 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014203399468143914, "loss": 2.2381, "step": 307575 }, { "epoch": 0.72, "grad_norm": 2.46875, "learning_rate": 0.0001420323175961264, "loss": 2.071, "step": 307580 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.0001420306404964547, "loss": 2.0733, "step": 307585 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014202896338242452, "loss": 2.0434, "step": 307590 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.0001420272862540365, "loss": 2.1594, "step": 307595 }, { "epoch": 0.72, "grad_norm": 2.203125, "learning_rate": 0.0001420256091112912, "loss": 1.8767, "step": 307600 }, { "epoch": 0.72, "grad_norm": 1.9140625, "learning_rate": 0.00014202393195418916, "loss": 2.0036, "step": 307605 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014202225478273102, "loss": 2.0806, "step": 307610 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014202057759691732, "loss": 2.0757, "step": 307615 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.0001420189003967486, "loss": 1.9769, "step": 307620 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014201722318222547, "loss": 2.1965, "step": 307625 }, { "epoch": 0.72, "grad_norm": 1.9296875, "learning_rate": 0.00014201554595334853, "loss": 1.9432, "step": 307630 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.00014201386871011827, "loss": 2.0181, "step": 307635 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.00014201219145253534, "loss": 2.0905, "step": 307640 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014201051418060028, "loss": 2.0198, "step": 307645 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014200883689431364, "loss": 2.1285, "step": 307650 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014200715959367604, "loss": 2.0758, "step": 307655 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.00014200548227868805, "loss": 1.8613, "step": 307660 }, { "epoch": 0.72, "grad_norm": 2.6875, "learning_rate": 0.0001420038049493502, "loss": 2.0923, "step": 307665 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014200212760566308, "loss": 2.0248, "step": 307670 }, { "epoch": 0.72, "grad_norm": 2.609375, "learning_rate": 0.0001420004502476273, "loss": 2.0426, "step": 307675 }, { "epoch": 0.72, "grad_norm": 1.9453125, "learning_rate": 0.00014199877287524336, "loss": 2.034, "step": 307680 }, { "epoch": 0.72, "grad_norm": 2.03125, "learning_rate": 0.00014199709548851192, "loss": 2.066, "step": 307685 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.0001419954180874335, "loss": 2.2157, "step": 307690 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.00014199374067200868, "loss": 2.0644, "step": 307695 }, { "epoch": 0.72, "grad_norm": 2.09375, "learning_rate": 0.00014199206324223807, "loss": 1.9759, "step": 307700 }, { "epoch": 0.72, "grad_norm": 2.515625, "learning_rate": 0.00014199038579812214, "loss": 2.0066, "step": 307705 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014198870833966157, "loss": 2.1027, "step": 307710 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001419870308668569, "loss": 2.0748, "step": 307715 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014198535337970868, "loss": 2.0338, "step": 307720 }, { "epoch": 0.72, "grad_norm": 3.328125, "learning_rate": 0.00014198367587821756, "loss": 2.2258, "step": 307725 }, { "epoch": 0.72, "grad_norm": 1.984375, "learning_rate": 0.00014198199836238398, "loss": 2.1615, "step": 307730 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001419803208322086, "loss": 2.229, "step": 307735 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.000141978643287692, "loss": 2.1686, "step": 307740 }, { "epoch": 0.72, "grad_norm": 2.390625, "learning_rate": 0.00014197696572883478, "loss": 1.9168, "step": 307745 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014197528815563742, "loss": 2.067, "step": 307750 }, { "epoch": 0.72, "grad_norm": 1.8671875, "learning_rate": 0.00014197361056810052, "loss": 2.1104, "step": 307755 }, { "epoch": 0.72, "grad_norm": 2.5625, "learning_rate": 0.0001419719329662247, "loss": 2.2372, "step": 307760 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014197025535001053, "loss": 2.0997, "step": 307765 }, { "epoch": 0.72, "grad_norm": 2.4375, "learning_rate": 0.0001419685777194585, "loss": 2.0104, "step": 307770 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014196690007456928, "loss": 1.9983, "step": 307775 }, { "epoch": 0.72, "grad_norm": 1.5703125, "learning_rate": 0.0001419652224153434, "loss": 2.2685, "step": 307780 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001419635447417814, "loss": 2.0323, "step": 307785 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.00014196186705388398, "loss": 2.2758, "step": 307790 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.00014196018935165158, "loss": 2.1411, "step": 307795 }, { "epoch": 0.72, "grad_norm": 2.578125, "learning_rate": 0.0001419585116350848, "loss": 2.0269, "step": 307800 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014195683390418425, "loss": 2.077, "step": 307805 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014195515615895047, "loss": 2.0756, "step": 307810 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.00014195347839938407, "loss": 2.0107, "step": 307815 }, { "epoch": 0.72, "grad_norm": 1.9296875, "learning_rate": 0.00014195180062548558, "loss": 2.1103, "step": 307820 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014195012283725562, "loss": 2.1362, "step": 307825 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.0001419484450346947, "loss": 2.0683, "step": 307830 }, { "epoch": 0.72, "grad_norm": 1.9609375, "learning_rate": 0.00014194676721780347, "loss": 1.9119, "step": 307835 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.00014194508938658243, "loss": 2.042, "step": 307840 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014194341154103222, "loss": 2.123, "step": 307845 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014194173368115335, "loss": 2.173, "step": 307850 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.00014194005580694643, "loss": 1.9493, "step": 307855 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014193837791841204, "loss": 2.0162, "step": 307860 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014193670001555074, "loss": 2.1068, "step": 307865 }, { "epoch": 0.72, "grad_norm": 2.0, "learning_rate": 0.00014193502209836306, "loss": 2.07, "step": 307870 }, { "epoch": 0.72, "grad_norm": 2.34375, "learning_rate": 0.00014193334416684968, "loss": 1.9413, "step": 307875 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.0001419316662210111, "loss": 2.2146, "step": 307880 }, { "epoch": 0.72, "grad_norm": 2.359375, "learning_rate": 0.00014192998826084783, "loss": 2.0751, "step": 307885 }, { "epoch": 0.72, "grad_norm": 2.5, "learning_rate": 0.00014192831028636058, "loss": 2.0957, "step": 307890 }, { "epoch": 0.72, "grad_norm": 2.046875, "learning_rate": 0.00014192663229754983, "loss": 2.0452, "step": 307895 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.0001419249542944162, "loss": 2.1321, "step": 307900 }, { "epoch": 0.72, "grad_norm": 1.953125, "learning_rate": 0.00014192327627696028, "loss": 1.9427, "step": 307905 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014192159824518257, "loss": 1.9236, "step": 307910 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.00014191992019908363, "loss": 2.0897, "step": 307915 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.00014191824213866415, "loss": 2.1245, "step": 307920 }, { "epoch": 0.72, "grad_norm": 1.9140625, "learning_rate": 0.00014191656406392465, "loss": 2.0426, "step": 307925 }, { "epoch": 0.72, "grad_norm": 2.28125, "learning_rate": 0.00014191488597486567, "loss": 2.1568, "step": 307930 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001419132078714878, "loss": 2.0573, "step": 307935 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.00014191152975379163, "loss": 1.9785, "step": 307940 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.0001419098516217777, "loss": 2.0116, "step": 307945 }, { "epoch": 0.72, "grad_norm": 2.078125, "learning_rate": 0.0001419081734754466, "loss": 1.9184, "step": 307950 }, { "epoch": 0.72, "grad_norm": 2.59375, "learning_rate": 0.00014190649531479893, "loss": 2.0956, "step": 307955 }, { "epoch": 0.72, "grad_norm": 2.125, "learning_rate": 0.00014190481713983522, "loss": 2.11, "step": 307960 }, { "epoch": 0.72, "grad_norm": 2.015625, "learning_rate": 0.00014190313895055608, "loss": 2.0874, "step": 307965 }, { "epoch": 0.72, "grad_norm": 1.8984375, "learning_rate": 0.0001419014607469621, "loss": 1.8444, "step": 307970 }, { "epoch": 0.72, "grad_norm": 2.1875, "learning_rate": 0.00014189978252905378, "loss": 2.1079, "step": 307975 }, { "epoch": 0.72, "grad_norm": 1.96875, "learning_rate": 0.00014189810429683175, "loss": 1.9769, "step": 307980 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014189642605029655, "loss": 2.1337, "step": 307985 }, { "epoch": 0.72, "grad_norm": 2.265625, "learning_rate": 0.0001418947477894488, "loss": 2.1529, "step": 307990 }, { "epoch": 0.72, "grad_norm": 2.234375, "learning_rate": 0.000141893069514289, "loss": 1.9985, "step": 307995 }, { "epoch": 0.72, "grad_norm": 2.21875, "learning_rate": 0.0001418913912248178, "loss": 1.9396, "step": 308000 }, { "epoch": 0.72, "grad_norm": 2.15625, "learning_rate": 0.00014188971292103573, "loss": 1.9756, "step": 308005 }, { "epoch": 0.72, "grad_norm": 2.296875, "learning_rate": 0.00014188803460294338, "loss": 2.1082, "step": 308010 }, { "epoch": 0.72, "grad_norm": 1.9921875, "learning_rate": 0.0001418863562705413, "loss": 1.9945, "step": 308015 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014188467792383014, "loss": 2.071, "step": 308020 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.00014188299956281034, "loss": 2.0529, "step": 308025 }, { "epoch": 0.72, "grad_norm": 1.90625, "learning_rate": 0.0001418813211874826, "loss": 1.9986, "step": 308030 }, { "epoch": 0.72, "grad_norm": 1.796875, "learning_rate": 0.0001418796427978474, "loss": 2.1705, "step": 308035 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 0.0001418779643939054, "loss": 1.8616, "step": 308040 }, { "epoch": 0.72, "grad_norm": 2.3125, "learning_rate": 0.0001418762859756571, "loss": 2.214, "step": 308045 }, { "epoch": 0.72, "grad_norm": 2.25, "learning_rate": 0.0001418746075431031, "loss": 2.045, "step": 308050 }, { "epoch": 0.72, "grad_norm": 2.171875, "learning_rate": 0.00014187292909624398, "loss": 2.0789, "step": 308055 }, { "epoch": 0.72, "grad_norm": 2.453125, "learning_rate": 0.0001418712506350803, "loss": 2.1661, "step": 308060 }, { "epoch": 0.72, "grad_norm": 2.328125, "learning_rate": 0.00014186957215961266, "loss": 2.1239, "step": 308065 }, { "epoch": 0.72, "grad_norm": 2.140625, "learning_rate": 0.0001418678936698416, "loss": 2.1894, "step": 308070 }, { "epoch": 0.72, "grad_norm": 2.0625, "learning_rate": 0.0001418662151657677, "loss": 2.1172, "step": 308075 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014186453664739155, "loss": 2.0888, "step": 308080 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014186285811471373, "loss": 2.0477, "step": 308085 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001418611795677348, "loss": 2.1378, "step": 308090 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.0001418595010064553, "loss": 2.0086, "step": 308095 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014185782243087585, "loss": 2.0966, "step": 308100 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014185614384099702, "loss": 1.9019, "step": 308105 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014185446523681935, "loss": 2.0056, "step": 308110 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014185278661834347, "loss": 2.1476, "step": 308115 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001418511079855699, "loss": 2.0993, "step": 308120 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014184942933849923, "loss": 2.1496, "step": 308125 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.00014184775067713204, "loss": 2.1136, "step": 308130 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014184607200146888, "loss": 2.1856, "step": 308135 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.00014184439331151036, "loss": 1.975, "step": 308140 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014184271460725704, "loss": 2.1651, "step": 308145 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014184103588870948, "loss": 1.8362, "step": 308150 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.0001418393571558683, "loss": 2.208, "step": 308155 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.000141837678408734, "loss": 1.8671, "step": 308160 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.0001418359996473072, "loss": 1.9932, "step": 308165 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014183432087158844, "loss": 2.0188, "step": 308170 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014183264208157836, "loss": 2.0217, "step": 308175 }, { "epoch": 0.73, "grad_norm": 2.578125, "learning_rate": 0.00014183096327727746, "loss": 2.3162, "step": 308180 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014182928445868634, "loss": 1.8573, "step": 308185 }, { "epoch": 0.73, "grad_norm": 1.84375, "learning_rate": 0.0001418276056258056, "loss": 2.07, "step": 308190 }, { "epoch": 0.73, "grad_norm": 1.78125, "learning_rate": 0.00014182592677863576, "loss": 2.1616, "step": 308195 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014182424791717749, "loss": 2.0807, "step": 308200 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014182256904143124, "loss": 2.009, "step": 308205 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014182089015139763, "loss": 2.1523, "step": 308210 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014181921124707726, "loss": 2.1681, "step": 308215 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014181753232847072, "loss": 2.1335, "step": 308220 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014181585339557854, "loss": 2.1431, "step": 308225 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014181417444840132, "loss": 1.9933, "step": 308230 }, { "epoch": 0.73, "grad_norm": 1.8984375, "learning_rate": 0.00014181249548693957, "loss": 2.2013, "step": 308235 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014181081651119397, "loss": 2.1446, "step": 308240 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.000141809137521165, "loss": 1.9853, "step": 308245 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.0001418074585168533, "loss": 1.9958, "step": 308250 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014180577949825937, "loss": 2.1646, "step": 308255 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014180410046538386, "loss": 2.064, "step": 308260 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001418024214182273, "loss": 1.9802, "step": 308265 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014180074235679027, "loss": 2.113, "step": 308270 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014179906328107338, "loss": 1.9463, "step": 308275 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.00014179738419107713, "loss": 1.9172, "step": 308280 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014179570508680216, "loss": 2.1901, "step": 308285 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.000141794025968249, "loss": 2.069, "step": 308290 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014179234683541826, "loss": 2.1296, "step": 308295 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.0001417906676883105, "loss": 2.2515, "step": 308300 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014178898852692625, "loss": 1.9494, "step": 308305 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.0001417873093512662, "loss": 2.045, "step": 308310 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.0001417856301613308, "loss": 2.0793, "step": 308315 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014178395095712065, "loss": 2.1179, "step": 308320 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014178227173863636, "loss": 2.2616, "step": 308325 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014178059250587852, "loss": 1.9005, "step": 308330 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014177891325884765, "loss": 1.9414, "step": 308335 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014177723399754434, "loss": 2.0322, "step": 308340 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014177555472196914, "loss": 1.8751, "step": 308345 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.0001417738754321227, "loss": 2.1964, "step": 308350 }, { "epoch": 0.73, "grad_norm": 1.765625, "learning_rate": 0.00014177219612800557, "loss": 1.8828, "step": 308355 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014177051680961826, "loss": 2.2443, "step": 308360 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014176883747696136, "loss": 1.9601, "step": 308365 }, { "epoch": 0.73, "grad_norm": 1.9375, "learning_rate": 0.0001417671581300355, "loss": 2.0808, "step": 308370 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014176547876884122, "loss": 1.8701, "step": 308375 }, { "epoch": 0.73, "grad_norm": 1.921875, "learning_rate": 0.0001417637993933791, "loss": 2.0831, "step": 308380 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014176212000364968, "loss": 2.1975, "step": 308385 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014176044059965357, "loss": 1.9129, "step": 308390 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014175876118139138, "loss": 2.3301, "step": 308395 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.0001417570817488636, "loss": 2.1304, "step": 308400 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014175540230207086, "loss": 2.0708, "step": 308405 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014175372284101369, "loss": 1.9443, "step": 308410 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001417520433656927, "loss": 2.1282, "step": 308415 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014175036387610847, "loss": 2.0165, "step": 308420 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014174868437226155, "loss": 2.2265, "step": 308425 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014174700485415254, "loss": 2.0626, "step": 308430 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014174532532178197, "loss": 2.0242, "step": 308435 }, { "epoch": 0.73, "grad_norm": 2.671875, "learning_rate": 0.00014174364577515045, "loss": 2.204, "step": 308440 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014174196621425856, "loss": 1.9903, "step": 308445 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014174028663910683, "loss": 2.0076, "step": 308450 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014173860704969586, "loss": 2.1546, "step": 308455 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014173692744602625, "loss": 2.0079, "step": 308460 }, { "epoch": 0.73, "grad_norm": 1.859375, "learning_rate": 0.00014173524782809852, "loss": 1.8796, "step": 308465 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.0001417335681959133, "loss": 2.1328, "step": 308470 }, { "epoch": 0.73, "grad_norm": 1.6875, "learning_rate": 0.00014173188854947114, "loss": 1.8814, "step": 308475 }, { "epoch": 0.73, "grad_norm": 1.7578125, "learning_rate": 0.00014173020888877259, "loss": 2.0342, "step": 308480 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014172852921381823, "loss": 1.957, "step": 308485 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014172684952460868, "loss": 1.9217, "step": 308490 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014172516982114447, "loss": 2.1634, "step": 308495 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001417234901034262, "loss": 2.1081, "step": 308500 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014172181037145438, "loss": 2.2182, "step": 308505 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014172013062522967, "loss": 2.1414, "step": 308510 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.0001417184508647526, "loss": 2.1312, "step": 308515 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014171677109002376, "loss": 2.3455, "step": 308520 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014171509130104373, "loss": 2.0831, "step": 308525 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014171341149781303, "loss": 1.9672, "step": 308530 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.0001417117316803323, "loss": 2.016, "step": 308535 }, { "epoch": 0.73, "grad_norm": 1.984375, "learning_rate": 0.00014171005184860207, "loss": 1.9579, "step": 308540 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014170837200262293, "loss": 2.0024, "step": 308545 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014170669214239544, "loss": 2.0647, "step": 308550 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.0001417050122679202, "loss": 2.1378, "step": 308555 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.00014170333237919778, "loss": 2.0357, "step": 308560 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014170165247622874, "loss": 2.085, "step": 308565 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014169997255901367, "loss": 2.1297, "step": 308570 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014169829262755313, "loss": 2.1373, "step": 308575 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014169661268184769, "loss": 1.9733, "step": 308580 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014169493272189794, "loss": 2.0616, "step": 308585 }, { "epoch": 0.73, "grad_norm": 2.671875, "learning_rate": 0.0001416932527477044, "loss": 2.0784, "step": 308590 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014169157275926774, "loss": 2.0703, "step": 308595 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014168989275658845, "loss": 1.965, "step": 308600 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014168821273966717, "loss": 2.0358, "step": 308605 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014168653270850445, "loss": 2.1016, "step": 308610 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014168485266310082, "loss": 1.9742, "step": 308615 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014168317260345687, "loss": 1.8794, "step": 308620 }, { "epoch": 0.73, "grad_norm": 1.859375, "learning_rate": 0.00014168149252957323, "loss": 2.0351, "step": 308625 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014167981244145046, "loss": 2.2531, "step": 308630 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014167813233908908, "loss": 2.0755, "step": 308635 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014167645222248963, "loss": 1.982, "step": 308640 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014167477209165282, "loss": 2.0269, "step": 308645 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014167309194657916, "loss": 2.1454, "step": 308650 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.0001416714117872692, "loss": 2.1731, "step": 308655 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014166973161372352, "loss": 2.0906, "step": 308660 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.0001416680514259427, "loss": 2.2052, "step": 308665 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001416663712239273, "loss": 2.1899, "step": 308670 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014166469100767798, "loss": 1.9303, "step": 308675 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.0001416630107771952, "loss": 2.0164, "step": 308680 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001416613305324796, "loss": 1.8905, "step": 308685 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.00014165965027353172, "loss": 2.0525, "step": 308690 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014165797000035215, "loss": 2.0695, "step": 308695 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014165628971294145, "loss": 2.0872, "step": 308700 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014165460941130021, "loss": 2.0565, "step": 308705 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.000141652929095429, "loss": 2.0491, "step": 308710 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001416512487653284, "loss": 1.7744, "step": 308715 }, { "epoch": 0.73, "grad_norm": 3.4375, "learning_rate": 0.000141649568420999, "loss": 1.9187, "step": 308720 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014164788806244133, "loss": 2.0944, "step": 308725 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014164620768965597, "loss": 1.9678, "step": 308730 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.0001416445273026435, "loss": 2.2234, "step": 308735 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014164284690140454, "loss": 1.9943, "step": 308740 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.0001416411664859396, "loss": 2.0335, "step": 308745 }, { "epoch": 0.73, "grad_norm": 1.8359375, "learning_rate": 0.00014163948605624933, "loss": 2.0915, "step": 308750 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001416378056123342, "loss": 2.199, "step": 308755 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.0001416361251541949, "loss": 1.9705, "step": 308760 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014163444468183193, "loss": 2.2712, "step": 308765 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014163276419524584, "loss": 2.1422, "step": 308770 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014163108369443726, "loss": 2.181, "step": 308775 }, { "epoch": 0.73, "grad_norm": 2.59375, "learning_rate": 0.00014162940317940674, "loss": 2.1475, "step": 308780 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014162772265015488, "loss": 2.055, "step": 308785 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014162604210668224, "loss": 2.0824, "step": 308790 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014162436154898937, "loss": 2.0578, "step": 308795 }, { "epoch": 0.73, "grad_norm": 2.90625, "learning_rate": 0.00014162268097707686, "loss": 2.1348, "step": 308800 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014162100039094528, "loss": 1.8607, "step": 308805 }, { "epoch": 0.73, "grad_norm": 3.03125, "learning_rate": 0.00014161931979059524, "loss": 2.0658, "step": 308810 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.0001416176391760273, "loss": 1.8439, "step": 308815 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014161595854724198, "loss": 2.0698, "step": 308820 }, { "epoch": 0.73, "grad_norm": 2.765625, "learning_rate": 0.00014161427790423988, "loss": 2.0879, "step": 308825 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014161259724702164, "loss": 2.1598, "step": 308830 }, { "epoch": 0.73, "grad_norm": 2.625, "learning_rate": 0.00014161091657558775, "loss": 2.152, "step": 308835 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014160923588993883, "loss": 2.207, "step": 308840 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.00014160755519007542, "loss": 2.336, "step": 308845 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014160587447599815, "loss": 2.0893, "step": 308850 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.0001416041937477075, "loss": 1.9451, "step": 308855 }, { "epoch": 0.73, "grad_norm": 1.828125, "learning_rate": 0.00014160251300520416, "loss": 2.1173, "step": 308860 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.0001416008322484886, "loss": 2.0965, "step": 308865 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001415991514775615, "loss": 2.1907, "step": 308870 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014159747069242333, "loss": 1.9255, "step": 308875 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014159578989307472, "loss": 2.0638, "step": 308880 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.0001415941090795162, "loss": 2.199, "step": 308885 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014159242825174844, "loss": 2.0251, "step": 308890 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014159074740977194, "loss": 2.1067, "step": 308895 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014158906655358728, "loss": 2.0874, "step": 308900 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014158738568319503, "loss": 2.123, "step": 308905 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014158570479859577, "loss": 2.1167, "step": 308910 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.0001415840238997901, "loss": 1.9579, "step": 308915 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014158234298677856, "loss": 2.0642, "step": 308920 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014158066205956175, "loss": 2.0618, "step": 308925 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014157898111814021, "loss": 2.0968, "step": 308930 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014157730016251452, "loss": 2.1109, "step": 308935 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014157561919268528, "loss": 2.0617, "step": 308940 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.0001415739382086531, "loss": 2.089, "step": 308945 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014157225721041848, "loss": 2.2277, "step": 308950 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014157057619798203, "loss": 1.9504, "step": 308955 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.0001415688951713443, "loss": 1.967, "step": 308960 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014156721413050586, "loss": 2.2124, "step": 308965 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014156553307546736, "loss": 2.2691, "step": 308970 }, { "epoch": 0.73, "grad_norm": 1.921875, "learning_rate": 0.0001415638520062293, "loss": 1.9947, "step": 308975 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.0001415621709227923, "loss": 1.8837, "step": 308980 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014156048982515686, "loss": 2.1334, "step": 308985 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014155880871332362, "loss": 2.0356, "step": 308990 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014155712758729313, "loss": 2.0894, "step": 308995 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.000141555446447066, "loss": 1.8553, "step": 309000 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014155376529264275, "loss": 2.1614, "step": 309005 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.000141552084124024, "loss": 2.0444, "step": 309010 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014155040294121026, "loss": 1.8577, "step": 309015 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014154872174420218, "loss": 2.2832, "step": 309020 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.00014154704053300033, "loss": 2.0673, "step": 309025 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.0001415453593076052, "loss": 2.0492, "step": 309030 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014154367806801748, "loss": 2.1357, "step": 309035 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014154199681423764, "loss": 2.1556, "step": 309040 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.0001415403155462663, "loss": 2.0067, "step": 309045 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014153863426410407, "loss": 2.0231, "step": 309050 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001415369529677515, "loss": 2.0981, "step": 309055 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014153527165720913, "loss": 2.1488, "step": 309060 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.00014153359033247754, "loss": 2.0406, "step": 309065 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014153190899355732, "loss": 1.9992, "step": 309070 }, { "epoch": 0.73, "grad_norm": 1.8046875, "learning_rate": 0.0001415302276404491, "loss": 1.9487, "step": 309075 }, { "epoch": 0.73, "grad_norm": 3.03125, "learning_rate": 0.00014152854627315334, "loss": 1.9964, "step": 309080 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014152686489167073, "loss": 2.0403, "step": 309085 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014152518349600174, "loss": 2.1361, "step": 309090 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.000141523502086147, "loss": 1.8395, "step": 309095 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.0001415218206621071, "loss": 1.8927, "step": 309100 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.0001415201392238826, "loss": 1.8661, "step": 309105 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014151845777147406, "loss": 2.0881, "step": 309110 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014151677630488204, "loss": 2.1383, "step": 309115 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014151509482410713, "loss": 2.0076, "step": 309120 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014151341332914994, "loss": 2.0311, "step": 309125 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014151173182001104, "loss": 2.2378, "step": 309130 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014151005029669093, "loss": 1.9951, "step": 309135 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014150836875919027, "loss": 2.2302, "step": 309140 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.00014150668720750958, "loss": 1.9963, "step": 309145 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014150500564164944, "loss": 2.1928, "step": 309150 }, { "epoch": 0.73, "grad_norm": 1.75, "learning_rate": 0.00014150332406161044, "loss": 1.9875, "step": 309155 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014150164246739318, "loss": 2.1267, "step": 309160 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014149996085899817, "loss": 1.9665, "step": 309165 }, { "epoch": 0.73, "grad_norm": 1.953125, "learning_rate": 0.00014149827923642605, "loss": 2.0251, "step": 309170 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014149659759967736, "loss": 2.0322, "step": 309175 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014149491594875265, "loss": 2.0473, "step": 309180 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014149323428365256, "loss": 1.8494, "step": 309185 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001414915526043776, "loss": 1.9241, "step": 309190 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.0001414898709109284, "loss": 1.9742, "step": 309195 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.0001414881892033055, "loss": 1.9935, "step": 309200 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.00014148650748150947, "loss": 1.9344, "step": 309205 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014148482574554092, "loss": 2.0839, "step": 309210 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.00014148314399540037, "loss": 2.1187, "step": 309215 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014148146223108847, "loss": 2.0093, "step": 309220 }, { "epoch": 0.73, "grad_norm": 1.984375, "learning_rate": 0.00014147978045260572, "loss": 1.9449, "step": 309225 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.0001414780986599527, "loss": 2.0988, "step": 309230 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014147641685313006, "loss": 2.0427, "step": 309235 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.0001414747350321383, "loss": 2.1225, "step": 309240 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014147305319697802, "loss": 1.9709, "step": 309245 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.0001414713713476498, "loss": 1.955, "step": 309250 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014146968948415417, "loss": 2.033, "step": 309255 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014146800760649178, "loss": 2.0924, "step": 309260 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.00014146632571466317, "loss": 1.9762, "step": 309265 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.00014146464380866892, "loss": 2.1541, "step": 309270 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014146296188850954, "loss": 1.9964, "step": 309275 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.0001414612799541857, "loss": 1.9357, "step": 309280 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014145959800569794, "loss": 2.0604, "step": 309285 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.0001414579160430468, "loss": 2.0698, "step": 309290 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014145623406623293, "loss": 2.129, "step": 309295 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014145455207525683, "loss": 1.9774, "step": 309300 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.0001414528700701191, "loss": 2.0647, "step": 309305 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014145118805082032, "loss": 2.1046, "step": 309310 }, { "epoch": 0.73, "grad_norm": 1.8125, "learning_rate": 0.00014144950601736108, "loss": 1.9494, "step": 309315 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014144782396974189, "loss": 1.9881, "step": 309320 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014144614190796342, "loss": 2.1433, "step": 309325 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014144445983202617, "loss": 2.115, "step": 309330 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014144277774193076, "loss": 2.0635, "step": 309335 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.0001414410956376777, "loss": 2.0376, "step": 309340 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001414394135192677, "loss": 2.0419, "step": 309345 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014143773138670116, "loss": 2.0365, "step": 309350 }, { "epoch": 0.73, "grad_norm": 1.84375, "learning_rate": 0.00014143604923997879, "loss": 2.0086, "step": 309355 }, { "epoch": 0.73, "grad_norm": 2.890625, "learning_rate": 0.0001414343670791011, "loss": 2.143, "step": 309360 }, { "epoch": 0.73, "grad_norm": 1.9609375, "learning_rate": 0.00014143268490406864, "loss": 2.0886, "step": 309365 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014143100271488205, "loss": 2.0092, "step": 309370 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014142932051154189, "loss": 1.9458, "step": 309375 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014142763829404876, "loss": 2.088, "step": 309380 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.00014142595606240314, "loss": 2.0591, "step": 309385 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014142427381660567, "loss": 1.7999, "step": 309390 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.00014142259155665692, "loss": 2.1118, "step": 309395 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014142090928255746, "loss": 2.0561, "step": 309400 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001414192269943079, "loss": 1.9729, "step": 309405 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014141754469190873, "loss": 1.9541, "step": 309410 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014141586237536056, "loss": 2.1902, "step": 309415 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014141418004466407, "loss": 1.9393, "step": 309420 }, { "epoch": 0.73, "grad_norm": 3.109375, "learning_rate": 0.0001414124976998197, "loss": 1.9575, "step": 309425 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014141081534082805, "loss": 2.0718, "step": 309430 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014140913296768973, "loss": 2.1135, "step": 309435 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014140745058040529, "loss": 2.1708, "step": 309440 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014140576817897534, "loss": 1.9195, "step": 309445 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014140408576340037, "loss": 2.1304, "step": 309450 }, { "epoch": 0.73, "grad_norm": 2.671875, "learning_rate": 0.00014140240333368107, "loss": 2.1389, "step": 309455 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014140072088981794, "loss": 2.0326, "step": 309460 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014139903843181156, "loss": 2.0156, "step": 309465 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014139735595966254, "loss": 2.1072, "step": 309470 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014139567347337144, "loss": 2.0909, "step": 309475 }, { "epoch": 0.73, "grad_norm": 2.5625, "learning_rate": 0.0001413939909729388, "loss": 2.1726, "step": 309480 }, { "epoch": 0.73, "grad_norm": 2.703125, "learning_rate": 0.00014139230845836525, "loss": 2.0175, "step": 309485 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014139062592965132, "loss": 2.0015, "step": 309490 }, { "epoch": 0.73, "grad_norm": 1.984375, "learning_rate": 0.0001413889433867976, "loss": 2.0649, "step": 309495 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014138726082980465, "loss": 2.0011, "step": 309500 }, { "epoch": 0.73, "grad_norm": 2.578125, "learning_rate": 0.0001413855782586731, "loss": 1.9747, "step": 309505 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014138389567340344, "loss": 2.1428, "step": 309510 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.00014138221307399636, "loss": 2.1136, "step": 309515 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.0001413805304604523, "loss": 2.1428, "step": 309520 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001413788478327719, "loss": 2.1433, "step": 309525 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014137716519095576, "loss": 1.9556, "step": 309530 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014137548253500442, "loss": 1.9537, "step": 309535 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.0001413737998649185, "loss": 1.9592, "step": 309540 }, { "epoch": 0.73, "grad_norm": 1.75, "learning_rate": 0.00014137211718069848, "loss": 1.9271, "step": 309545 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014137043448234505, "loss": 2.0909, "step": 309550 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.0001413687517698587, "loss": 2.1084, "step": 309555 }, { "epoch": 0.73, "grad_norm": 1.5546875, "learning_rate": 0.00014136706904324003, "loss": 2.2522, "step": 309560 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014136538630248963, "loss": 2.2584, "step": 309565 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014136370354760807, "loss": 2.0604, "step": 309570 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.0001413620207785959, "loss": 1.9303, "step": 309575 }, { "epoch": 0.73, "grad_norm": 2.5625, "learning_rate": 0.00014136033799545374, "loss": 2.1378, "step": 309580 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014135865519818212, "loss": 1.985, "step": 309585 }, { "epoch": 0.73, "grad_norm": 1.828125, "learning_rate": 0.00014135697238678162, "loss": 2.1098, "step": 309590 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014135528956125287, "loss": 2.078, "step": 309595 }, { "epoch": 0.73, "grad_norm": 1.9609375, "learning_rate": 0.00014135360672159637, "loss": 2.1252, "step": 309600 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.00014135192386781272, "loss": 2.1282, "step": 309605 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014135024099990254, "loss": 2.0878, "step": 309610 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.0001413485581178663, "loss": 2.2952, "step": 309615 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014134687522170472, "loss": 2.084, "step": 309620 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014134519231141825, "loss": 2.0577, "step": 309625 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014134350938700753, "loss": 1.9772, "step": 309630 }, { "epoch": 0.73, "grad_norm": 1.8203125, "learning_rate": 0.00014134182644847312, "loss": 1.9254, "step": 309635 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001413401434958156, "loss": 1.9763, "step": 309640 }, { "epoch": 0.73, "grad_norm": 1.90625, "learning_rate": 0.0001413384605290355, "loss": 1.9914, "step": 309645 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014133677754813348, "loss": 2.1289, "step": 309650 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014133509455311005, "loss": 1.9459, "step": 309655 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014133341154396579, "loss": 2.1704, "step": 309660 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014133172852070128, "loss": 2.1741, "step": 309665 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014133004548331712, "loss": 2.083, "step": 309670 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014132836243181386, "loss": 2.1788, "step": 309675 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001413266793661921, "loss": 1.8915, "step": 309680 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014132499628645236, "loss": 2.2053, "step": 309685 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014132331319259526, "loss": 2.3304, "step": 309690 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001413216300846214, "loss": 2.1826, "step": 309695 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.0001413199469625313, "loss": 1.9743, "step": 309700 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014131826382632556, "loss": 2.0583, "step": 309705 }, { "epoch": 0.73, "grad_norm": 2.671875, "learning_rate": 0.00014131658067600473, "loss": 2.0015, "step": 309710 }, { "epoch": 0.73, "grad_norm": 1.671875, "learning_rate": 0.00014131489751156943, "loss": 2.1044, "step": 309715 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014131321433302022, "loss": 2.0903, "step": 309720 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014131153114035766, "loss": 2.0721, "step": 309725 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.00014130984793358234, "loss": 1.9553, "step": 309730 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.0001413081647126948, "loss": 2.0734, "step": 309735 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014130648147769566, "loss": 2.2237, "step": 309740 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014130479822858547, "loss": 2.0596, "step": 309745 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014130311496536482, "loss": 2.2711, "step": 309750 }, { "epoch": 0.73, "grad_norm": 2.703125, "learning_rate": 0.00014130143168803426, "loss": 2.0312, "step": 309755 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.0001412997483965944, "loss": 2.0623, "step": 309760 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014129806509104577, "loss": 2.0215, "step": 309765 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.000141296381771389, "loss": 2.1743, "step": 309770 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.0001412946984376246, "loss": 2.1605, "step": 309775 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014129301508975322, "loss": 2.0299, "step": 309780 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.0001412913317277754, "loss": 2.0316, "step": 309785 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.0001412896483516917, "loss": 2.1311, "step": 309790 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.0001412879649615027, "loss": 2.115, "step": 309795 }, { "epoch": 0.73, "grad_norm": 1.953125, "learning_rate": 0.000141286281557209, "loss": 1.9487, "step": 309800 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014128459813881114, "loss": 2.0673, "step": 309805 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014128291470630975, "loss": 2.0647, "step": 309810 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014128123125970533, "loss": 2.0089, "step": 309815 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.00014127954779899848, "loss": 1.9743, "step": 309820 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001412778643241898, "loss": 2.2316, "step": 309825 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.0001412761808352799, "loss": 2.3167, "step": 309830 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014127449733226924, "loss": 1.9494, "step": 309835 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.0001412728138151585, "loss": 2.0158, "step": 309840 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014127113028394818, "loss": 2.1581, "step": 309845 }, { "epoch": 0.73, "grad_norm": 2.5625, "learning_rate": 0.00014126944673863894, "loss": 1.9529, "step": 309850 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.0001412677631792313, "loss": 2.0644, "step": 309855 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014126607960572585, "loss": 2.0548, "step": 309860 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014126439601812313, "loss": 2.0673, "step": 309865 }, { "epoch": 0.73, "grad_norm": 1.8203125, "learning_rate": 0.00014126271241642378, "loss": 2.0916, "step": 309870 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001412610288006283, "loss": 2.1034, "step": 309875 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.0001412593451707373, "loss": 2.1747, "step": 309880 }, { "epoch": 0.73, "grad_norm": 1.921875, "learning_rate": 0.0001412576615267514, "loss": 2.0577, "step": 309885 }, { "epoch": 0.73, "grad_norm": 1.828125, "learning_rate": 0.00014125597786867112, "loss": 1.9913, "step": 309890 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014125429419649705, "loss": 2.0143, "step": 309895 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014125261051022977, "loss": 1.8833, "step": 309900 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.0001412509268098698, "loss": 2.0259, "step": 309905 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014124924309541782, "loss": 1.8769, "step": 309910 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014124755936687435, "loss": 2.0665, "step": 309915 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014124587562423994, "loss": 1.8048, "step": 309920 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.0001412441918675152, "loss": 2.0347, "step": 309925 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.0001412425080967007, "loss": 2.0129, "step": 309930 }, { "epoch": 0.73, "grad_norm": 2.578125, "learning_rate": 0.000141240824311797, "loss": 2.0882, "step": 309935 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014123914051280471, "loss": 2.0515, "step": 309940 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001412374566997244, "loss": 2.0215, "step": 309945 }, { "epoch": 0.73, "grad_norm": 1.984375, "learning_rate": 0.0001412357728725566, "loss": 2.0512, "step": 309950 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014123408903130187, "loss": 2.0495, "step": 309955 }, { "epoch": 0.73, "grad_norm": 1.9375, "learning_rate": 0.00014123240517596085, "loss": 2.1959, "step": 309960 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014123072130653412, "loss": 2.199, "step": 309965 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014122903742302224, "loss": 1.8983, "step": 309970 }, { "epoch": 0.73, "grad_norm": 1.6796875, "learning_rate": 0.00014122735352542574, "loss": 1.9094, "step": 309975 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001412256696137452, "loss": 2.0158, "step": 309980 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014122398568798127, "loss": 2.018, "step": 309985 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.0001412223017481345, "loss": 1.9216, "step": 309990 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014122061779420542, "loss": 2.1633, "step": 309995 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014121893382619462, "loss": 1.991, "step": 310000 }, { "epoch": 0.73, "grad_norm": 1.8203125, "learning_rate": 0.00014121724984410264, "loss": 2.0588, "step": 310005 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014121556584793015, "loss": 2.1599, "step": 310010 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.0001412138818376777, "loss": 1.926, "step": 310015 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014121219781334582, "loss": 1.9187, "step": 310020 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001412105137749351, "loss": 2.0023, "step": 310025 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.0001412088297224461, "loss": 2.1391, "step": 310030 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014120714565587944, "loss": 2.0153, "step": 310035 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014120546157523568, "loss": 2.2174, "step": 310040 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014120377748051536, "loss": 2.2453, "step": 310045 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001412020933717191, "loss": 1.8181, "step": 310050 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014120040924884745, "loss": 1.9952, "step": 310055 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.000141198725111901, "loss": 2.1409, "step": 310060 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.0001411970409608803, "loss": 1.9052, "step": 310065 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.000141195356795786, "loss": 2.0695, "step": 310070 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014119367261661856, "loss": 2.08, "step": 310075 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014119198842337864, "loss": 1.984, "step": 310080 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.00014119030421606677, "loss": 2.0594, "step": 310085 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014118861999468357, "loss": 1.9138, "step": 310090 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014118693575922956, "loss": 2.0594, "step": 310095 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014118525150970537, "loss": 2.0503, "step": 310100 }, { "epoch": 0.73, "grad_norm": 1.953125, "learning_rate": 0.00014118356724611157, "loss": 1.9208, "step": 310105 }, { "epoch": 0.73, "grad_norm": 2.765625, "learning_rate": 0.0001411818829684487, "loss": 2.0861, "step": 310110 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.0001411801986767173, "loss": 2.0737, "step": 310115 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014117851437091803, "loss": 2.0656, "step": 310120 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014117683005105146, "loss": 2.0797, "step": 310125 }, { "epoch": 0.73, "grad_norm": 1.9375, "learning_rate": 0.00014117514571711817, "loss": 1.8852, "step": 310130 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014117346136911863, "loss": 1.9806, "step": 310135 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014117177700705347, "loss": 2.1293, "step": 310140 }, { "epoch": 0.73, "grad_norm": 3.0, "learning_rate": 0.00014117009263092336, "loss": 2.0153, "step": 310145 }, { "epoch": 0.73, "grad_norm": 1.9609375, "learning_rate": 0.0001411684082407288, "loss": 1.9879, "step": 310150 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001411667238364703, "loss": 2.013, "step": 310155 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014116503941814855, "loss": 2.0602, "step": 310160 }, { "epoch": 0.73, "grad_norm": 1.8515625, "learning_rate": 0.00014116335498576404, "loss": 2.032, "step": 310165 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.0001411616705393174, "loss": 1.9323, "step": 310170 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.0001411599860788092, "loss": 2.0903, "step": 310175 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014115830160424, "loss": 1.9449, "step": 310180 }, { "epoch": 0.73, "grad_norm": 1.796875, "learning_rate": 0.00014115661711561038, "loss": 2.0122, "step": 310185 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.0001411549326129209, "loss": 2.1732, "step": 310190 }, { "epoch": 0.73, "grad_norm": 1.7734375, "learning_rate": 0.00014115324809617214, "loss": 2.2343, "step": 310195 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014115156356536468, "loss": 2.2054, "step": 310200 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014114987902049913, "loss": 1.8409, "step": 310205 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.000141148194461576, "loss": 2.1267, "step": 310210 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014114650988859592, "loss": 2.0997, "step": 310215 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014114482530155944, "loss": 2.1108, "step": 310220 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014114314070046716, "loss": 2.0856, "step": 310225 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.0001411414560853196, "loss": 2.0321, "step": 310230 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014113977145611742, "loss": 2.1631, "step": 310235 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014113808681286114, "loss": 1.9568, "step": 310240 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.00014113640215555133, "loss": 2.2242, "step": 310245 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014113471748418855, "loss": 2.0336, "step": 310250 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014113303279877343, "loss": 2.1207, "step": 310255 }, { "epoch": 0.73, "grad_norm": 1.8046875, "learning_rate": 0.00014113134809930653, "loss": 1.9371, "step": 310260 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014112966338578842, "loss": 1.8675, "step": 310265 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014112797865821966, "loss": 2.2609, "step": 310270 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.0001411262939166008, "loss": 1.9676, "step": 310275 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014112460916093248, "loss": 1.973, "step": 310280 }, { "epoch": 0.73, "grad_norm": 2.625, "learning_rate": 0.00014112292439121527, "loss": 2.1766, "step": 310285 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.0001411212396074497, "loss": 1.8915, "step": 310290 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001411195548096364, "loss": 1.8715, "step": 310295 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.00014111786999777584, "loss": 2.187, "step": 310300 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.00014111618517186875, "loss": 2.0308, "step": 310305 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.0001411145003319156, "loss": 2.032, "step": 310310 }, { "epoch": 0.73, "grad_norm": 2.921875, "learning_rate": 0.000141112815477917, "loss": 2.2433, "step": 310315 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.0001411111306098735, "loss": 2.1285, "step": 310320 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.00014110944572778567, "loss": 2.1333, "step": 310325 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014110776083165415, "loss": 2.0992, "step": 310330 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014110607592147947, "loss": 2.1908, "step": 310335 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014110439099726218, "loss": 2.1117, "step": 310340 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014110270605900292, "loss": 1.8922, "step": 310345 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001411010211067022, "loss": 2.0072, "step": 310350 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.0001410993361403606, "loss": 1.9878, "step": 310355 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014109765115997877, "loss": 2.1247, "step": 310360 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014109596616555722, "loss": 2.2126, "step": 310365 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014109428115709655, "loss": 2.1285, "step": 310370 }, { "epoch": 0.73, "grad_norm": 1.828125, "learning_rate": 0.00014109259613459733, "loss": 1.9566, "step": 310375 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014109091109806013, "loss": 1.9893, "step": 310380 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014108922604748553, "loss": 2.045, "step": 310385 }, { "epoch": 0.73, "grad_norm": 1.7890625, "learning_rate": 0.0001410875409828741, "loss": 1.9483, "step": 310390 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014108585590422642, "loss": 1.9291, "step": 310395 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.0001410841708115431, "loss": 2.2473, "step": 310400 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014108248570482464, "loss": 2.0282, "step": 310405 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014108080058407166, "loss": 2.1569, "step": 310410 }, { "epoch": 0.73, "grad_norm": 1.890625, "learning_rate": 0.00014107911544928477, "loss": 1.9637, "step": 310415 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014107743030046445, "loss": 1.8795, "step": 310420 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014107574513761142, "loss": 2.2491, "step": 310425 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.0001410740599607261, "loss": 1.9842, "step": 310430 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014107237476980913, "loss": 1.8908, "step": 310435 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001410706895648611, "loss": 1.9453, "step": 310440 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014106900434588264, "loss": 2.1839, "step": 310445 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001410673191128742, "loss": 1.9266, "step": 310450 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014106563386583645, "loss": 2.0553, "step": 310455 }, { "epoch": 0.73, "grad_norm": 1.7578125, "learning_rate": 0.0001410639486047699, "loss": 2.1159, "step": 310460 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014106226332967522, "loss": 1.9159, "step": 310465 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014106057804055285, "loss": 2.1065, "step": 310470 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001410588927374035, "loss": 2.222, "step": 310475 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014105720742022766, "loss": 2.1206, "step": 310480 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014105552208902594, "loss": 1.9379, "step": 310485 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014105383674379893, "loss": 2.1984, "step": 310490 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014105215138454714, "loss": 2.0076, "step": 310495 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014105046601127122, "loss": 1.9984, "step": 310500 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014104878062397172, "loss": 2.1374, "step": 310505 }, { "epoch": 0.73, "grad_norm": 2.703125, "learning_rate": 0.0001410470952226492, "loss": 2.0027, "step": 310510 }, { "epoch": 0.73, "grad_norm": 2.5625, "learning_rate": 0.00014104540980730426, "loss": 2.0259, "step": 310515 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014104372437793745, "loss": 1.9928, "step": 310520 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014104203893454937, "loss": 2.3619, "step": 310525 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014104035347714056, "loss": 1.7744, "step": 310530 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001410386680057117, "loss": 1.8933, "step": 310535 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014103698252026318, "loss": 2.1744, "step": 310540 }, { "epoch": 0.73, "grad_norm": 1.953125, "learning_rate": 0.00014103529702079573, "loss": 2.0311, "step": 310545 }, { "epoch": 0.73, "grad_norm": 2.625, "learning_rate": 0.00014103361150730987, "loss": 2.0541, "step": 310550 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014103192597980622, "loss": 2.1781, "step": 310555 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.0001410302404382853, "loss": 2.1332, "step": 310560 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014102855488274768, "loss": 2.1752, "step": 310565 }, { "epoch": 0.73, "grad_norm": 2.59375, "learning_rate": 0.00014102686931319398, "loss": 2.1758, "step": 310570 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014102518372962475, "loss": 1.8907, "step": 310575 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014102349813204058, "loss": 1.9501, "step": 310580 }, { "epoch": 0.73, "grad_norm": 1.9375, "learning_rate": 0.00014102181252044206, "loss": 1.9955, "step": 310585 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014102012689482974, "loss": 2.1638, "step": 310590 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014101844125520414, "loss": 2.0197, "step": 310595 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014101675560156593, "loss": 2.1648, "step": 310600 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014101506993391568, "loss": 1.9561, "step": 310605 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014101338425225396, "loss": 2.0561, "step": 310610 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014101169855658126, "loss": 2.1648, "step": 310615 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014101001284689825, "loss": 1.9621, "step": 310620 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014100832712320548, "loss": 2.1195, "step": 310625 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001410066413855035, "loss": 2.0315, "step": 310630 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014100495563379293, "loss": 1.9878, "step": 310635 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014100326986807432, "loss": 2.0611, "step": 310640 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014100158408834825, "loss": 2.1709, "step": 310645 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014099989829461528, "loss": 1.9097, "step": 310650 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.000140998212486876, "loss": 2.0865, "step": 310655 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014099652666513102, "loss": 1.8678, "step": 310660 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014099484082938084, "loss": 2.0772, "step": 310665 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014099315497962612, "loss": 2.0485, "step": 310670 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014099146911586736, "loss": 1.9675, "step": 310675 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014098978323810518, "loss": 2.2583, "step": 310680 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014098809734634015, "loss": 2.0149, "step": 310685 }, { "epoch": 0.73, "grad_norm": 2.6875, "learning_rate": 0.00014098641144057286, "loss": 2.1087, "step": 310690 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014098472552080387, "loss": 2.0753, "step": 310695 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014098303958703373, "loss": 2.1296, "step": 310700 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.000140981353639263, "loss": 1.9939, "step": 310705 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014097966767749238, "loss": 2.0096, "step": 310710 }, { "epoch": 0.73, "grad_norm": 1.828125, "learning_rate": 0.00014097798170172233, "loss": 2.0288, "step": 310715 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014097629571195345, "loss": 2.1277, "step": 310720 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014097460970818635, "loss": 2.0787, "step": 310725 }, { "epoch": 0.73, "grad_norm": 1.734375, "learning_rate": 0.00014097292369042152, "loss": 1.9619, "step": 310730 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014097123765865964, "loss": 2.1489, "step": 310735 }, { "epoch": 0.73, "grad_norm": 1.7734375, "learning_rate": 0.00014096955161290126, "loss": 1.9336, "step": 310740 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001409678655531469, "loss": 1.9862, "step": 310745 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.0001409661794793972, "loss": 2.0355, "step": 310750 }, { "epoch": 0.73, "grad_norm": 2.8125, "learning_rate": 0.0001409644933916527, "loss": 2.0412, "step": 310755 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.000140962807289914, "loss": 1.8925, "step": 310760 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014096112117418163, "loss": 2.0775, "step": 310765 }, { "epoch": 0.73, "grad_norm": 1.8671875, "learning_rate": 0.0001409594350444562, "loss": 1.9486, "step": 310770 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.0001409577489007383, "loss": 2.1687, "step": 310775 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.00014095606274302852, "loss": 2.0429, "step": 310780 }, { "epoch": 0.73, "grad_norm": 1.765625, "learning_rate": 0.00014095437657132736, "loss": 1.8943, "step": 310785 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014095269038563548, "loss": 2.0122, "step": 310790 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014095100418595337, "loss": 2.2209, "step": 310795 }, { "epoch": 0.73, "grad_norm": 1.71875, "learning_rate": 0.0001409493179722817, "loss": 1.9174, "step": 310800 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.000140947631744621, "loss": 2.2101, "step": 310805 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.0001409459455029718, "loss": 2.0286, "step": 310810 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014094425924733475, "loss": 2.1121, "step": 310815 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001409425729777104, "loss": 2.1502, "step": 310820 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014094088669409932, "loss": 1.9417, "step": 310825 }, { "epoch": 0.73, "grad_norm": 1.8671875, "learning_rate": 0.0001409392003965021, "loss": 2.1415, "step": 310830 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.0001409375140849193, "loss": 2.1452, "step": 310835 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.0001409358277593515, "loss": 2.0734, "step": 310840 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014093414141979927, "loss": 1.9049, "step": 310845 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014093245506626323, "loss": 2.0427, "step": 310850 }, { "epoch": 0.73, "grad_norm": 3.265625, "learning_rate": 0.00014093076869874392, "loss": 1.994, "step": 310855 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.0001409290823172419, "loss": 1.9386, "step": 310860 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014092739592175773, "loss": 2.1206, "step": 310865 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014092570951229204, "loss": 2.0298, "step": 310870 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014092402308884543, "loss": 2.2455, "step": 310875 }, { "epoch": 0.73, "grad_norm": 2.59375, "learning_rate": 0.0001409223366514184, "loss": 2.2073, "step": 310880 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014092065020001156, "loss": 2.0186, "step": 310885 }, { "epoch": 0.73, "grad_norm": 2.734375, "learning_rate": 0.00014091896373462544, "loss": 2.2604, "step": 310890 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.0001409172772552607, "loss": 2.0627, "step": 310895 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014091559076191788, "loss": 1.9509, "step": 310900 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014091390425459755, "loss": 2.0211, "step": 310905 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014091221773330028, "loss": 2.0376, "step": 310910 }, { "epoch": 0.73, "grad_norm": 2.59375, "learning_rate": 0.00014091053119802665, "loss": 2.1711, "step": 310915 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014090884464877724, "loss": 1.992, "step": 310920 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.00014090715808555264, "loss": 1.953, "step": 310925 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.0001409054715083534, "loss": 2.0523, "step": 310930 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014090378491718014, "loss": 2.0938, "step": 310935 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014090209831203336, "loss": 2.1237, "step": 310940 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.0001409004116929137, "loss": 2.1056, "step": 310945 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.00014089872505982173, "loss": 1.9321, "step": 310950 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014089703841275802, "loss": 2.0175, "step": 310955 }, { "epoch": 0.73, "grad_norm": 1.875, "learning_rate": 0.00014089535175172314, "loss": 2.1325, "step": 310960 }, { "epoch": 0.73, "grad_norm": 2.71875, "learning_rate": 0.0001408936650767176, "loss": 2.0983, "step": 310965 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014089197838774214, "loss": 2.0361, "step": 310970 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014089029168479718, "loss": 1.9938, "step": 310975 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014088860496788333, "loss": 2.1632, "step": 310980 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014088691823700125, "loss": 2.0497, "step": 310985 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014088523149215145, "loss": 2.1439, "step": 310990 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001408835447333345, "loss": 2.0631, "step": 310995 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014088185796055097, "loss": 2.0334, "step": 311000 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.00014088017117380147, "loss": 1.9015, "step": 311005 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014087848437308658, "loss": 1.994, "step": 311010 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014087679755840686, "loss": 1.9376, "step": 311015 }, { "epoch": 0.73, "grad_norm": 1.84375, "learning_rate": 0.00014087511072976287, "loss": 2.1685, "step": 311020 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014087342388715516, "loss": 2.1558, "step": 311025 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014087173703058442, "loss": 2.2732, "step": 311030 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.0001408700501600511, "loss": 2.3617, "step": 311035 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014086836327555587, "loss": 2.1857, "step": 311040 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014086667637709926, "loss": 1.9589, "step": 311045 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.0001408649894646818, "loss": 1.9715, "step": 311050 }, { "epoch": 0.73, "grad_norm": 1.8984375, "learning_rate": 0.00014086330253830417, "loss": 1.9203, "step": 311055 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014086161559796688, "loss": 1.9966, "step": 311060 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014085992864367052, "loss": 2.1574, "step": 311065 }, { "epoch": 0.73, "grad_norm": 1.640625, "learning_rate": 0.00014085824167541568, "loss": 1.9909, "step": 311070 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.0001408565546932029, "loss": 2.2353, "step": 311075 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.0001408548676970328, "loss": 2.0228, "step": 311080 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014085318068690595, "loss": 2.0614, "step": 311085 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014085149366282286, "loss": 2.116, "step": 311090 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014084980662478418, "loss": 1.9909, "step": 311095 }, { "epoch": 0.73, "grad_norm": 2.640625, "learning_rate": 0.0001408481195727905, "loss": 2.1973, "step": 311100 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014084643250684232, "loss": 2.0732, "step": 311105 }, { "epoch": 0.73, "grad_norm": 2.671875, "learning_rate": 0.00014084474542694028, "loss": 1.9664, "step": 311110 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014084305833308492, "loss": 2.1132, "step": 311115 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014084137122527684, "loss": 1.9879, "step": 311120 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014083968410351662, "loss": 2.0762, "step": 311125 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014083799696780483, "loss": 1.8844, "step": 311130 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014083630981814198, "loss": 2.0325, "step": 311135 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014083462265452877, "loss": 1.9021, "step": 311140 }, { "epoch": 0.73, "grad_norm": 2.578125, "learning_rate": 0.0001408329354769657, "loss": 2.1062, "step": 311145 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014083124828545335, "loss": 2.2416, "step": 311150 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.0001408295610799923, "loss": 1.86, "step": 311155 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014082787386058312, "loss": 1.7782, "step": 311160 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014082618662722643, "loss": 2.1064, "step": 311165 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014082449937992278, "loss": 2.2043, "step": 311170 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014082281211867269, "loss": 1.9792, "step": 311175 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001408211248434768, "loss": 1.9514, "step": 311180 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014081943755433568, "loss": 2.1021, "step": 311185 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.0001408177502512499, "loss": 2.0371, "step": 311190 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014081606293422008, "loss": 2.0888, "step": 311195 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.0001408143756032467, "loss": 2.1001, "step": 311200 }, { "epoch": 0.73, "grad_norm": 1.859375, "learning_rate": 0.0001408126882583304, "loss": 2.0082, "step": 311205 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014081100089947173, "loss": 2.1309, "step": 311210 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014080931352667133, "loss": 2.2285, "step": 311215 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.0001408076261399297, "loss": 2.0303, "step": 311220 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014080593873924745, "loss": 2.0967, "step": 311225 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014080425132462514, "loss": 2.0459, "step": 311230 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014080256389606337, "loss": 1.9571, "step": 311235 }, { "epoch": 0.73, "grad_norm": 2.640625, "learning_rate": 0.0001408008764535627, "loss": 1.984, "step": 311240 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.0001407991889971237, "loss": 1.9546, "step": 311245 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014079750152674696, "loss": 2.0141, "step": 311250 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014079581404243308, "loss": 2.0943, "step": 311255 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014079412654418257, "loss": 2.0708, "step": 311260 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014079243903199605, "loss": 1.9387, "step": 311265 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014079075150587412, "loss": 2.2126, "step": 311270 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001407890639658173, "loss": 2.0537, "step": 311275 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014078737641182623, "loss": 2.2569, "step": 311280 }, { "epoch": 0.73, "grad_norm": 2.625, "learning_rate": 0.00014078568884390148, "loss": 2.0687, "step": 311285 }, { "epoch": 0.73, "grad_norm": 1.6953125, "learning_rate": 0.00014078400126204353, "loss": 2.13, "step": 311290 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014078231366625304, "loss": 2.0663, "step": 311295 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014078062605653058, "loss": 2.1355, "step": 311300 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014077893843287672, "loss": 2.0596, "step": 311305 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014077725079529208, "loss": 2.0044, "step": 311310 }, { "epoch": 0.73, "grad_norm": 2.484375, "learning_rate": 0.00014077556314377713, "loss": 2.1727, "step": 311315 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.0001407738754783325, "loss": 1.9928, "step": 311320 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.0001407721877989588, "loss": 1.9738, "step": 311325 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001407705001056566, "loss": 1.8597, "step": 311330 }, { "epoch": 0.73, "grad_norm": 3.34375, "learning_rate": 0.00014076881239842646, "loss": 2.0824, "step": 311335 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014076712467726895, "loss": 1.9909, "step": 311340 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.0001407654369421846, "loss": 1.8706, "step": 311345 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001407637491931741, "loss": 2.1056, "step": 311350 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014076206143023794, "loss": 1.9862, "step": 311355 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.0001407603736533767, "loss": 2.0361, "step": 311360 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.000140758685862591, "loss": 1.9183, "step": 311365 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001407569980578814, "loss": 1.9763, "step": 311370 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014075531023924844, "loss": 1.8788, "step": 311375 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014075362240669275, "loss": 2.1484, "step": 311380 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014075193456021488, "loss": 2.0132, "step": 311385 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.0001407502466998154, "loss": 1.8556, "step": 311390 }, { "epoch": 0.73, "grad_norm": 2.625, "learning_rate": 0.0001407485588254949, "loss": 2.1365, "step": 311395 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014074687093725395, "loss": 1.9183, "step": 311400 }, { "epoch": 0.73, "grad_norm": 1.9140625, "learning_rate": 0.00014074518303509317, "loss": 2.0257, "step": 311405 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014074349511901307, "loss": 2.0245, "step": 311410 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014074180718901422, "loss": 1.9688, "step": 311415 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.00014074011924509726, "loss": 1.9689, "step": 311420 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.00014073843128726275, "loss": 2.0354, "step": 311425 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014073674331551122, "loss": 1.9555, "step": 311430 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014073505532984331, "loss": 1.9233, "step": 311435 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014073336733025954, "loss": 2.1147, "step": 311440 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014073167931676056, "loss": 2.0311, "step": 311445 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014072999128934686, "loss": 1.9867, "step": 311450 }, { "epoch": 0.73, "grad_norm": 1.8203125, "learning_rate": 0.00014072830324801905, "loss": 1.8945, "step": 311455 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014072661519277772, "loss": 2.3013, "step": 311460 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014072492712362345, "loss": 2.1123, "step": 311465 }, { "epoch": 0.73, "grad_norm": 2.5625, "learning_rate": 0.0001407232390405568, "loss": 2.202, "step": 311470 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014072155094357837, "loss": 2.1844, "step": 311475 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.00014071986283268869, "loss": 2.0373, "step": 311480 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014071817470788836, "loss": 2.2328, "step": 311485 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014071648656917803, "loss": 2.1429, "step": 311490 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014071479841655816, "loss": 1.9968, "step": 311495 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014071311025002937, "loss": 1.9879, "step": 311500 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.00014071142206959224, "loss": 2.0949, "step": 311505 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001407097338752474, "loss": 1.9668, "step": 311510 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.00014070804566699533, "loss": 1.9487, "step": 311515 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014070635744483666, "loss": 2.0666, "step": 311520 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.00014070466920877193, "loss": 1.7787, "step": 311525 }, { "epoch": 0.73, "grad_norm": 2.203125, "learning_rate": 0.0001407029809588018, "loss": 2.0618, "step": 311530 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014070129269492677, "loss": 1.9755, "step": 311535 }, { "epoch": 0.73, "grad_norm": 1.8984375, "learning_rate": 0.00014069960441714745, "loss": 2.0284, "step": 311540 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.0001406979161254644, "loss": 1.8223, "step": 311545 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014069622781987822, "loss": 2.0687, "step": 311550 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014069453950038945, "loss": 2.1628, "step": 311555 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.0001406928511669987, "loss": 2.0162, "step": 311560 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.0001406911628197065, "loss": 2.0495, "step": 311565 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014068947445851351, "loss": 1.9747, "step": 311570 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014068778608342022, "loss": 2.1292, "step": 311575 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.00014068609769442728, "loss": 2.1377, "step": 311580 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.0001406844092915352, "loss": 2.0144, "step": 311585 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.0001406827208747446, "loss": 2.072, "step": 311590 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014068103244405604, "loss": 2.0005, "step": 311595 }, { "epoch": 0.73, "grad_norm": 2.671875, "learning_rate": 0.0001406793439994701, "loss": 2.1253, "step": 311600 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.00014067765554098737, "loss": 1.83, "step": 311605 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014067596706860842, "loss": 1.9618, "step": 311610 }, { "epoch": 0.73, "grad_norm": 2.78125, "learning_rate": 0.0001406742785823338, "loss": 2.073, "step": 311615 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014067259008216412, "loss": 2.0592, "step": 311620 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014067090156809995, "loss": 2.0906, "step": 311625 }, { "epoch": 0.73, "grad_norm": 2.921875, "learning_rate": 0.00014066921304014185, "loss": 2.0946, "step": 311630 }, { "epoch": 0.73, "grad_norm": 1.8671875, "learning_rate": 0.00014066752449829042, "loss": 2.1209, "step": 311635 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014066583594254622, "loss": 2.021, "step": 311640 }, { "epoch": 0.73, "grad_norm": 2.53125, "learning_rate": 0.00014066414737290984, "loss": 2.1149, "step": 311645 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014066245878938184, "loss": 2.1295, "step": 311650 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.0001406607701919628, "loss": 2.0626, "step": 311655 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014065908158065332, "loss": 2.0601, "step": 311660 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014065739295545396, "loss": 1.9598, "step": 311665 }, { "epoch": 0.73, "grad_norm": 1.9140625, "learning_rate": 0.0001406557043163653, "loss": 2.2565, "step": 311670 }, { "epoch": 0.73, "grad_norm": 1.8671875, "learning_rate": 0.0001406540156633879, "loss": 2.1406, "step": 311675 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.00014065232699652234, "loss": 2.1709, "step": 311680 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014065063831576922, "loss": 1.974, "step": 311685 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014064894962112913, "loss": 2.16, "step": 311690 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.0001406472609126026, "loss": 2.0177, "step": 311695 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.00014064557219019023, "loss": 1.9297, "step": 311700 }, { "epoch": 0.73, "grad_norm": 1.875, "learning_rate": 0.0001406438834538926, "loss": 2.1236, "step": 311705 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014064219470371028, "loss": 2.0604, "step": 311710 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014064050593964384, "loss": 2.2391, "step": 311715 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.00014063881716169387, "loss": 2.0703, "step": 311720 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014063712836986093, "loss": 2.2794, "step": 311725 }, { "epoch": 0.73, "grad_norm": 2.4375, "learning_rate": 0.00014063543956414565, "loss": 2.0923, "step": 311730 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014063375074454854, "loss": 2.1983, "step": 311735 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001406320619110702, "loss": 2.1389, "step": 311740 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014063037306371123, "loss": 2.0968, "step": 311745 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014062868420247217, "loss": 2.069, "step": 311750 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001406269953273536, "loss": 2.099, "step": 311755 }, { "epoch": 0.73, "grad_norm": 1.734375, "learning_rate": 0.00014062530643835615, "loss": 1.9047, "step": 311760 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.00014062361753548034, "loss": 2.0767, "step": 311765 }, { "epoch": 0.73, "grad_norm": 1.7890625, "learning_rate": 0.0001406219286187268, "loss": 1.8477, "step": 311770 }, { "epoch": 0.73, "grad_norm": 1.96875, "learning_rate": 0.000140620239688096, "loss": 2.1138, "step": 311775 }, { "epoch": 0.73, "grad_norm": 1.9453125, "learning_rate": 0.0001406185507435886, "loss": 2.14, "step": 311780 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014061686178520525, "loss": 2.004, "step": 311785 }, { "epoch": 0.73, "grad_norm": 3.0, "learning_rate": 0.00014061517281294637, "loss": 1.9124, "step": 311790 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014061348382681264, "loss": 2.0773, "step": 311795 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014061179482680457, "loss": 2.0697, "step": 311800 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.00014061010581292282, "loss": 2.1809, "step": 311805 }, { "epoch": 0.73, "grad_norm": 2.546875, "learning_rate": 0.0001406084167851679, "loss": 2.0552, "step": 311810 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.0001406067277435404, "loss": 2.043, "step": 311815 }, { "epoch": 0.73, "grad_norm": 2.171875, "learning_rate": 0.00014060503868804092, "loss": 2.0821, "step": 311820 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014060334961867, "loss": 2.0539, "step": 311825 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014060166053542827, "loss": 2.0312, "step": 311830 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014059997143831625, "loss": 1.8779, "step": 311835 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014059828232733455, "loss": 2.1381, "step": 311840 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014059659320248376, "loss": 2.0748, "step": 311845 }, { "epoch": 0.73, "grad_norm": 2.59375, "learning_rate": 0.0001405949040637644, "loss": 2.0634, "step": 311850 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014059321491117715, "loss": 1.9781, "step": 311855 }, { "epoch": 0.73, "grad_norm": 2.046875, "learning_rate": 0.00014059152574472244, "loss": 2.0124, "step": 311860 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.000140589836564401, "loss": 2.0086, "step": 311865 }, { "epoch": 0.73, "grad_norm": 2.609375, "learning_rate": 0.00014058814737021329, "loss": 2.055, "step": 311870 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014058645816215996, "loss": 1.9749, "step": 311875 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014058476894024153, "loss": 1.9608, "step": 311880 }, { "epoch": 0.73, "grad_norm": 2.703125, "learning_rate": 0.00014058307970445864, "loss": 1.85, "step": 311885 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.0001405813904548118, "loss": 2.3322, "step": 311890 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014057970119130163, "loss": 1.966, "step": 311895 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014057801191392875, "loss": 1.9992, "step": 311900 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014057632262269362, "loss": 1.9568, "step": 311905 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014057463331759688, "loss": 2.1665, "step": 311910 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014057294399863913, "loss": 2.0122, "step": 311915 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014057125466582096, "loss": 2.1264, "step": 311920 }, { "epoch": 0.73, "grad_norm": 2.625, "learning_rate": 0.0001405695653191429, "loss": 2.0864, "step": 311925 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014056787595860553, "loss": 2.0736, "step": 311930 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014056618658420938, "loss": 1.9496, "step": 311935 }, { "epoch": 0.73, "grad_norm": 2.734375, "learning_rate": 0.00014056449719595517, "loss": 2.0372, "step": 311940 }, { "epoch": 0.73, "grad_norm": 2.03125, "learning_rate": 0.00014056280779384337, "loss": 1.9992, "step": 311945 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014056111837787454, "loss": 2.2241, "step": 311950 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014055942894804934, "loss": 1.9529, "step": 311955 }, { "epoch": 0.73, "grad_norm": 2.578125, "learning_rate": 0.00014055773950436828, "loss": 1.9354, "step": 311960 }, { "epoch": 0.73, "grad_norm": 2.09375, "learning_rate": 0.00014055605004683198, "loss": 2.065, "step": 311965 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.000140554360575441, "loss": 2.032, "step": 311970 }, { "epoch": 0.73, "grad_norm": 2.453125, "learning_rate": 0.0001405526710901959, "loss": 1.9719, "step": 311975 }, { "epoch": 0.73, "grad_norm": 3.0, "learning_rate": 0.00014055098159109725, "loss": 2.0001, "step": 311980 }, { "epoch": 0.73, "grad_norm": 2.640625, "learning_rate": 0.0001405492920781457, "loss": 2.1347, "step": 311985 }, { "epoch": 0.73, "grad_norm": 1.9921875, "learning_rate": 0.00014054760255134173, "loss": 2.0949, "step": 311990 }, { "epoch": 0.73, "grad_norm": 2.390625, "learning_rate": 0.000140545913010686, "loss": 2.0332, "step": 311995 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014054422345617903, "loss": 1.9674, "step": 312000 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014054253388782141, "loss": 2.1024, "step": 312005 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014054084430561374, "loss": 2.0017, "step": 312010 }, { "epoch": 0.73, "grad_norm": 1.84375, "learning_rate": 0.0001405391547095566, "loss": 2.0368, "step": 312015 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001405374650996505, "loss": 2.0388, "step": 312020 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014053577547589613, "loss": 1.987, "step": 312025 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.00014053408583829396, "loss": 2.1168, "step": 312030 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.0001405323961868446, "loss": 2.0276, "step": 312035 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014053070652154866, "loss": 2.1134, "step": 312040 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001405290168424067, "loss": 2.1148, "step": 312045 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014052732714941928, "loss": 1.9146, "step": 312050 }, { "epoch": 0.73, "grad_norm": 2.515625, "learning_rate": 0.000140525637442587, "loss": 2.05, "step": 312055 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.00014052394772191045, "loss": 1.9831, "step": 312060 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014052225798739015, "loss": 1.999, "step": 312065 }, { "epoch": 0.73, "grad_norm": 2.140625, "learning_rate": 0.0001405205682390267, "loss": 2.1413, "step": 312070 }, { "epoch": 0.73, "grad_norm": 2.6875, "learning_rate": 0.00014051887847682073, "loss": 2.0204, "step": 312075 }, { "epoch": 0.73, "grad_norm": 2.28125, "learning_rate": 0.00014051718870077275, "loss": 1.9951, "step": 312080 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014051549891088338, "loss": 2.1769, "step": 312085 }, { "epoch": 0.73, "grad_norm": 1.9296875, "learning_rate": 0.00014051380910715317, "loss": 2.0164, "step": 312090 }, { "epoch": 0.73, "grad_norm": 2.296875, "learning_rate": 0.0001405121192895827, "loss": 2.0201, "step": 312095 }, { "epoch": 0.73, "grad_norm": 1.921875, "learning_rate": 0.00014051042945817258, "loss": 2.0068, "step": 312100 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014050873961292333, "loss": 1.917, "step": 312105 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014050704975383557, "loss": 2.1057, "step": 312110 }, { "epoch": 0.73, "grad_norm": 2.359375, "learning_rate": 0.0001405053598809099, "loss": 2.2134, "step": 312115 }, { "epoch": 0.73, "grad_norm": 1.75, "learning_rate": 0.00014050366999414683, "loss": 2.0821, "step": 312120 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.00014050198009354697, "loss": 1.8858, "step": 312125 }, { "epoch": 0.73, "grad_norm": 1.8828125, "learning_rate": 0.0001405002901791109, "loss": 2.1362, "step": 312130 }, { "epoch": 0.73, "grad_norm": 2.1875, "learning_rate": 0.00014049860025083924, "loss": 2.0453, "step": 312135 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014049691030873246, "loss": 2.1956, "step": 312140 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014049522035279125, "loss": 2.0151, "step": 312145 }, { "epoch": 0.73, "grad_norm": 1.6875, "learning_rate": 0.00014049353038301612, "loss": 2.0936, "step": 312150 }, { "epoch": 0.73, "grad_norm": 1.9375, "learning_rate": 0.00014049184039940764, "loss": 1.9126, "step": 312155 }, { "epoch": 0.73, "grad_norm": 2.078125, "learning_rate": 0.00014049015040196644, "loss": 2.0582, "step": 312160 }, { "epoch": 0.73, "grad_norm": 2.15625, "learning_rate": 0.00014048846039069307, "loss": 2.1001, "step": 312165 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014048677036558813, "loss": 2.1094, "step": 312170 }, { "epoch": 0.73, "grad_norm": 2.21875, "learning_rate": 0.00014048508032665213, "loss": 2.1804, "step": 312175 }, { "epoch": 0.73, "grad_norm": 2.109375, "learning_rate": 0.0001404833902738857, "loss": 1.8663, "step": 312180 }, { "epoch": 0.73, "grad_norm": 2.25, "learning_rate": 0.00014048170020728945, "loss": 1.8988, "step": 312185 }, { "epoch": 0.73, "grad_norm": 2.234375, "learning_rate": 0.00014048001012686388, "loss": 2.0792, "step": 312190 }, { "epoch": 0.73, "grad_norm": 2.5, "learning_rate": 0.00014047832003260963, "loss": 2.1726, "step": 312195 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.0001404766299245272, "loss": 2.1858, "step": 312200 }, { "epoch": 0.73, "grad_norm": 2.375, "learning_rate": 0.00014047493980261726, "loss": 2.0444, "step": 312205 }, { "epoch": 0.73, "grad_norm": 2.328125, "learning_rate": 0.00014047324966688033, "loss": 1.9588, "step": 312210 }, { "epoch": 0.73, "grad_norm": 2.984375, "learning_rate": 0.00014047155951731706, "loss": 1.9745, "step": 312215 }, { "epoch": 0.73, "grad_norm": 2.8125, "learning_rate": 0.00014046986935392793, "loss": 2.0182, "step": 312220 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014046817917671354, "loss": 2.1107, "step": 312225 }, { "epoch": 0.73, "grad_norm": 1.953125, "learning_rate": 0.00014046648898567447, "loss": 1.9296, "step": 312230 }, { "epoch": 0.73, "grad_norm": 1.90625, "learning_rate": 0.00014046479878081138, "loss": 1.8954, "step": 312235 }, { "epoch": 0.73, "grad_norm": 1.953125, "learning_rate": 0.00014046310856212472, "loss": 1.9768, "step": 312240 }, { "epoch": 0.73, "grad_norm": 2.59375, "learning_rate": 0.00014046141832961518, "loss": 2.0274, "step": 312245 }, { "epoch": 0.73, "grad_norm": 2.46875, "learning_rate": 0.00014045972808328326, "loss": 2.145, "step": 312250 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014045803782312955, "loss": 2.066, "step": 312255 }, { "epoch": 0.73, "grad_norm": 2.40625, "learning_rate": 0.00014045634754915464, "loss": 2.0586, "step": 312260 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014045465726135914, "loss": 2.0962, "step": 312265 }, { "epoch": 0.73, "grad_norm": 2.3125, "learning_rate": 0.00014045296695974356, "loss": 1.9921, "step": 312270 }, { "epoch": 0.73, "grad_norm": 1.9765625, "learning_rate": 0.00014045127664430852, "loss": 2.0573, "step": 312275 }, { "epoch": 0.73, "grad_norm": 2.703125, "learning_rate": 0.0001404495863150546, "loss": 2.0269, "step": 312280 }, { "epoch": 0.73, "grad_norm": 2.0, "learning_rate": 0.00014044789597198238, "loss": 1.9732, "step": 312285 }, { "epoch": 0.73, "grad_norm": 2.125, "learning_rate": 0.0001404462056150924, "loss": 2.0074, "step": 312290 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014044451524438528, "loss": 2.0741, "step": 312295 }, { "epoch": 0.73, "grad_norm": 2.0625, "learning_rate": 0.00014044282485986158, "loss": 1.9177, "step": 312300 }, { "epoch": 0.73, "grad_norm": 2.65625, "learning_rate": 0.00014044113446152188, "loss": 2.1475, "step": 312305 }, { "epoch": 0.73, "grad_norm": 2.421875, "learning_rate": 0.00014043944404936674, "loss": 2.0221, "step": 312310 }, { "epoch": 0.73, "grad_norm": 2.265625, "learning_rate": 0.00014043775362339674, "loss": 1.8899, "step": 312315 }, { "epoch": 0.73, "grad_norm": 2.34375, "learning_rate": 0.0001404360631836125, "loss": 2.1414, "step": 312320 }, { "epoch": 0.73, "grad_norm": 2.015625, "learning_rate": 0.00014043437273001457, "loss": 1.94, "step": 312325 }, { "epoch": 0.74, "grad_norm": 3.515625, "learning_rate": 0.00014043268226260352, "loss": 1.8897, "step": 312330 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.0001404309917813799, "loss": 2.1275, "step": 312335 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00014042930128634436, "loss": 1.9628, "step": 312340 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00014042761077749741, "loss": 2.153, "step": 312345 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014042592025483968, "loss": 1.9513, "step": 312350 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00014042422971837173, "loss": 2.1037, "step": 312355 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.0001404225391680941, "loss": 2.054, "step": 312360 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.0001404208486040074, "loss": 2.0746, "step": 312365 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001404191580261122, "loss": 1.9522, "step": 312370 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001404174674344091, "loss": 2.1171, "step": 312375 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014041577682889867, "loss": 2.0965, "step": 312380 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00014041408620958144, "loss": 2.1547, "step": 312385 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014041239557645803, "loss": 1.9255, "step": 312390 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014041070492952905, "loss": 2.0039, "step": 312395 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014040901426879505, "loss": 2.0633, "step": 312400 }, { "epoch": 0.74, "grad_norm": 1.859375, "learning_rate": 0.00014040732359425653, "loss": 2.1808, "step": 312405 }, { "epoch": 0.74, "grad_norm": 1.921875, "learning_rate": 0.0001404056329059142, "loss": 2.0215, "step": 312410 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00014040394220376853, "loss": 2.0815, "step": 312415 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.00014040225148782018, "loss": 2.1964, "step": 312420 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.00014040056075806967, "loss": 1.9954, "step": 312425 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00014039887001451756, "loss": 1.9893, "step": 312430 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.0001403971792571645, "loss": 2.1127, "step": 312435 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014039548848601103, "loss": 2.0701, "step": 312440 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014039379770105774, "loss": 1.9469, "step": 312445 }, { "epoch": 0.74, "grad_norm": 2.609375, "learning_rate": 0.00014039210690230518, "loss": 2.264, "step": 312450 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00014039041608975394, "loss": 2.1374, "step": 312455 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00014038872526340462, "loss": 2.1204, "step": 312460 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00014038703442325777, "loss": 2.0804, "step": 312465 }, { "epoch": 0.74, "grad_norm": 3.046875, "learning_rate": 0.00014038534356931395, "loss": 1.9279, "step": 312470 }, { "epoch": 0.74, "grad_norm": 3.078125, "learning_rate": 0.00014038365270157378, "loss": 1.9665, "step": 312475 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.0001403819618200378, "loss": 1.9452, "step": 312480 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00014038027092470667, "loss": 1.9861, "step": 312485 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001403785800155809, "loss": 2.0643, "step": 312490 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.000140376889092661, "loss": 2.0422, "step": 312495 }, { "epoch": 0.74, "grad_norm": 1.7890625, "learning_rate": 0.0001403751981559477, "loss": 2.1028, "step": 312500 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00014037350720544144, "loss": 2.1087, "step": 312505 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00014037181624114293, "loss": 2.1124, "step": 312510 }, { "epoch": 0.74, "grad_norm": 2.578125, "learning_rate": 0.00014037012526305263, "loss": 2.1141, "step": 312515 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00014036843427117117, "loss": 1.9035, "step": 312520 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.0001403667432654991, "loss": 2.0428, "step": 312525 }, { "epoch": 0.74, "grad_norm": 1.7265625, "learning_rate": 0.00014036505224603705, "loss": 2.0792, "step": 312530 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00014036336121278555, "loss": 2.1084, "step": 312535 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.0001403616701657452, "loss": 2.0357, "step": 312540 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.00014035997910491656, "loss": 1.9395, "step": 312545 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.0001403582880303002, "loss": 2.0202, "step": 312550 }, { "epoch": 0.74, "grad_norm": 1.734375, "learning_rate": 0.00014035659694189677, "loss": 2.058, "step": 312555 }, { "epoch": 0.74, "grad_norm": 1.828125, "learning_rate": 0.00014035490583970677, "loss": 2.0859, "step": 312560 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001403532147237308, "loss": 1.9122, "step": 312565 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014035152359396944, "loss": 2.0424, "step": 312570 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014034983245042324, "loss": 2.1108, "step": 312575 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014034814129309286, "loss": 1.9044, "step": 312580 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.0001403464501219788, "loss": 2.208, "step": 312585 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00014034475893708165, "loss": 2.0517, "step": 312590 }, { "epoch": 0.74, "grad_norm": 2.6875, "learning_rate": 0.000140343067738402, "loss": 2.0437, "step": 312595 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00014034137652594043, "loss": 1.9969, "step": 312600 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001403396852996975, "loss": 1.9782, "step": 312605 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014033799405967386, "loss": 2.0565, "step": 312610 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00014033630280586997, "loss": 2.083, "step": 312615 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.0001403346115382865, "loss": 2.1442, "step": 312620 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.000140332920256924, "loss": 1.9074, "step": 312625 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.000140331228961783, "loss": 2.2662, "step": 312630 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00014032953765286414, "loss": 2.087, "step": 312635 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.000140327846330168, "loss": 2.0689, "step": 312640 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00014032615499369512, "loss": 2.101, "step": 312645 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00014032446364344608, "loss": 2.073, "step": 312650 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001403227722794215, "loss": 2.0987, "step": 312655 }, { "epoch": 0.74, "grad_norm": 2.796875, "learning_rate": 0.0001403210809016219, "loss": 2.0869, "step": 312660 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.0001403193895100479, "loss": 1.966, "step": 312665 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.00014031769810470007, "loss": 2.0896, "step": 312670 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014031600668557901, "loss": 2.0868, "step": 312675 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.0001403143152526852, "loss": 1.9057, "step": 312680 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00014031262380601934, "loss": 2.0622, "step": 312685 }, { "epoch": 0.74, "grad_norm": 2.671875, "learning_rate": 0.00014031093234558197, "loss": 2.0003, "step": 312690 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00014030924087137363, "loss": 2.0294, "step": 312695 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014030754938339492, "loss": 1.9236, "step": 312700 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00014030585788164644, "loss": 2.1508, "step": 312705 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00014030416636612872, "loss": 2.0618, "step": 312710 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00014030247483684237, "loss": 2.1755, "step": 312715 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00014030078329378798, "loss": 2.0146, "step": 312720 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001402990917369661, "loss": 2.1186, "step": 312725 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.0001402974001663773, "loss": 2.0382, "step": 312730 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00014029570858202224, "loss": 1.9733, "step": 312735 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00014029401698390135, "loss": 1.9928, "step": 312740 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.0001402923253720154, "loss": 2.1487, "step": 312745 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00014029063374636474, "loss": 1.9821, "step": 312750 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00014028894210695017, "loss": 2.1997, "step": 312755 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014028725045377212, "loss": 2.1083, "step": 312760 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.0001402855587868312, "loss": 2.0421, "step": 312765 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00014028386710612803, "loss": 2.1893, "step": 312770 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00014028217541166315, "loss": 2.0568, "step": 312775 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014028048370343715, "loss": 1.8704, "step": 312780 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001402787919814506, "loss": 2.1324, "step": 312785 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00014027710024570408, "loss": 1.9734, "step": 312790 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.0001402754084961982, "loss": 2.112, "step": 312795 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014027371673293348, "loss": 1.9841, "step": 312800 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00014027202495591056, "loss": 2.0964, "step": 312805 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00014027033316512996, "loss": 2.1198, "step": 312810 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.0001402686413605923, "loss": 2.1592, "step": 312815 }, { "epoch": 0.74, "grad_norm": 2.890625, "learning_rate": 0.00014026694954229812, "loss": 2.167, "step": 312820 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00014026525771024807, "loss": 2.0929, "step": 312825 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00014026356586444263, "loss": 2.1734, "step": 312830 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00014026187400488241, "loss": 2.1551, "step": 312835 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014026018213156804, "loss": 2.0228, "step": 312840 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00014025849024450004, "loss": 2.055, "step": 312845 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00014025679834367904, "loss": 2.086, "step": 312850 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00014025510642910556, "loss": 2.0722, "step": 312855 }, { "epoch": 0.74, "grad_norm": 1.8828125, "learning_rate": 0.00014025341450078023, "loss": 1.9982, "step": 312860 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.00014025172255870358, "loss": 2.1073, "step": 312865 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.0001402500306028762, "loss": 2.0293, "step": 312870 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00014024833863329872, "loss": 2.2458, "step": 312875 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00014024664664997165, "loss": 2.0627, "step": 312880 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.0001402449546528956, "loss": 2.1068, "step": 312885 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00014024326264207114, "loss": 1.9209, "step": 312890 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00014024157061749885, "loss": 1.9399, "step": 312895 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00014023987857917934, "loss": 2.0432, "step": 312900 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.0001402381865271131, "loss": 2.19, "step": 312905 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00014023649446130083, "loss": 2.0245, "step": 312910 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.000140234802381743, "loss": 2.2104, "step": 312915 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00014023311028844027, "loss": 1.9822, "step": 312920 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00014023141818139313, "loss": 1.9011, "step": 312925 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00014022972606060223, "loss": 2.0542, "step": 312930 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00014022803392606814, "loss": 2.0843, "step": 312935 }, { "epoch": 0.74, "grad_norm": 2.671875, "learning_rate": 0.00014022634177779142, "loss": 2.0627, "step": 312940 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00014022464961577265, "loss": 1.8436, "step": 312945 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.0001402229574400124, "loss": 1.8681, "step": 312950 }, { "epoch": 0.74, "grad_norm": 1.7578125, "learning_rate": 0.00014022126525051125, "loss": 2.0911, "step": 312955 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.0001402195730472698, "loss": 2.1245, "step": 312960 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001402178808302886, "loss": 2.0141, "step": 312965 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014021618859956828, "loss": 2.0739, "step": 312970 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00014021449635510938, "loss": 2.023, "step": 312975 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00014021280409691242, "loss": 2.053, "step": 312980 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00014021111182497807, "loss": 2.0583, "step": 312985 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001402094195393069, "loss": 2.0138, "step": 312990 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00014020772723989943, "loss": 1.868, "step": 312995 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.0001402060349267563, "loss": 1.9877, "step": 313000 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00014020434259987802, "loss": 1.9986, "step": 313005 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00014020265025926524, "loss": 1.9699, "step": 313010 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.0001402009579049185, "loss": 2.0221, "step": 313015 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014019926553683837, "loss": 2.0388, "step": 313020 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00014019757315502547, "loss": 1.8873, "step": 313025 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014019588075948032, "loss": 2.1656, "step": 313030 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00014019418835020352, "loss": 1.9583, "step": 313035 }, { "epoch": 0.74, "grad_norm": 1.8359375, "learning_rate": 0.0001401924959271957, "loss": 2.0732, "step": 313040 }, { "epoch": 0.74, "grad_norm": 2.625, "learning_rate": 0.00014019080349045738, "loss": 2.1857, "step": 313045 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00014018911103998912, "loss": 2.1229, "step": 313050 }, { "epoch": 0.74, "grad_norm": 2.59375, "learning_rate": 0.00014018741857579157, "loss": 2.0277, "step": 313055 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.0001401857260978652, "loss": 2.0659, "step": 313060 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014018403360621076, "loss": 2.1322, "step": 313065 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00014018234110082863, "loss": 2.1446, "step": 313070 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00014018064858171956, "loss": 2.0343, "step": 313075 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00014017895604888402, "loss": 2.1415, "step": 313080 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014017726350232262, "loss": 2.2303, "step": 313085 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00014017557094203592, "loss": 2.0111, "step": 313090 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00014017387836802453, "loss": 2.172, "step": 313095 }, { "epoch": 0.74, "grad_norm": 2.765625, "learning_rate": 0.00014017218578028903, "loss": 1.7543, "step": 313100 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00014017049317883, "loss": 2.0292, "step": 313105 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00014016880056364796, "loss": 2.0369, "step": 313110 }, { "epoch": 0.74, "grad_norm": 2.796875, "learning_rate": 0.00014016710793474351, "loss": 2.0782, "step": 313115 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001401654152921173, "loss": 1.8096, "step": 313120 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00014016372263576983, "loss": 2.2998, "step": 313125 }, { "epoch": 0.74, "grad_norm": 3.09375, "learning_rate": 0.0001401620299657017, "loss": 1.8842, "step": 313130 }, { "epoch": 0.74, "grad_norm": 2.953125, "learning_rate": 0.0001401603372819135, "loss": 1.9707, "step": 313135 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.0001401586445844058, "loss": 2.2389, "step": 313140 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00014015695187317921, "loss": 2.0178, "step": 313145 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00014015525914823425, "loss": 2.0391, "step": 313150 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00014015356640957152, "loss": 2.1745, "step": 313155 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014015187365719157, "loss": 2.1091, "step": 313160 }, { "epoch": 0.74, "grad_norm": 1.8984375, "learning_rate": 0.00014015018089109506, "loss": 2.1041, "step": 313165 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00014014848811128253, "loss": 2.0675, "step": 313170 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.0001401467953177545, "loss": 2.038, "step": 313175 }, { "epoch": 0.74, "grad_norm": 2.703125, "learning_rate": 0.00014014510251051163, "loss": 2.127, "step": 313180 }, { "epoch": 0.74, "grad_norm": 2.59375, "learning_rate": 0.00014014340968955446, "loss": 2.0868, "step": 313185 }, { "epoch": 0.74, "grad_norm": 2.75, "learning_rate": 0.00014014171685488357, "loss": 1.9824, "step": 313190 }, { "epoch": 0.74, "grad_norm": 2.875, "learning_rate": 0.00014014002400649953, "loss": 1.9973, "step": 313195 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014013833114440296, "loss": 2.0931, "step": 313200 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00014013663826859438, "loss": 2.137, "step": 313205 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001401349453790744, "loss": 1.973, "step": 313210 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00014013325247584364, "loss": 2.0949, "step": 313215 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00014013155955890257, "loss": 1.8513, "step": 313220 }, { "epoch": 0.74, "grad_norm": 1.8984375, "learning_rate": 0.00014012986662825184, "loss": 2.1607, "step": 313225 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014012817368389203, "loss": 1.9641, "step": 313230 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00014012648072582375, "loss": 2.1066, "step": 313235 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001401247877540475, "loss": 2.1741, "step": 313240 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001401230947685639, "loss": 1.9266, "step": 313245 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.0001401214017693735, "loss": 2.0776, "step": 313250 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.00014011970875647692, "loss": 2.129, "step": 313255 }, { "epoch": 0.74, "grad_norm": 2.8125, "learning_rate": 0.00014011801572987473, "loss": 1.9461, "step": 313260 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00014011632268956747, "loss": 2.0272, "step": 313265 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00014011462963555576, "loss": 2.0919, "step": 313270 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00014011293656784014, "loss": 2.0681, "step": 313275 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001401112434864213, "loss": 1.8803, "step": 313280 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014010955039129964, "loss": 1.8602, "step": 313285 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014010785728247588, "loss": 2.094, "step": 313290 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00014010616415995053, "loss": 2.1274, "step": 313295 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001401044710237242, "loss": 2.1351, "step": 313300 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00014010277787379743, "loss": 2.1861, "step": 313305 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00014010108471017085, "loss": 2.0692, "step": 313310 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.000140099391532845, "loss": 1.9614, "step": 313315 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.00014009769834182048, "loss": 2.0078, "step": 313320 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014009600513709784, "loss": 1.8866, "step": 313325 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00014009431191867769, "loss": 1.9175, "step": 313330 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.0001400926186865606, "loss": 1.9294, "step": 313335 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00014009092544074714, "loss": 2.2064, "step": 313340 }, { "epoch": 0.74, "grad_norm": 3.34375, "learning_rate": 0.0001400892321812379, "loss": 2.1082, "step": 313345 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00014008753890803344, "loss": 1.9794, "step": 313350 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00014008584562113434, "loss": 2.0722, "step": 313355 }, { "epoch": 0.74, "grad_norm": 2.90625, "learning_rate": 0.0001400841523205412, "loss": 2.1601, "step": 313360 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.0001400824590062546, "loss": 1.9771, "step": 313365 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.0001400807656782751, "loss": 2.0729, "step": 313370 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00014007907233660327, "loss": 2.0765, "step": 313375 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00014007737898123968, "loss": 2.0954, "step": 313380 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.00014007568561218495, "loss": 2.1821, "step": 313385 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00014007399222943967, "loss": 1.7763, "step": 313390 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00014007229883300436, "loss": 2.1034, "step": 313395 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00014007060542287964, "loss": 1.9684, "step": 313400 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00014006891199906606, "loss": 2.1443, "step": 313405 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001400672185615642, "loss": 2.1975, "step": 313410 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.0001400655251103747, "loss": 1.9406, "step": 313415 }, { "epoch": 0.74, "grad_norm": 1.875, "learning_rate": 0.00014006383164549805, "loss": 2.1813, "step": 313420 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00014006213816693489, "loss": 2.0219, "step": 313425 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00014006044467468576, "loss": 2.0792, "step": 313430 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014005875116875123, "loss": 2.2115, "step": 313435 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00014005705764913196, "loss": 2.028, "step": 313440 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00014005536411582846, "loss": 2.1391, "step": 313445 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.0001400536705688413, "loss": 1.8622, "step": 313450 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00014005197700817107, "loss": 2.1769, "step": 313455 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.0001400502834338184, "loss": 2.0014, "step": 313460 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.0001400485898457838, "loss": 2.1277, "step": 313465 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00014004689624406787, "loss": 2.1285, "step": 313470 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.0001400452026286712, "loss": 1.9428, "step": 313475 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00014004350899959436, "loss": 1.9515, "step": 313480 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00014004181535683794, "loss": 2.1589, "step": 313485 }, { "epoch": 0.74, "grad_norm": 1.875, "learning_rate": 0.00014004012170040252, "loss": 2.0059, "step": 313490 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00014003842803028864, "loss": 2.2311, "step": 313495 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.0001400367343464969, "loss": 1.9741, "step": 313500 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014003504064902792, "loss": 2.0913, "step": 313505 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00014003334693788225, "loss": 1.9161, "step": 313510 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00014003165321306043, "loss": 2.0513, "step": 313515 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00014002995947456307, "loss": 1.985, "step": 313520 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00014002826572239078, "loss": 2.0666, "step": 313525 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00014002657195654413, "loss": 1.9619, "step": 313530 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.0001400248781770236, "loss": 1.9512, "step": 313535 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001400231843838299, "loss": 2.0893, "step": 313540 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001400214905769635, "loss": 2.1436, "step": 313545 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.0001400197967564251, "loss": 2.0474, "step": 313550 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001400181029222152, "loss": 1.9573, "step": 313555 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00014001640907433438, "loss": 1.9812, "step": 313560 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00014001471521278322, "loss": 2.1125, "step": 313565 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00014001302133756228, "loss": 2.1568, "step": 313570 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00014001132744867223, "loss": 2.1455, "step": 313575 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00014000963354611354, "loss": 2.153, "step": 313580 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00014000793962988685, "loss": 2.0273, "step": 313585 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00014000624569999271, "loss": 2.2196, "step": 313590 }, { "epoch": 0.74, "grad_norm": 1.6875, "learning_rate": 0.00014000455175643172, "loss": 1.8045, "step": 313595 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00014000285779920446, "loss": 1.9229, "step": 313600 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.0001400011638283115, "loss": 1.9668, "step": 313605 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001399994698437534, "loss": 1.9956, "step": 313610 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013999777584553075, "loss": 2.0268, "step": 313615 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.00013999608183364414, "loss": 2.0452, "step": 313620 }, { "epoch": 0.74, "grad_norm": 1.8515625, "learning_rate": 0.00013999438780809415, "loss": 1.94, "step": 313625 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013999269376888134, "loss": 1.9379, "step": 313630 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013999099971600631, "loss": 2.3153, "step": 313635 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013998930564946963, "loss": 2.2154, "step": 313640 }, { "epoch": 0.74, "grad_norm": 1.9296875, "learning_rate": 0.0001399876115692719, "loss": 2.1086, "step": 313645 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013998591747541362, "loss": 2.0073, "step": 313650 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013998422336789548, "loss": 1.9732, "step": 313655 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013998252924671799, "loss": 2.284, "step": 313660 }, { "epoch": 0.74, "grad_norm": 2.78125, "learning_rate": 0.00013998083511188172, "loss": 1.9536, "step": 313665 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.00013997914096338732, "loss": 2.0578, "step": 313670 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00013997744680123526, "loss": 2.0165, "step": 313675 }, { "epoch": 0.74, "grad_norm": 3.40625, "learning_rate": 0.0001399757526254262, "loss": 2.1024, "step": 313680 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.0001399740584359607, "loss": 1.8011, "step": 313685 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013997236423283937, "loss": 2.0995, "step": 313690 }, { "epoch": 0.74, "grad_norm": 2.578125, "learning_rate": 0.0001399706700160627, "loss": 2.0192, "step": 313695 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013996897578563139, "loss": 2.0573, "step": 313700 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001399672815415459, "loss": 2.0886, "step": 313705 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013996558728380688, "loss": 2.2194, "step": 313710 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013996389301241488, "loss": 1.9694, "step": 313715 }, { "epoch": 0.74, "grad_norm": 1.9609375, "learning_rate": 0.00013996219872737048, "loss": 1.9903, "step": 313720 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.0001399605044286743, "loss": 1.9312, "step": 313725 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00013995881011632686, "loss": 2.1133, "step": 313730 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013995711579032878, "loss": 2.029, "step": 313735 }, { "epoch": 0.74, "grad_norm": 2.921875, "learning_rate": 0.00013995542145068063, "loss": 2.197, "step": 313740 }, { "epoch": 0.74, "grad_norm": 1.8359375, "learning_rate": 0.00013995372709738296, "loss": 2.0656, "step": 313745 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.0001399520327304364, "loss": 2.0695, "step": 313750 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013995033834984148, "loss": 2.0674, "step": 313755 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013994864395559881, "loss": 2.191, "step": 313760 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013994694954770894, "loss": 2.0162, "step": 313765 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013994525512617248, "loss": 2.1247, "step": 313770 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013994356069099001, "loss": 2.0223, "step": 313775 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.0001399418662421621, "loss": 2.0319, "step": 313780 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.0001399401717796893, "loss": 2.0114, "step": 313785 }, { "epoch": 0.74, "grad_norm": 1.78125, "learning_rate": 0.00013993847730357222, "loss": 2.0603, "step": 313790 }, { "epoch": 0.74, "grad_norm": 2.75, "learning_rate": 0.00013993678281381145, "loss": 1.9283, "step": 313795 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00013993508831040754, "loss": 1.9392, "step": 313800 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013993339379336108, "loss": 2.2613, "step": 313805 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013993169926267263, "loss": 2.1119, "step": 313810 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001399300047183428, "loss": 2.1239, "step": 313815 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013992831016037215, "loss": 2.1857, "step": 313820 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.0001399266155887613, "loss": 2.1572, "step": 313825 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013992492100351074, "loss": 1.8742, "step": 313830 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013992322640462112, "loss": 2.1125, "step": 313835 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013992153179209302, "loss": 2.0013, "step": 313840 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013991983716592698, "loss": 2.0695, "step": 313845 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013991814252612366, "loss": 1.9703, "step": 313850 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013991644787268349, "loss": 1.9969, "step": 313855 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.0001399147532056072, "loss": 1.8635, "step": 313860 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013991305852489524, "loss": 2.0233, "step": 313865 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013991136383054834, "loss": 2.1323, "step": 313870 }, { "epoch": 0.74, "grad_norm": 1.9296875, "learning_rate": 0.00013990966912256694, "loss": 1.9536, "step": 313875 }, { "epoch": 0.74, "grad_norm": 1.953125, "learning_rate": 0.0001399079744009517, "loss": 1.8464, "step": 313880 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013990627966570316, "loss": 2.0209, "step": 313885 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.0001399045849168219, "loss": 1.9495, "step": 313890 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013990289015430854, "loss": 1.9503, "step": 313895 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.0001399011953781636, "loss": 2.0021, "step": 313900 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001398995005883877, "loss": 1.8722, "step": 313905 }, { "epoch": 0.74, "grad_norm": 1.8359375, "learning_rate": 0.0001398978057849814, "loss": 2.0861, "step": 313910 }, { "epoch": 0.74, "grad_norm": 1.7421875, "learning_rate": 0.00013989611096794529, "loss": 2.0635, "step": 313915 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013989441613727993, "loss": 2.0665, "step": 313920 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013989272129298595, "loss": 1.9682, "step": 313925 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013989102643506386, "loss": 1.929, "step": 313930 }, { "epoch": 0.74, "grad_norm": 2.6875, "learning_rate": 0.00013988933156351428, "loss": 2.0634, "step": 313935 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.0001398876366783378, "loss": 1.9099, "step": 313940 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013988594177953494, "loss": 2.2076, "step": 313945 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00013988424686710635, "loss": 2.2455, "step": 313950 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013988255194105257, "loss": 1.9822, "step": 313955 }, { "epoch": 0.74, "grad_norm": 2.609375, "learning_rate": 0.00013988085700137419, "loss": 1.9191, "step": 313960 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.0001398791620480718, "loss": 2.119, "step": 313965 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013987746708114593, "loss": 1.9746, "step": 313970 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013987577210059721, "loss": 2.0094, "step": 313975 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.0001398740771064262, "loss": 2.059, "step": 313980 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013987238209863353, "loss": 2.0788, "step": 313985 }, { "epoch": 0.74, "grad_norm": 2.59375, "learning_rate": 0.0001398706870772197, "loss": 2.1805, "step": 313990 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.0001398689920421853, "loss": 2.175, "step": 313995 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.0001398672969935309, "loss": 2.1906, "step": 314000 }, { "epoch": 0.74, "grad_norm": 2.609375, "learning_rate": 0.00013986560193125717, "loss": 2.163, "step": 314005 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013986390685536462, "loss": 1.9125, "step": 314010 }, { "epoch": 0.74, "grad_norm": 2.6875, "learning_rate": 0.0001398622117658538, "loss": 1.9854, "step": 314015 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013986051666272536, "loss": 2.1075, "step": 314020 }, { "epoch": 0.74, "grad_norm": 2.75, "learning_rate": 0.0001398588215459798, "loss": 2.1242, "step": 314025 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.0001398571264156178, "loss": 2.1067, "step": 314030 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013985543127163986, "loss": 1.8131, "step": 314035 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013985373611404657, "loss": 2.0777, "step": 314040 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013985204094283852, "loss": 1.912, "step": 314045 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001398503457580163, "loss": 2.0165, "step": 314050 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001398486505595805, "loss": 2.0494, "step": 314055 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013984695534753164, "loss": 2.0728, "step": 314060 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013984526012187035, "loss": 1.9224, "step": 314065 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013984356488259722, "loss": 2.0683, "step": 314070 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.0001398418696297128, "loss": 2.1094, "step": 314075 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013984017436321765, "loss": 1.9236, "step": 314080 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013983847908311239, "loss": 2.0429, "step": 314085 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00013983678378939757, "loss": 2.0222, "step": 314090 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001398350884820738, "loss": 2.1696, "step": 314095 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013983339316114162, "loss": 2.01, "step": 314100 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013983169782660164, "loss": 1.9622, "step": 314105 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013983000247845445, "loss": 2.0412, "step": 314110 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00013982830711670058, "loss": 2.012, "step": 314115 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013982661174134067, "loss": 1.9221, "step": 314120 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013982491635237525, "loss": 2.2102, "step": 314125 }, { "epoch": 0.74, "grad_norm": 2.671875, "learning_rate": 0.00013982322094980489, "loss": 1.9848, "step": 314130 }, { "epoch": 0.74, "grad_norm": 2.90625, "learning_rate": 0.00013982152553363022, "loss": 1.9333, "step": 314135 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001398198301038518, "loss": 1.9317, "step": 314140 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.0001398181346604702, "loss": 2.1481, "step": 314145 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00013981643920348602, "loss": 2.0453, "step": 314150 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013981474373289977, "loss": 1.9858, "step": 314155 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.0001398130482487121, "loss": 2.1378, "step": 314160 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.0001398113527509236, "loss": 1.9046, "step": 314165 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013980965723953484, "loss": 1.94, "step": 314170 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013980796171454634, "loss": 2.1915, "step": 314175 }, { "epoch": 0.74, "grad_norm": 2.78125, "learning_rate": 0.0001398062661759587, "loss": 2.1093, "step": 314180 }, { "epoch": 0.74, "grad_norm": 2.765625, "learning_rate": 0.00013980457062377254, "loss": 2.0002, "step": 314185 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.0001398028750579884, "loss": 1.9389, "step": 314190 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.00013980117947860692, "loss": 1.8632, "step": 314195 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001397994838856286, "loss": 2.0259, "step": 314200 }, { "epoch": 0.74, "grad_norm": 1.9140625, "learning_rate": 0.00013979778827905405, "loss": 1.9318, "step": 314205 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013979609265888388, "loss": 2.1204, "step": 314210 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013979439702511862, "loss": 2.0346, "step": 314215 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.0001397927013777589, "loss": 2.1593, "step": 314220 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013979100571680523, "loss": 1.9798, "step": 314225 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00013978931004225825, "loss": 2.1925, "step": 314230 }, { "epoch": 0.74, "grad_norm": 1.90625, "learning_rate": 0.00013978761435411854, "loss": 1.961, "step": 314235 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00013978591865238663, "loss": 1.9341, "step": 314240 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013978422293706315, "loss": 1.9665, "step": 314245 }, { "epoch": 0.74, "grad_norm": 2.78125, "learning_rate": 0.00013978252720814864, "loss": 2.0537, "step": 314250 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013978083146564372, "loss": 2.1746, "step": 314255 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013977913570954894, "loss": 2.2409, "step": 314260 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013977743993986484, "loss": 1.9311, "step": 314265 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001397757441565921, "loss": 2.1198, "step": 314270 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.0001397740483597312, "loss": 2.0212, "step": 314275 }, { "epoch": 0.74, "grad_norm": 1.8359375, "learning_rate": 0.00013977235254928285, "loss": 1.8878, "step": 314280 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00013977065672524748, "loss": 2.0428, "step": 314285 }, { "epoch": 0.74, "grad_norm": 1.9609375, "learning_rate": 0.00013976896088762573, "loss": 2.0502, "step": 314290 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013976726503641816, "loss": 1.8989, "step": 314295 }, { "epoch": 0.74, "grad_norm": 2.75, "learning_rate": 0.00013976556917162544, "loss": 1.7787, "step": 314300 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00013976387329324802, "loss": 2.144, "step": 314305 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013976217740128657, "loss": 1.9948, "step": 314310 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013976048149574164, "loss": 1.8543, "step": 314315 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013975878557661377, "loss": 1.9034, "step": 314320 }, { "epoch": 0.74, "grad_norm": 1.921875, "learning_rate": 0.0001397570896439036, "loss": 2.1288, "step": 314325 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001397553936976117, "loss": 2.0271, "step": 314330 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013975369773773863, "loss": 2.2458, "step": 314335 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.000139752001764285, "loss": 2.1337, "step": 314340 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.00013975030577725132, "loss": 2.1891, "step": 314345 }, { "epoch": 0.74, "grad_norm": 1.875, "learning_rate": 0.00013974860977663823, "loss": 1.9703, "step": 314350 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001397469137624463, "loss": 2.1441, "step": 314355 }, { "epoch": 0.74, "grad_norm": 1.8984375, "learning_rate": 0.00013974521773467614, "loss": 2.0462, "step": 314360 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013974352169332822, "loss": 2.1202, "step": 314365 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00013974182563840324, "loss": 2.0575, "step": 314370 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00013974012956990172, "loss": 2.0944, "step": 314375 }, { "epoch": 0.74, "grad_norm": 1.8984375, "learning_rate": 0.00013973843348782426, "loss": 2.2356, "step": 314380 }, { "epoch": 0.74, "grad_norm": 3.25, "learning_rate": 0.00013973673739217142, "loss": 2.184, "step": 314385 }, { "epoch": 0.74, "grad_norm": 2.609375, "learning_rate": 0.00013973504128294377, "loss": 2.0109, "step": 314390 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013973334516014196, "loss": 2.0907, "step": 314395 }, { "epoch": 0.74, "grad_norm": 1.8125, "learning_rate": 0.0001397316490237665, "loss": 2.0668, "step": 314400 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013972995287381794, "loss": 2.04, "step": 314405 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013972825671029694, "loss": 2.0727, "step": 314410 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00013972656053320406, "loss": 2.0683, "step": 314415 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013972486434253988, "loss": 1.9314, "step": 314420 }, { "epoch": 0.74, "grad_norm": 1.6796875, "learning_rate": 0.00013972316813830492, "loss": 2.1087, "step": 314425 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013972147192049982, "loss": 2.209, "step": 314430 }, { "epoch": 0.74, "grad_norm": 1.84375, "learning_rate": 0.00013971977568912516, "loss": 1.9951, "step": 314435 }, { "epoch": 0.74, "grad_norm": 2.65625, "learning_rate": 0.0001397180794441815, "loss": 1.9143, "step": 314440 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013971638318566944, "loss": 1.9732, "step": 314445 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.0001397146869135895, "loss": 2.1261, "step": 314450 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013971299062794231, "loss": 2.0774, "step": 314455 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.00013971129432872848, "loss": 2.1416, "step": 314460 }, { "epoch": 0.74, "grad_norm": 2.578125, "learning_rate": 0.00013970959801594854, "loss": 2.0838, "step": 314465 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001397079016896031, "loss": 1.9121, "step": 314470 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013970620534969267, "loss": 2.154, "step": 314475 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013970450899621788, "loss": 2.0071, "step": 314480 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013970281262917935, "loss": 1.8741, "step": 314485 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.0001397011162485776, "loss": 2.0016, "step": 314490 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013969941985441325, "loss": 2.0844, "step": 314495 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013969772344668682, "loss": 2.1758, "step": 314500 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013969602702539895, "loss": 2.0232, "step": 314505 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013969433059055018, "loss": 2.1213, "step": 314510 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013969263414214113, "loss": 2.2223, "step": 314515 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013969093768017237, "loss": 1.9846, "step": 314520 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013968924120464442, "loss": 2.1647, "step": 314525 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013968754471555794, "loss": 1.9769, "step": 314530 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013968584821291347, "loss": 2.0596, "step": 314535 }, { "epoch": 0.74, "grad_norm": 1.84375, "learning_rate": 0.00013968415169671156, "loss": 1.8695, "step": 314540 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013968245516695287, "loss": 2.0496, "step": 314545 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013968075862363788, "loss": 2.0411, "step": 314550 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.0001396790620667673, "loss": 2.076, "step": 314555 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013967736549634158, "loss": 2.0146, "step": 314560 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013967566891236137, "loss": 2.139, "step": 314565 }, { "epoch": 0.74, "grad_norm": 1.8203125, "learning_rate": 0.00013967397231482722, "loss": 1.8887, "step": 314570 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013967227570373973, "loss": 2.1034, "step": 314575 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013967057907909947, "loss": 1.9658, "step": 314580 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.000139668882440907, "loss": 2.127, "step": 314585 }, { "epoch": 0.74, "grad_norm": 1.7265625, "learning_rate": 0.00013966718578916295, "loss": 2.0771, "step": 314590 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013966548912386787, "loss": 2.0594, "step": 314595 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013966379244502233, "loss": 2.0177, "step": 314600 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013966209575262692, "loss": 1.8708, "step": 314605 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00013966039904668223, "loss": 2.0614, "step": 314610 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013965870232718878, "loss": 1.9389, "step": 314615 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013965700559414725, "loss": 2.049, "step": 314620 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013965530884755815, "loss": 2.0635, "step": 314625 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013965361208742206, "loss": 2.0543, "step": 314630 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013965191531373962, "loss": 1.9461, "step": 314635 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013965021852651135, "loss": 1.9777, "step": 314640 }, { "epoch": 0.74, "grad_norm": 2.984375, "learning_rate": 0.0001396485217257378, "loss": 2.1752, "step": 314645 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00013964682491141966, "loss": 2.2704, "step": 314650 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013964512808355744, "loss": 1.9545, "step": 314655 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.0001396434312421517, "loss": 2.0234, "step": 314660 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013964173438720301, "loss": 1.9115, "step": 314665 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013964003751871205, "loss": 2.1756, "step": 314670 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001396383406366793, "loss": 2.0899, "step": 314675 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001396366437411054, "loss": 2.0531, "step": 314680 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00013963494683199087, "loss": 2.006, "step": 314685 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00013963324990933637, "loss": 1.9208, "step": 314690 }, { "epoch": 0.74, "grad_norm": 2.734375, "learning_rate": 0.00013963155297314238, "loss": 2.1718, "step": 314695 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013962985602340955, "loss": 1.9322, "step": 314700 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.00013962815906013845, "loss": 2.065, "step": 314705 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00013962646208332967, "loss": 2.1818, "step": 314710 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013962476509298374, "loss": 2.2466, "step": 314715 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013962306808910127, "loss": 2.09, "step": 314720 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013962137107168286, "loss": 1.9053, "step": 314725 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013961967404072905, "loss": 1.9092, "step": 314730 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00013961797699624048, "loss": 1.9739, "step": 314735 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013961627993821767, "loss": 2.1788, "step": 314740 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013961458286666122, "loss": 2.0317, "step": 314745 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013961288578157168, "loss": 1.887, "step": 314750 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001396111886829497, "loss": 2.1182, "step": 314755 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00013960949157079583, "loss": 1.936, "step": 314760 }, { "epoch": 0.74, "grad_norm": 1.9609375, "learning_rate": 0.00013960779444511062, "loss": 2.0762, "step": 314765 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013960609730589464, "loss": 1.8449, "step": 314770 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.00013960440015314853, "loss": 1.9138, "step": 314775 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00013960270298687282, "loss": 2.0483, "step": 314780 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013960100580706814, "loss": 2.093, "step": 314785 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.000139599308613735, "loss": 2.0868, "step": 314790 }, { "epoch": 0.74, "grad_norm": 1.9296875, "learning_rate": 0.00013959761140687403, "loss": 2.1219, "step": 314795 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013959591418648582, "loss": 2.191, "step": 314800 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00013959421695257094, "loss": 2.0228, "step": 314805 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.0001395925197051299, "loss": 2.1035, "step": 314810 }, { "epoch": 0.74, "grad_norm": 2.6875, "learning_rate": 0.00013959082244416337, "loss": 2.0285, "step": 314815 }, { "epoch": 0.74, "grad_norm": 2.703125, "learning_rate": 0.0001395891251696719, "loss": 2.1068, "step": 314820 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001395874278816561, "loss": 2.0419, "step": 314825 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013958573058011644, "loss": 1.9987, "step": 314830 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013958403326505363, "loss": 2.0928, "step": 314835 }, { "epoch": 0.74, "grad_norm": 1.75, "learning_rate": 0.00013958233593646817, "loss": 2.1372, "step": 314840 }, { "epoch": 0.74, "grad_norm": 1.6640625, "learning_rate": 0.00013958063859436067, "loss": 2.0133, "step": 314845 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013957894123873174, "loss": 2.0618, "step": 314850 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013957724386958186, "loss": 2.0129, "step": 314855 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013957554648691173, "loss": 2.0593, "step": 314860 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00013957384909072187, "loss": 2.164, "step": 314865 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013957215168101285, "loss": 2.083, "step": 314870 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013957045425778527, "loss": 2.0168, "step": 314875 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.0001395687568210397, "loss": 2.0736, "step": 314880 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013956705937077671, "loss": 1.8651, "step": 314885 }, { "epoch": 0.74, "grad_norm": 1.8984375, "learning_rate": 0.00013956536190699696, "loss": 1.9436, "step": 314890 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013956366442970092, "loss": 2.2618, "step": 314895 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013956196693888922, "loss": 2.139, "step": 314900 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013956026943456241, "loss": 2.117, "step": 314905 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013955857191672112, "loss": 2.0124, "step": 314910 }, { "epoch": 0.74, "grad_norm": 2.578125, "learning_rate": 0.00013955687438536591, "loss": 1.8469, "step": 314915 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013955517684049733, "loss": 2.2393, "step": 314920 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.000139553479282116, "loss": 2.1253, "step": 314925 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.0001395517817102225, "loss": 2.0219, "step": 314930 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013955008412481736, "loss": 2.1967, "step": 314935 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.0001395483865259012, "loss": 2.0986, "step": 314940 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00013954668891347463, "loss": 2.0738, "step": 314945 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013954499128753816, "loss": 2.2301, "step": 314950 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001395432936480924, "loss": 2.1177, "step": 314955 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00013954159599513795, "loss": 2.0039, "step": 314960 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.00013953989832867536, "loss": 2.0167, "step": 314965 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013953820064870523, "loss": 2.1491, "step": 314970 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013953650295522814, "loss": 2.2768, "step": 314975 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013953480524824468, "loss": 2.0108, "step": 314980 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001395331075277554, "loss": 1.9892, "step": 314985 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013953140979376086, "loss": 1.866, "step": 314990 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013952971204626171, "loss": 2.0812, "step": 314995 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.00013952801428525848, "loss": 2.0969, "step": 315000 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013952631651075178, "loss": 2.0958, "step": 315005 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013952461872274215, "loss": 1.9733, "step": 315010 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001395229209212302, "loss": 2.3071, "step": 315015 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013952122310621648, "loss": 2.1198, "step": 315020 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00013951952527770166, "loss": 1.9093, "step": 315025 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.0001395178274356862, "loss": 1.877, "step": 315030 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013951612958017074, "loss": 2.3177, "step": 315035 }, { "epoch": 0.74, "grad_norm": 1.71875, "learning_rate": 0.00013951443171115587, "loss": 2.2049, "step": 315040 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013951273382864214, "loss": 1.9602, "step": 315045 }, { "epoch": 0.74, "grad_norm": 1.9375, "learning_rate": 0.00013951103593263015, "loss": 2.0363, "step": 315050 }, { "epoch": 0.74, "grad_norm": 1.796875, "learning_rate": 0.0001395093380231205, "loss": 1.8837, "step": 315055 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.0001395076401001137, "loss": 1.9458, "step": 315060 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.0001395059421636104, "loss": 2.1441, "step": 315065 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.0001395042442136111, "loss": 2.1082, "step": 315070 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.0001395025462501165, "loss": 1.8648, "step": 315075 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.0001395008482731271, "loss": 2.0243, "step": 315080 }, { "epoch": 0.74, "grad_norm": 3.0, "learning_rate": 0.0001394991502826435, "loss": 2.0439, "step": 315085 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013949745227866624, "loss": 2.1568, "step": 315090 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013949575426119593, "loss": 1.976, "step": 315095 }, { "epoch": 0.74, "grad_norm": 1.8203125, "learning_rate": 0.0001394940562302332, "loss": 1.8357, "step": 315100 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013949235818577855, "loss": 2.2017, "step": 315105 }, { "epoch": 0.74, "grad_norm": 1.9296875, "learning_rate": 0.0001394906601278326, "loss": 2.1111, "step": 315110 }, { "epoch": 0.74, "grad_norm": 1.921875, "learning_rate": 0.00013948896205639595, "loss": 2.1258, "step": 315115 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013948726397146913, "loss": 2.0993, "step": 315120 }, { "epoch": 0.74, "grad_norm": 2.859375, "learning_rate": 0.00013948556587305272, "loss": 2.061, "step": 315125 }, { "epoch": 0.74, "grad_norm": 3.09375, "learning_rate": 0.00013948386776114737, "loss": 2.2526, "step": 315130 }, { "epoch": 0.74, "grad_norm": 2.953125, "learning_rate": 0.0001394821696357536, "loss": 1.9022, "step": 315135 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.000139480471496872, "loss": 2.0156, "step": 315140 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.00013947877334450316, "loss": 1.9036, "step": 315145 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013947707517864764, "loss": 2.2255, "step": 315150 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013947537699930603, "loss": 2.1094, "step": 315155 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013947367880647895, "loss": 2.1068, "step": 315160 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013947198060016696, "loss": 1.9713, "step": 315165 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.0001394702823803706, "loss": 2.102, "step": 315170 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013946858414709044, "loss": 2.0824, "step": 315175 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.0001394668859003271, "loss": 2.0141, "step": 315180 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.0001394651876400812, "loss": 1.9078, "step": 315185 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013946348936635326, "loss": 1.9366, "step": 315190 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013946179107914388, "loss": 1.9278, "step": 315195 }, { "epoch": 0.74, "grad_norm": 2.734375, "learning_rate": 0.00013946009277845362, "loss": 2.0512, "step": 315200 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013945839446428306, "loss": 1.874, "step": 315205 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013945669613663286, "loss": 2.0148, "step": 315210 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013945499779550349, "loss": 2.0729, "step": 315215 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.0001394532994408956, "loss": 2.0293, "step": 315220 }, { "epoch": 0.74, "grad_norm": 2.78125, "learning_rate": 0.00013945160107280973, "loss": 2.2021, "step": 315225 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013944990269124647, "loss": 2.0547, "step": 315230 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013944820429620641, "loss": 2.2381, "step": 315235 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013944650588769016, "loss": 2.1912, "step": 315240 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013944480746569824, "loss": 2.0468, "step": 315245 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013944310903023127, "loss": 2.1185, "step": 315250 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001394414105812898, "loss": 2.2475, "step": 315255 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00013943971211887447, "loss": 2.1287, "step": 315260 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013943801364298578, "loss": 2.1956, "step": 315265 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013943631515362433, "loss": 1.9657, "step": 315270 }, { "epoch": 0.74, "grad_norm": 1.84375, "learning_rate": 0.00013943461665079075, "loss": 2.0123, "step": 315275 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013943291813448564, "loss": 1.9851, "step": 315280 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013943121960470944, "loss": 2.1371, "step": 315285 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00013942952106146285, "loss": 1.8978, "step": 315290 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013942782250474643, "loss": 2.1034, "step": 315295 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00013942612393456078, "loss": 2.2184, "step": 315300 }, { "epoch": 0.74, "grad_norm": 1.9453125, "learning_rate": 0.00013942442535090643, "loss": 1.9673, "step": 315305 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013942272675378394, "loss": 2.184, "step": 315310 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.00013942102814319397, "loss": 2.0787, "step": 315315 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00013941932951913705, "loss": 1.9061, "step": 315320 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013941763088161382, "loss": 2.0452, "step": 315325 }, { "epoch": 0.74, "grad_norm": 1.90625, "learning_rate": 0.00013941593223062477, "loss": 2.1896, "step": 315330 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.0001394142335661705, "loss": 2.093, "step": 315335 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013941253488825164, "loss": 2.1197, "step": 315340 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013941083619686874, "loss": 1.9908, "step": 315345 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.0001394091374920224, "loss": 2.0457, "step": 315350 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00013940743877371317, "loss": 2.1311, "step": 315355 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013940574004194165, "loss": 2.0926, "step": 315360 }, { "epoch": 0.74, "grad_norm": 1.8359375, "learning_rate": 0.00013940404129670836, "loss": 1.9385, "step": 315365 }, { "epoch": 0.74, "grad_norm": 1.921875, "learning_rate": 0.00013940234253801402, "loss": 1.8896, "step": 315370 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.0001394006437658591, "loss": 1.9616, "step": 315375 }, { "epoch": 0.74, "grad_norm": 2.375, "learning_rate": 0.00013939894498024417, "loss": 2.0896, "step": 315380 }, { "epoch": 0.74, "grad_norm": 1.921875, "learning_rate": 0.00013939724618116988, "loss": 1.949, "step": 315385 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013939554736863675, "loss": 1.9946, "step": 315390 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.0001393938485426454, "loss": 2.2353, "step": 315395 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013939214970319638, "loss": 1.9866, "step": 315400 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013939045085029032, "loss": 2.1228, "step": 315405 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013938875198392776, "loss": 2.1075, "step": 315410 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013938705310410928, "loss": 2.1405, "step": 315415 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013938535421083548, "loss": 2.1389, "step": 315420 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.0001393836553041069, "loss": 2.1343, "step": 315425 }, { "epoch": 0.74, "grad_norm": 1.890625, "learning_rate": 0.00013938195638392418, "loss": 2.0621, "step": 315430 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013938025745028785, "loss": 1.9646, "step": 315435 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.00013937855850319853, "loss": 2.029, "step": 315440 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013937685954265672, "loss": 2.0478, "step": 315445 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013937516056866314, "loss": 2.0273, "step": 315450 }, { "epoch": 0.74, "grad_norm": 2.953125, "learning_rate": 0.00013937346158121824, "loss": 2.1401, "step": 315455 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.0001393717625803227, "loss": 1.8775, "step": 315460 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013937006356597702, "loss": 2.0684, "step": 315465 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00013936836453818177, "loss": 2.0758, "step": 315470 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013936666549693765, "loss": 1.9324, "step": 315475 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013936496644224514, "loss": 1.9682, "step": 315480 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00013936326737410483, "loss": 2.127, "step": 315485 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013936156829251733, "loss": 1.9885, "step": 315490 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00013935986919748317, "loss": 1.9388, "step": 315495 }, { "epoch": 0.74, "grad_norm": 1.9921875, "learning_rate": 0.00013935817008900298, "loss": 2.1552, "step": 315500 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013935647096707733, "loss": 2.0863, "step": 315505 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001393547718317068, "loss": 2.1312, "step": 315510 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013935307268289198, "loss": 1.9673, "step": 315515 }, { "epoch": 0.74, "grad_norm": 2.59375, "learning_rate": 0.0001393513735206334, "loss": 2.1459, "step": 315520 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001393496743449317, "loss": 2.1605, "step": 315525 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013934797515578742, "loss": 2.1476, "step": 315530 }, { "epoch": 0.74, "grad_norm": 1.765625, "learning_rate": 0.0001393462759532012, "loss": 1.9487, "step": 315535 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013934457673717355, "loss": 2.1248, "step": 315540 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013934287750770507, "loss": 2.0494, "step": 315545 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013934117826479636, "loss": 2.1585, "step": 315550 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.000139339479008448, "loss": 1.9951, "step": 315555 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013933777973866053, "loss": 2.0247, "step": 315560 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001393360804554346, "loss": 2.1088, "step": 315565 }, { "epoch": 0.74, "grad_norm": 1.9609375, "learning_rate": 0.00013933438115877074, "loss": 2.0677, "step": 315570 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013933268184866955, "loss": 2.008, "step": 315575 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013933098252513158, "loss": 1.9474, "step": 315580 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.0001393292831881574, "loss": 1.958, "step": 315585 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001393275838377477, "loss": 2.045, "step": 315590 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.00013932588447390296, "loss": 2.0961, "step": 315595 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013932418509662376, "loss": 2.1137, "step": 315600 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001393224857059107, "loss": 2.0209, "step": 315605 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013932078630176438, "loss": 2.054, "step": 315610 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.0001393190868841854, "loss": 2.0003, "step": 315615 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013931738745317428, "loss": 2.0062, "step": 315620 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013931568800873163, "loss": 2.0653, "step": 315625 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.000139313988550858, "loss": 2.0978, "step": 315630 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.000139312289079554, "loss": 1.9378, "step": 315635 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013931058959482025, "loss": 2.0373, "step": 315640 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013930889009665728, "loss": 2.1094, "step": 315645 }, { "epoch": 0.74, "grad_norm": 2.59375, "learning_rate": 0.00013930719058506566, "loss": 2.0908, "step": 315650 }, { "epoch": 0.74, "grad_norm": 1.8984375, "learning_rate": 0.000139305491060046, "loss": 1.9403, "step": 315655 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.00013930379152159885, "loss": 1.9834, "step": 315660 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013930209196972486, "loss": 1.8003, "step": 315665 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013930039240442454, "loss": 2.004, "step": 315670 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.0001392986928256985, "loss": 2.1475, "step": 315675 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.0001392969932335473, "loss": 1.9874, "step": 315680 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013929529362797155, "loss": 1.9749, "step": 315685 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013929359400897178, "loss": 2.1691, "step": 315690 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013929189437654864, "loss": 1.742, "step": 315695 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013929019473070265, "loss": 2.1102, "step": 315700 }, { "epoch": 0.74, "grad_norm": 1.953125, "learning_rate": 0.00013928849507143445, "loss": 2.1623, "step": 315705 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013928679539874456, "loss": 2.0589, "step": 315710 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001392850957126336, "loss": 1.9673, "step": 315715 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013928339601310214, "loss": 1.8089, "step": 315720 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.00013928169630015073, "loss": 1.9133, "step": 315725 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013927999657378, "loss": 2.086, "step": 315730 }, { "epoch": 0.74, "grad_norm": 1.9453125, "learning_rate": 0.00013927829683399055, "loss": 2.2008, "step": 315735 }, { "epoch": 0.74, "grad_norm": 1.921875, "learning_rate": 0.00013927659708078287, "loss": 1.984, "step": 315740 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013927489731415762, "loss": 2.1221, "step": 315745 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013927319753411535, "loss": 2.0527, "step": 315750 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013927149774065664, "loss": 2.0521, "step": 315755 }, { "epoch": 0.74, "grad_norm": 1.6953125, "learning_rate": 0.0001392697979337821, "loss": 1.843, "step": 315760 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013926809811349222, "loss": 2.268, "step": 315765 }, { "epoch": 0.74, "grad_norm": 2.625, "learning_rate": 0.00013926639827978768, "loss": 1.7456, "step": 315770 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.000139264698432669, "loss": 2.119, "step": 315775 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013926299857213684, "loss": 2.1571, "step": 315780 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.0001392612986981917, "loss": 1.9243, "step": 315785 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.0001392595988108342, "loss": 1.9797, "step": 315790 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.0001392578989100649, "loss": 1.9491, "step": 315795 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001392561989958844, "loss": 2.0346, "step": 315800 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013925449906829326, "loss": 2.0837, "step": 315805 }, { "epoch": 0.74, "grad_norm": 2.765625, "learning_rate": 0.00013925279912729206, "loss": 2.054, "step": 315810 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.0001392510991728814, "loss": 2.0093, "step": 315815 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013924939920506186, "loss": 2.1789, "step": 315820 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.000139247699223834, "loss": 2.0648, "step": 315825 }, { "epoch": 0.74, "grad_norm": 1.9296875, "learning_rate": 0.00013924599922919843, "loss": 1.9335, "step": 315830 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.0001392442992211557, "loss": 1.9979, "step": 315835 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001392425991997064, "loss": 2.2048, "step": 315840 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013924089916485114, "loss": 2.2043, "step": 315845 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.0001392391991165905, "loss": 2.0745, "step": 315850 }, { "epoch": 0.74, "grad_norm": 1.8828125, "learning_rate": 0.00013923749905492497, "loss": 2.1054, "step": 315855 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013923579897985526, "loss": 1.6906, "step": 315860 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013923409889138183, "loss": 2.1495, "step": 315865 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.0001392323987895054, "loss": 2.0138, "step": 315870 }, { "epoch": 0.74, "grad_norm": 2.75, "learning_rate": 0.0001392306986742264, "loss": 2.1011, "step": 315875 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001392289985455455, "loss": 2.01, "step": 315880 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013922729840346328, "loss": 2.0033, "step": 315885 }, { "epoch": 0.74, "grad_norm": 1.9375, "learning_rate": 0.0001392255982479803, "loss": 1.979, "step": 315890 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013922389807909715, "loss": 2.1229, "step": 315895 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013922219789681438, "loss": 2.1141, "step": 315900 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.0001392204977011326, "loss": 1.9996, "step": 315905 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013921879749205238, "loss": 1.9678, "step": 315910 }, { "epoch": 0.74, "grad_norm": 1.7265625, "learning_rate": 0.00013921709726957436, "loss": 1.9303, "step": 315915 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.000139215397033699, "loss": 2.1592, "step": 315920 }, { "epoch": 0.74, "grad_norm": 2.015625, "learning_rate": 0.00013921369678442696, "loss": 2.0805, "step": 315925 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013921199652175883, "loss": 2.1463, "step": 315930 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001392102962456952, "loss": 2.2038, "step": 315935 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013920859595623658, "loss": 2.0508, "step": 315940 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013920689565338363, "loss": 2.0095, "step": 315945 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013920519533713684, "loss": 2.0682, "step": 315950 }, { "epoch": 0.74, "grad_norm": 1.546875, "learning_rate": 0.00013920349500749687, "loss": 1.8277, "step": 315955 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001392017946644643, "loss": 2.1436, "step": 315960 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013920009430803964, "loss": 2.2354, "step": 315965 }, { "epoch": 0.74, "grad_norm": 2.0, "learning_rate": 0.00013919839393822356, "loss": 2.1968, "step": 315970 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013919669355501657, "loss": 1.9707, "step": 315975 }, { "epoch": 0.74, "grad_norm": 2.640625, "learning_rate": 0.00013919499315841928, "loss": 2.0979, "step": 315980 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001391932927484323, "loss": 2.0286, "step": 315985 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.00013919159232505615, "loss": 2.0141, "step": 315990 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013918989188829142, "loss": 2.0362, "step": 315995 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00013918819143813875, "loss": 2.029, "step": 316000 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013918649097459867, "loss": 1.7999, "step": 316005 }, { "epoch": 0.74, "grad_norm": 2.5625, "learning_rate": 0.00013918479049767178, "loss": 2.0764, "step": 316010 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013918309000735866, "loss": 1.8564, "step": 316015 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013918138950365987, "loss": 1.9746, "step": 316020 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.000139179688986576, "loss": 2.2395, "step": 316025 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.0001391779884561077, "loss": 1.988, "step": 316030 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013917628791225542, "loss": 2.1088, "step": 316035 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013917458735501984, "loss": 2.0341, "step": 316040 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.0001391728867844015, "loss": 2.132, "step": 316045 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.000139171186200401, "loss": 1.8746, "step": 316050 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.0001391694856030189, "loss": 2.1894, "step": 316055 }, { "epoch": 0.74, "grad_norm": 2.625, "learning_rate": 0.00013916778499225579, "loss": 2.0643, "step": 316060 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013916608436811224, "loss": 2.1085, "step": 316065 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013916438373058886, "loss": 2.0595, "step": 316070 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013916268307968626, "loss": 2.0386, "step": 316075 }, { "epoch": 0.74, "grad_norm": 1.875, "learning_rate": 0.00013916098241540493, "loss": 2.0899, "step": 316080 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001391592817377455, "loss": 1.98, "step": 316085 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013915758104670855, "loss": 1.9416, "step": 316090 }, { "epoch": 0.74, "grad_norm": 2.8125, "learning_rate": 0.0001391558803422947, "loss": 1.9844, "step": 316095 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00013915417962450446, "loss": 2.0733, "step": 316100 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013915247889333842, "loss": 2.0596, "step": 316105 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.0001391507781487972, "loss": 2.1638, "step": 316110 }, { "epoch": 0.74, "grad_norm": 2.09375, "learning_rate": 0.0001391490773908814, "loss": 2.0841, "step": 316115 }, { "epoch": 0.74, "grad_norm": 1.953125, "learning_rate": 0.00013914737661959153, "loss": 2.003, "step": 316120 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.0001391456758349282, "loss": 2.074, "step": 316125 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.000139143975036892, "loss": 2.0774, "step": 316130 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013914227422548352, "loss": 1.9178, "step": 316135 }, { "epoch": 0.74, "grad_norm": 2.203125, "learning_rate": 0.00013914057340070333, "loss": 2.2371, "step": 316140 }, { "epoch": 0.74, "grad_norm": 2.21875, "learning_rate": 0.000139138872562552, "loss": 2.1349, "step": 316145 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013913717171103014, "loss": 1.9333, "step": 316150 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013913547084613828, "loss": 1.9923, "step": 316155 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013913376996787705, "loss": 2.0615, "step": 316160 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013913206907624704, "loss": 2.0499, "step": 316165 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013913036817124876, "loss": 1.9598, "step": 316170 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013912866725288288, "loss": 2.0177, "step": 316175 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.0001391269663211499, "loss": 1.7614, "step": 316180 }, { "epoch": 0.74, "grad_norm": 2.4375, "learning_rate": 0.00013912526537605046, "loss": 2.1663, "step": 316185 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013912356441758513, "loss": 2.1345, "step": 316190 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013912186344575443, "loss": 2.1053, "step": 316195 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013912016246055904, "loss": 2.1117, "step": 316200 }, { "epoch": 0.74, "grad_norm": 2.546875, "learning_rate": 0.00013911846146199948, "loss": 1.9235, "step": 316205 }, { "epoch": 0.74, "grad_norm": 2.453125, "learning_rate": 0.00013911676045007636, "loss": 2.1242, "step": 316210 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013911505942479021, "loss": 2.0367, "step": 316215 }, { "epoch": 0.74, "grad_norm": 1.859375, "learning_rate": 0.00013911335838614165, "loss": 1.9718, "step": 316220 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.0001391116573341313, "loss": 2.2537, "step": 316225 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013910995626875968, "loss": 2.0181, "step": 316230 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.0001391082551900274, "loss": 2.0732, "step": 316235 }, { "epoch": 0.74, "grad_norm": 2.59375, "learning_rate": 0.000139106554097935, "loss": 1.949, "step": 316240 }, { "epoch": 0.74, "grad_norm": 2.40625, "learning_rate": 0.0001391048529924831, "loss": 2.1342, "step": 316245 }, { "epoch": 0.74, "grad_norm": 1.8515625, "learning_rate": 0.00013910315187367223, "loss": 2.1391, "step": 316250 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.0001391014507415031, "loss": 2.0198, "step": 316255 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013909974959597617, "loss": 1.9632, "step": 316260 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013909804843709205, "loss": 1.8683, "step": 316265 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013909634726485135, "loss": 2.1695, "step": 316270 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.0001390946460792546, "loss": 2.0439, "step": 316275 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.00013909294488030242, "loss": 2.0876, "step": 316280 }, { "epoch": 0.74, "grad_norm": 2.609375, "learning_rate": 0.00013909124366799538, "loss": 2.0898, "step": 316285 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.00013908954244233406, "loss": 2.0102, "step": 316290 }, { "epoch": 0.74, "grad_norm": 1.890625, "learning_rate": 0.00013908784120331903, "loss": 1.901, "step": 316295 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001390861399509509, "loss": 2.14, "step": 316300 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013908443868523024, "loss": 2.0668, "step": 316305 }, { "epoch": 0.74, "grad_norm": 2.078125, "learning_rate": 0.00013908273740615763, "loss": 1.9429, "step": 316310 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.00013908103611373363, "loss": 2.1515, "step": 316315 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013907933480795888, "loss": 2.2328, "step": 316320 }, { "epoch": 0.74, "grad_norm": 2.65625, "learning_rate": 0.00013907763348883388, "loss": 2.0226, "step": 316325 }, { "epoch": 0.74, "grad_norm": 2.5, "learning_rate": 0.00013907593215635927, "loss": 1.905, "step": 316330 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013907423081053558, "loss": 1.8658, "step": 316335 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013907252945136346, "loss": 2.0055, "step": 316340 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.00013907082807884346, "loss": 2.1869, "step": 316345 }, { "epoch": 0.74, "grad_norm": 2.390625, "learning_rate": 0.00013906912669297614, "loss": 2.1037, "step": 316350 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013906742529376208, "loss": 2.1432, "step": 316355 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013906572388120193, "loss": 2.2124, "step": 316360 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013906402245529616, "loss": 1.954, "step": 316365 }, { "epoch": 0.74, "grad_norm": 1.984375, "learning_rate": 0.00013906232101604547, "loss": 2.0774, "step": 316370 }, { "epoch": 0.74, "grad_norm": 1.9296875, "learning_rate": 0.00013906061956345033, "loss": 2.1366, "step": 316375 }, { "epoch": 0.74, "grad_norm": 2.90625, "learning_rate": 0.00013905891809751138, "loss": 2.0223, "step": 316380 }, { "epoch": 0.74, "grad_norm": 2.28125, "learning_rate": 0.0001390572166182292, "loss": 1.9915, "step": 316385 }, { "epoch": 0.74, "grad_norm": 2.3125, "learning_rate": 0.0001390555151256044, "loss": 1.9747, "step": 316390 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013905381361963755, "loss": 2.0388, "step": 316395 }, { "epoch": 0.74, "grad_norm": 2.0625, "learning_rate": 0.00013905211210032915, "loss": 1.9941, "step": 316400 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013905041056767983, "loss": 1.9722, "step": 316405 }, { "epoch": 0.74, "grad_norm": 2.15625, "learning_rate": 0.00013904870902169022, "loss": 2.1387, "step": 316410 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013904700746236086, "loss": 2.1175, "step": 316415 }, { "epoch": 0.74, "grad_norm": 2.1875, "learning_rate": 0.00013904530588969231, "loss": 2.2265, "step": 316420 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.00013904360430368518, "loss": 2.1757, "step": 316425 }, { "epoch": 0.74, "grad_norm": 2.03125, "learning_rate": 0.00013904190270434008, "loss": 2.1211, "step": 316430 }, { "epoch": 0.74, "grad_norm": 2.359375, "learning_rate": 0.0001390402010916575, "loss": 2.0956, "step": 316435 }, { "epoch": 0.74, "grad_norm": 2.125, "learning_rate": 0.00013903849946563814, "loss": 2.0713, "step": 316440 }, { "epoch": 0.74, "grad_norm": 2.046875, "learning_rate": 0.00013903679782628249, "loss": 1.9866, "step": 316445 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00013903509617359117, "loss": 2.0784, "step": 316450 }, { "epoch": 0.74, "grad_norm": 2.265625, "learning_rate": 0.00013903339450756474, "loss": 2.1612, "step": 316455 }, { "epoch": 0.74, "grad_norm": 2.234375, "learning_rate": 0.00013903169282820382, "loss": 2.0512, "step": 316460 }, { "epoch": 0.74, "grad_norm": 2.421875, "learning_rate": 0.00013902999113550894, "loss": 2.0099, "step": 316465 }, { "epoch": 0.74, "grad_norm": 2.328125, "learning_rate": 0.0001390282894294807, "loss": 2.0709, "step": 316470 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.0001390265877101197, "loss": 2.0683, "step": 316475 }, { "epoch": 0.74, "grad_norm": 2.484375, "learning_rate": 0.00013902488597742653, "loss": 1.9658, "step": 316480 }, { "epoch": 0.74, "grad_norm": 1.8671875, "learning_rate": 0.00013902318423140178, "loss": 2.0673, "step": 316485 }, { "epoch": 0.74, "grad_norm": 1.75, "learning_rate": 0.00013902148247204593, "loss": 2.0186, "step": 316490 }, { "epoch": 0.74, "grad_norm": 1.96875, "learning_rate": 0.00013901978069935967, "loss": 1.9797, "step": 316495 }, { "epoch": 0.74, "grad_norm": 2.53125, "learning_rate": 0.0001390180789133435, "loss": 2.1156, "step": 316500 }, { "epoch": 0.74, "grad_norm": 2.34375, "learning_rate": 0.00013901637711399812, "loss": 2.1188, "step": 316505 }, { "epoch": 0.74, "grad_norm": 1.953125, "learning_rate": 0.000139014675301324, "loss": 2.0307, "step": 316510 }, { "epoch": 0.74, "grad_norm": 2.296875, "learning_rate": 0.00013901297347532175, "loss": 2.2283, "step": 316515 }, { "epoch": 0.74, "grad_norm": 2.171875, "learning_rate": 0.00013901127163599197, "loss": 2.2101, "step": 316520 }, { "epoch": 0.74, "grad_norm": 1.8828125, "learning_rate": 0.00013900956978333526, "loss": 1.9316, "step": 316525 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00013900786791735213, "loss": 2.1053, "step": 316530 }, { "epoch": 0.74, "grad_norm": 2.734375, "learning_rate": 0.00013900616603804324, "loss": 1.8825, "step": 316535 }, { "epoch": 0.74, "grad_norm": 2.8125, "learning_rate": 0.00013900446414540912, "loss": 2.1188, "step": 316540 }, { "epoch": 0.74, "grad_norm": 1.9765625, "learning_rate": 0.00013900276223945035, "loss": 2.0936, "step": 316545 }, { "epoch": 0.74, "grad_norm": 2.109375, "learning_rate": 0.00013900106032016754, "loss": 2.0844, "step": 316550 }, { "epoch": 0.74, "grad_norm": 2.46875, "learning_rate": 0.00013899935838756127, "loss": 2.13, "step": 316555 }, { "epoch": 0.74, "grad_norm": 2.25, "learning_rate": 0.00013899765644163212, "loss": 2.2154, "step": 316560 }, { "epoch": 0.74, "grad_norm": 2.515625, "learning_rate": 0.00013899595448238066, "loss": 2.167, "step": 316565 }, { "epoch": 0.74, "grad_norm": 2.140625, "learning_rate": 0.00013899425250980745, "loss": 2.1845, "step": 316570 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.0001389925505239131, "loss": 2.103, "step": 316575 }, { "epoch": 0.75, "grad_norm": 1.9140625, "learning_rate": 0.00013899084852469823, "loss": 2.0808, "step": 316580 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013898914651216333, "loss": 1.8303, "step": 316585 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013898744448630904, "loss": 1.9683, "step": 316590 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013898574244713597, "loss": 2.0691, "step": 316595 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013898404039464462, "loss": 1.9708, "step": 316600 }, { "epoch": 0.75, "grad_norm": 2.625, "learning_rate": 0.00013898233832883563, "loss": 1.9077, "step": 316605 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013898063624970955, "loss": 2.0098, "step": 316610 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013897893415726703, "loss": 2.0773, "step": 316615 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013897723205150858, "loss": 2.0938, "step": 316620 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013897552993243474, "loss": 2.1138, "step": 316625 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.0001389738278000462, "loss": 2.1869, "step": 316630 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013897212565434347, "loss": 2.0545, "step": 316635 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013897042349532722, "loss": 1.9861, "step": 316640 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.0001389687213229979, "loss": 2.1374, "step": 316645 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013896701913735618, "loss": 2.0546, "step": 316650 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013896531693840261, "loss": 1.8563, "step": 316655 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013896361472613778, "loss": 2.0803, "step": 316660 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001389619125005623, "loss": 2.0351, "step": 316665 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.0001389602102616767, "loss": 1.9298, "step": 316670 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013895850800948157, "loss": 1.837, "step": 316675 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013895680574397754, "loss": 2.1504, "step": 316680 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013895510346516516, "loss": 1.9509, "step": 316685 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013895340117304502, "loss": 2.175, "step": 316690 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013895169886761764, "loss": 2.245, "step": 316695 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.0001389499965488837, "loss": 2.1019, "step": 316700 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013894829421684372, "loss": 2.1974, "step": 316705 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013894659187149827, "loss": 2.174, "step": 316710 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013894488951284799, "loss": 2.1208, "step": 316715 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013894318714089343, "loss": 2.2079, "step": 316720 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013894148475563517, "loss": 2.2999, "step": 316725 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013893978235707377, "loss": 2.0852, "step": 316730 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013893807994520984, "loss": 2.0331, "step": 316735 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.000138936377520044, "loss": 1.9475, "step": 316740 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013893467508157672, "loss": 2.0546, "step": 316745 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.0001389329726298087, "loss": 1.888, "step": 316750 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013893127016474045, "loss": 1.9384, "step": 316755 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013892956768637256, "loss": 2.1004, "step": 316760 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013892786519470565, "loss": 2.0537, "step": 316765 }, { "epoch": 0.75, "grad_norm": 2.796875, "learning_rate": 0.00013892616268974022, "loss": 1.9304, "step": 316770 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013892446017147697, "loss": 1.972, "step": 316775 }, { "epoch": 0.75, "grad_norm": 1.8671875, "learning_rate": 0.00013892275763991642, "loss": 2.2111, "step": 316780 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001389210550950591, "loss": 2.0782, "step": 316785 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013891935253690566, "loss": 2.2416, "step": 316790 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013891764996545668, "loss": 2.0455, "step": 316795 }, { "epoch": 0.75, "grad_norm": 3.015625, "learning_rate": 0.00013891594738071273, "loss": 1.981, "step": 316800 }, { "epoch": 0.75, "grad_norm": 2.71875, "learning_rate": 0.00013891424478267437, "loss": 2.0419, "step": 316805 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.0001389125421713422, "loss": 1.8724, "step": 316810 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.0001389108395467168, "loss": 2.0298, "step": 316815 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.00013890913690879877, "loss": 2.0468, "step": 316820 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013890743425758865, "loss": 2.2007, "step": 316825 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013890573159308707, "loss": 1.9932, "step": 316830 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013890402891529453, "loss": 1.746, "step": 316835 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001389023262242117, "loss": 2.0964, "step": 316840 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013890062351983916, "loss": 2.1516, "step": 316845 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013889892080217746, "loss": 2.0315, "step": 316850 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013889721807122714, "loss": 1.84, "step": 316855 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013889551532698886, "loss": 1.963, "step": 316860 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013889381256946315, "loss": 1.9971, "step": 316865 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013889210979865058, "loss": 2.1185, "step": 316870 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.0001388904070145518, "loss": 2.0252, "step": 316875 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.00013888870421716734, "loss": 2.2292, "step": 316880 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001388870014064978, "loss": 1.9784, "step": 316885 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.00013888529858254375, "loss": 2.1194, "step": 316890 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013888359574530578, "loss": 2.0486, "step": 316895 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013888189289478446, "loss": 1.959, "step": 316900 }, { "epoch": 0.75, "grad_norm": 1.921875, "learning_rate": 0.0001388801900309804, "loss": 2.036, "step": 316905 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013887848715389412, "loss": 2.0172, "step": 316910 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013887678426352632, "loss": 1.9825, "step": 316915 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013887508135987744, "loss": 2.2337, "step": 316920 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.00013887337844294815, "loss": 1.923, "step": 316925 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013887167551273897, "loss": 2.056, "step": 316930 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013886997256925057, "loss": 2.2229, "step": 316935 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001388682696124835, "loss": 2.0645, "step": 316940 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013886656664243823, "loss": 2.1449, "step": 316945 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013886486365911554, "loss": 2.0011, "step": 316950 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013886316066251585, "loss": 2.1457, "step": 316955 }, { "epoch": 0.75, "grad_norm": 1.90625, "learning_rate": 0.00013886145765263982, "loss": 2.1601, "step": 316960 }, { "epoch": 0.75, "grad_norm": 1.875, "learning_rate": 0.00013885975462948801, "loss": 2.2699, "step": 316965 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.000138858051593061, "loss": 2.1433, "step": 316970 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013885634854335935, "loss": 2.128, "step": 316975 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.0001388546454803837, "loss": 2.1265, "step": 316980 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.0001388529424041346, "loss": 1.8645, "step": 316985 }, { "epoch": 0.75, "grad_norm": 2.65625, "learning_rate": 0.00013885123931461262, "loss": 1.9728, "step": 316990 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013884953621181835, "loss": 2.3124, "step": 316995 }, { "epoch": 0.75, "grad_norm": 1.59375, "learning_rate": 0.00013884783309575237, "loss": 1.9776, "step": 317000 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013884612996641527, "loss": 2.123, "step": 317005 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013884442682380764, "loss": 2.1136, "step": 317010 }, { "epoch": 0.75, "grad_norm": 1.6796875, "learning_rate": 0.00013884272366793002, "loss": 1.8866, "step": 317015 }, { "epoch": 0.75, "grad_norm": 2.703125, "learning_rate": 0.00013884102049878304, "loss": 1.9845, "step": 317020 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013883931731636725, "loss": 2.0884, "step": 317025 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013883761412068326, "loss": 1.9436, "step": 317030 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013883591091173165, "loss": 2.0157, "step": 317035 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013883420768951297, "loss": 2.0766, "step": 317040 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013883250445402781, "loss": 1.9534, "step": 317045 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013883080120527676, "loss": 2.054, "step": 317050 }, { "epoch": 0.75, "grad_norm": 2.84375, "learning_rate": 0.00013882909794326042, "loss": 2.227, "step": 317055 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013882739466797937, "loss": 2.1147, "step": 317060 }, { "epoch": 0.75, "grad_norm": 1.8828125, "learning_rate": 0.00013882569137943414, "loss": 1.9919, "step": 317065 }, { "epoch": 0.75, "grad_norm": 1.90625, "learning_rate": 0.00013882398807762538, "loss": 1.8527, "step": 317070 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013882228476255366, "loss": 2.1346, "step": 317075 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013882058143421949, "loss": 1.8181, "step": 317080 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013881887809262354, "loss": 2.1036, "step": 317085 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001388171747377663, "loss": 1.9452, "step": 317090 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013881547136964853, "loss": 2.1165, "step": 317095 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013881376798827058, "loss": 2.068, "step": 317100 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.0001388120645936332, "loss": 1.9953, "step": 317105 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013881036118573687, "loss": 1.9464, "step": 317110 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013880865776458228, "loss": 2.0217, "step": 317115 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001388069543301699, "loss": 1.9806, "step": 317120 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013880525088250036, "loss": 1.9682, "step": 317125 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013880354742157425, "loss": 2.1195, "step": 317130 }, { "epoch": 0.75, "grad_norm": 1.8203125, "learning_rate": 0.00013880184394739214, "loss": 2.0211, "step": 317135 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013880014045995463, "loss": 2.0742, "step": 317140 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013879843695926232, "loss": 2.023, "step": 317145 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001387967334453157, "loss": 2.1795, "step": 317150 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013879502991811545, "loss": 2.1466, "step": 317155 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.0001387933263776621, "loss": 2.1975, "step": 317160 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013879162282395623, "loss": 2.033, "step": 317165 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013878991925699846, "loss": 1.8387, "step": 317170 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013878821567678935, "loss": 2.0774, "step": 317175 }, { "epoch": 0.75, "grad_norm": 1.828125, "learning_rate": 0.00013878651208332947, "loss": 2.0895, "step": 317180 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001387848084766194, "loss": 2.0093, "step": 317185 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013878310485665978, "loss": 2.1236, "step": 317190 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013878140122345112, "loss": 1.9883, "step": 317195 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013877969757699403, "loss": 2.1804, "step": 317200 }, { "epoch": 0.75, "grad_norm": 2.625, "learning_rate": 0.0001387779939172891, "loss": 1.835, "step": 317205 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013877629024433688, "loss": 2.21, "step": 317210 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.000138774586558138, "loss": 2.0559, "step": 317215 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013877288285869303, "loss": 1.9306, "step": 317220 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013877117914600252, "loss": 2.1345, "step": 317225 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.0001387694754200671, "loss": 2.0266, "step": 317230 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.0001387677716808873, "loss": 2.0276, "step": 317235 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.0001387660679284637, "loss": 2.1208, "step": 317240 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013876436416279694, "loss": 2.0044, "step": 317245 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013876266038388759, "loss": 1.8857, "step": 317250 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.0001387609565917362, "loss": 2.1172, "step": 317255 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013875925278634333, "loss": 2.0844, "step": 317260 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013875754896770963, "loss": 1.9893, "step": 317265 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013875584513583562, "loss": 2.0848, "step": 317270 }, { "epoch": 0.75, "grad_norm": 2.734375, "learning_rate": 0.00013875414129072195, "loss": 1.9644, "step": 317275 }, { "epoch": 0.75, "grad_norm": 1.9453125, "learning_rate": 0.00013875243743236915, "loss": 2.0445, "step": 317280 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013875073356077778, "loss": 1.9782, "step": 317285 }, { "epoch": 0.75, "grad_norm": 3.203125, "learning_rate": 0.0001387490296759485, "loss": 2.0648, "step": 317290 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013874732577788182, "loss": 2.1915, "step": 317295 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013874562186657835, "loss": 2.182, "step": 317300 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.0001387439179420387, "loss": 2.0657, "step": 317305 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.0001387422140042634, "loss": 1.9357, "step": 317310 }, { "epoch": 0.75, "grad_norm": 1.890625, "learning_rate": 0.00013874051005325307, "loss": 2.2048, "step": 317315 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013873880608900827, "loss": 2.1056, "step": 317320 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.0001387371021115296, "loss": 2.3252, "step": 317325 }, { "epoch": 0.75, "grad_norm": 1.921875, "learning_rate": 0.0001387353981208176, "loss": 1.9647, "step": 317330 }, { "epoch": 0.75, "grad_norm": 2.734375, "learning_rate": 0.00013873369411687294, "loss": 2.0688, "step": 317335 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013873199009969614, "loss": 2.0764, "step": 317340 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013873028606928775, "loss": 1.9801, "step": 317345 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.0001387285820256484, "loss": 2.1203, "step": 317350 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.0001387268779687787, "loss": 2.2275, "step": 317355 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013872517389867917, "loss": 2.1762, "step": 317360 }, { "epoch": 0.75, "grad_norm": 1.9296875, "learning_rate": 0.00013872346981535043, "loss": 1.9096, "step": 317365 }, { "epoch": 0.75, "grad_norm": 1.859375, "learning_rate": 0.00013872176571879305, "loss": 1.9065, "step": 317370 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.00013872006160900758, "loss": 1.9721, "step": 317375 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013871835748599466, "loss": 2.0815, "step": 317380 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013871665334975485, "loss": 2.0882, "step": 317385 }, { "epoch": 0.75, "grad_norm": 2.890625, "learning_rate": 0.00013871494920028872, "loss": 2.1949, "step": 317390 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013871324503759687, "loss": 2.1304, "step": 317395 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013871154086167987, "loss": 2.1682, "step": 317400 }, { "epoch": 0.75, "grad_norm": 1.90625, "learning_rate": 0.0001387098366725383, "loss": 2.1131, "step": 317405 }, { "epoch": 0.75, "grad_norm": 1.734375, "learning_rate": 0.00013870813247017277, "loss": 2.0017, "step": 317410 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013870642825458384, "loss": 1.9117, "step": 317415 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013870472402577208, "loss": 2.0879, "step": 317420 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013870301978373805, "loss": 2.0976, "step": 317425 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.0001387013155284824, "loss": 2.1143, "step": 317430 }, { "epoch": 0.75, "grad_norm": 5.21875, "learning_rate": 0.0001386996112600057, "loss": 2.0487, "step": 317435 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.0001386979069783085, "loss": 2.0214, "step": 317440 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013869620268339137, "loss": 2.0744, "step": 317445 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.0001386944983752549, "loss": 2.0602, "step": 317450 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013869279405389972, "loss": 2.0265, "step": 317455 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013869108971932637, "loss": 1.7865, "step": 317460 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013868938537153546, "loss": 1.97, "step": 317465 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013868768101052753, "loss": 2.1675, "step": 317470 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.0001386859766363032, "loss": 2.0509, "step": 317475 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013868427224886302, "loss": 1.9905, "step": 317480 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.0001386825678482076, "loss": 1.9731, "step": 317485 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013868086343433753, "loss": 2.2212, "step": 317490 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013867915900725336, "loss": 2.1144, "step": 317495 }, { "epoch": 0.75, "grad_norm": 2.6875, "learning_rate": 0.0001386774545669557, "loss": 2.0202, "step": 317500 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013867575011344512, "loss": 1.8506, "step": 317505 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013867404564672217, "loss": 2.1337, "step": 317510 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013867234116678748, "loss": 2.138, "step": 317515 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013867063667364165, "loss": 1.9195, "step": 317520 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013866893216728522, "loss": 2.0587, "step": 317525 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013866722764771874, "loss": 1.9931, "step": 317530 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013866552311494288, "loss": 2.2054, "step": 317535 }, { "epoch": 0.75, "grad_norm": 1.8359375, "learning_rate": 0.00013866381856895814, "loss": 2.1571, "step": 317540 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013866211400976515, "loss": 2.2192, "step": 317545 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.00013866040943736452, "loss": 2.0642, "step": 317550 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013865870485175674, "loss": 2.0134, "step": 317555 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013865700025294245, "loss": 2.0118, "step": 317560 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013865529564092223, "loss": 2.1207, "step": 317565 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.0001386535910156967, "loss": 2.2306, "step": 317570 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013865188637726637, "loss": 1.8087, "step": 317575 }, { "epoch": 0.75, "grad_norm": 1.984375, "learning_rate": 0.00013865018172563187, "loss": 2.2047, "step": 317580 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013864847706079373, "loss": 2.0438, "step": 317585 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013864677238275262, "loss": 1.9669, "step": 317590 }, { "epoch": 0.75, "grad_norm": 1.90625, "learning_rate": 0.000138645067691509, "loss": 2.0202, "step": 317595 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.0001386433629870636, "loss": 2.0662, "step": 317600 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.0001386416582694169, "loss": 2.1706, "step": 317605 }, { "epoch": 0.75, "grad_norm": 4.03125, "learning_rate": 0.0001386399535385695, "loss": 2.1224, "step": 317610 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.000138638248794522, "loss": 1.973, "step": 317615 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013863654403727498, "loss": 2.2069, "step": 317620 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.000138634839266829, "loss": 2.0361, "step": 317625 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013863313448318464, "loss": 2.0813, "step": 317630 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013863142968634256, "loss": 2.0996, "step": 317635 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013862972487630325, "loss": 2.1112, "step": 317640 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.0001386280200530673, "loss": 2.0318, "step": 317645 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013862631521663533, "loss": 2.0392, "step": 317650 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013862461036700795, "loss": 2.1042, "step": 317655 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013862290550418566, "loss": 2.0562, "step": 317660 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013862120062816912, "loss": 2.0858, "step": 317665 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013861949573895882, "loss": 2.2178, "step": 317670 }, { "epoch": 0.75, "grad_norm": 1.71875, "learning_rate": 0.00013861779083655543, "loss": 1.9561, "step": 317675 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013861608592095952, "loss": 2.19, "step": 317680 }, { "epoch": 0.75, "grad_norm": 2.84375, "learning_rate": 0.00013861438099217166, "loss": 2.1558, "step": 317685 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013861267605019242, "loss": 1.9152, "step": 317690 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013861097109502235, "loss": 2.0675, "step": 317695 }, { "epoch": 0.75, "grad_norm": 2.625, "learning_rate": 0.0001386092661266621, "loss": 1.92, "step": 317700 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.00013860756114511224, "loss": 2.1675, "step": 317705 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.0001386058561503733, "loss": 1.8531, "step": 317710 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013860415114244593, "loss": 2.1271, "step": 317715 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.00013860244612133067, "loss": 2.126, "step": 317720 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.00013860074108702812, "loss": 2.1411, "step": 317725 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013859903603953889, "loss": 2.035, "step": 317730 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013859733097886347, "loss": 1.9872, "step": 317735 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013859562590500253, "loss": 1.9374, "step": 317740 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.0001385939208179566, "loss": 1.8371, "step": 317745 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013859221571772633, "loss": 2.2557, "step": 317750 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013859051060431224, "loss": 2.0367, "step": 317755 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013858880547771492, "loss": 2.0, "step": 317760 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013858710033793498, "loss": 2.1003, "step": 317765 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013858539518497298, "loss": 2.1315, "step": 317770 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.0001385836900188295, "loss": 2.096, "step": 317775 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.00013858198483950512, "loss": 2.1372, "step": 317780 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013858027964700045, "loss": 1.8471, "step": 317785 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.0001385785744413161, "loss": 2.1003, "step": 317790 }, { "epoch": 0.75, "grad_norm": 1.875, "learning_rate": 0.00013857686922245254, "loss": 2.0179, "step": 317795 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013857516399041046, "loss": 2.1322, "step": 317800 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.0001385734587451904, "loss": 1.7456, "step": 317805 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013857175348679294, "loss": 2.1089, "step": 317810 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013857004821521868, "loss": 2.0292, "step": 317815 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013856834293046822, "loss": 1.9199, "step": 317820 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013856663763254208, "loss": 1.9573, "step": 317825 }, { "epoch": 0.75, "grad_norm": 1.8046875, "learning_rate": 0.00013856493232144084, "loss": 1.999, "step": 317830 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013856322699716515, "loss": 2.193, "step": 317835 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013856152165971558, "loss": 2.1662, "step": 317840 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013855981630909268, "loss": 1.9857, "step": 317845 }, { "epoch": 0.75, "grad_norm": 1.734375, "learning_rate": 0.00013855811094529708, "loss": 2.0123, "step": 317850 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013855640556832927, "loss": 1.9876, "step": 317855 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.0001385547001781899, "loss": 2.2317, "step": 317860 }, { "epoch": 0.75, "grad_norm": 2.671875, "learning_rate": 0.0001385529947748796, "loss": 2.1368, "step": 317865 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013855128935839886, "loss": 1.9063, "step": 317870 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013854958392874834, "loss": 2.0874, "step": 317875 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013854787848592852, "loss": 1.9904, "step": 317880 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.0001385461730299401, "loss": 2.2234, "step": 317885 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013854446756078357, "loss": 1.9563, "step": 317890 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013854276207845954, "loss": 1.8418, "step": 317895 }, { "epoch": 0.75, "grad_norm": 2.96875, "learning_rate": 0.00013854105658296864, "loss": 2.1225, "step": 317900 }, { "epoch": 0.75, "grad_norm": 1.9296875, "learning_rate": 0.0001385393510743114, "loss": 2.1857, "step": 317905 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013853764555248844, "loss": 2.0665, "step": 317910 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.0001385359400175003, "loss": 2.0824, "step": 317915 }, { "epoch": 0.75, "grad_norm": 2.828125, "learning_rate": 0.00013853423446934755, "loss": 2.0119, "step": 317920 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.0001385325289080309, "loss": 1.966, "step": 317925 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013853082333355077, "loss": 2.0578, "step": 317930 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013852911774590783, "loss": 2.0004, "step": 317935 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013852741214510263, "loss": 2.1673, "step": 317940 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013852570653113578, "loss": 2.1706, "step": 317945 }, { "epoch": 0.75, "grad_norm": 1.8359375, "learning_rate": 0.00013852400090400783, "loss": 2.1153, "step": 317950 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001385222952637194, "loss": 2.1854, "step": 317955 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013852058961027108, "loss": 2.0686, "step": 317960 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.0001385188839436634, "loss": 2.1313, "step": 317965 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013851717826389697, "loss": 1.8341, "step": 317970 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013851547257097237, "loss": 2.0078, "step": 317975 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001385137668648902, "loss": 1.9002, "step": 317980 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013851206114565102, "loss": 2.0386, "step": 317985 }, { "epoch": 0.75, "grad_norm": 1.9453125, "learning_rate": 0.0001385103554132554, "loss": 2.1533, "step": 317990 }, { "epoch": 0.75, "grad_norm": 1.96875, "learning_rate": 0.000138508649667704, "loss": 2.3078, "step": 317995 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013850694390899732, "loss": 1.9827, "step": 318000 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013850523813713596, "loss": 2.2097, "step": 318005 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.00013850353235212053, "loss": 2.0922, "step": 318010 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013850182655395155, "loss": 2.0487, "step": 318015 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013850012074262969, "loss": 2.0614, "step": 318020 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001384984149181555, "loss": 2.0094, "step": 318025 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.0001384967090805295, "loss": 2.2159, "step": 318030 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013849500322975238, "loss": 2.1334, "step": 318035 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013849329736582465, "loss": 1.9564, "step": 318040 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001384915914887469, "loss": 2.1807, "step": 318045 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013848988559851975, "loss": 2.0922, "step": 318050 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.00013848817969514373, "loss": 1.9175, "step": 318055 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013848647377861947, "loss": 2.1604, "step": 318060 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013848476784894754, "loss": 1.9563, "step": 318065 }, { "epoch": 0.75, "grad_norm": 1.9375, "learning_rate": 0.00013848306190612848, "loss": 2.0717, "step": 318070 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.0001384813559501629, "loss": 1.9844, "step": 318075 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013847964998105146, "loss": 2.0258, "step": 318080 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.0001384779439987946, "loss": 1.9278, "step": 318085 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013847623800339302, "loss": 2.0004, "step": 318090 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013847453199484724, "loss": 2.1593, "step": 318095 }, { "epoch": 0.75, "grad_norm": 2.90625, "learning_rate": 0.00013847282597315787, "loss": 2.0258, "step": 318100 }, { "epoch": 0.75, "grad_norm": 2.71875, "learning_rate": 0.00013847111993832548, "loss": 1.9866, "step": 318105 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013846941389035065, "loss": 2.1177, "step": 318110 }, { "epoch": 0.75, "grad_norm": 1.8671875, "learning_rate": 0.00013846770782923403, "loss": 1.946, "step": 318115 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001384660017549761, "loss": 1.981, "step": 318120 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013846429566757745, "loss": 2.1219, "step": 318125 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013846258956703872, "loss": 2.0925, "step": 318130 }, { "epoch": 0.75, "grad_norm": 1.8203125, "learning_rate": 0.0001384608834533605, "loss": 2.0827, "step": 318135 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013845917732654335, "loss": 2.0407, "step": 318140 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.0001384574711865878, "loss": 2.0689, "step": 318145 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.0001384557650334945, "loss": 2.1043, "step": 318150 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013845405886726402, "loss": 2.0402, "step": 318155 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013845235268789696, "loss": 2.1804, "step": 318160 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013845064649539384, "loss": 2.091, "step": 318165 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.0001384489402897553, "loss": 1.9544, "step": 318170 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013844723407098187, "loss": 2.0141, "step": 318175 }, { "epoch": 0.75, "grad_norm": 2.796875, "learning_rate": 0.00013844552783907424, "loss": 2.1105, "step": 318180 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013844382159403285, "loss": 1.9513, "step": 318185 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001384421153358584, "loss": 2.1272, "step": 318190 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.0001384404090645514, "loss": 2.0499, "step": 318195 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013843870278011248, "loss": 2.1205, "step": 318200 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013843699648254218, "loss": 2.1011, "step": 318205 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.0001384352901718411, "loss": 2.1703, "step": 318210 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.00013843358384800985, "loss": 2.003, "step": 318215 }, { "epoch": 0.75, "grad_norm": 1.921875, "learning_rate": 0.000138431877511049, "loss": 2.1327, "step": 318220 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.0001384301711609591, "loss": 1.8119, "step": 318225 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013842846479774075, "loss": 1.9827, "step": 318230 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013842675842139457, "loss": 2.0111, "step": 318235 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013842505203192108, "loss": 2.1757, "step": 318240 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.0001384233456293209, "loss": 2.278, "step": 318245 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013842163921359463, "loss": 2.2035, "step": 318250 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013841993278474282, "loss": 2.1811, "step": 318255 }, { "epoch": 0.75, "grad_norm": 3.15625, "learning_rate": 0.00013841822634276608, "loss": 1.9101, "step": 318260 }, { "epoch": 0.75, "grad_norm": 1.8828125, "learning_rate": 0.00013841651988766494, "loss": 2.1333, "step": 318265 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013841481341944005, "loss": 2.24, "step": 318270 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013841310693809197, "loss": 1.9648, "step": 318275 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013841140044362126, "loss": 1.9709, "step": 318280 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.0001384096939360285, "loss": 1.9658, "step": 318285 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.0001384079874153143, "loss": 2.0171, "step": 318290 }, { "epoch": 0.75, "grad_norm": 1.828125, "learning_rate": 0.00013840628088147928, "loss": 1.8423, "step": 318295 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013840457433452393, "loss": 2.0421, "step": 318300 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013840286777444888, "loss": 1.9461, "step": 318305 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013840116120125473, "loss": 1.8834, "step": 318310 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013839945461494203, "loss": 2.0081, "step": 318315 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.0001383977480155114, "loss": 1.9661, "step": 318320 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013839604140296344, "loss": 1.8342, "step": 318325 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013839433477729865, "loss": 2.1288, "step": 318330 }, { "epoch": 0.75, "grad_norm": 2.6875, "learning_rate": 0.00013839262813851767, "loss": 2.0489, "step": 318335 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013839092148662104, "loss": 2.0996, "step": 318340 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013838921482160941, "loss": 1.9158, "step": 318345 }, { "epoch": 0.75, "grad_norm": 1.9453125, "learning_rate": 0.0001383875081434833, "loss": 2.0101, "step": 318350 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013838580145224337, "loss": 2.1227, "step": 318355 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013838409474789008, "loss": 2.1693, "step": 318360 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013838238803042416, "loss": 2.204, "step": 318365 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001383806812998461, "loss": 2.016, "step": 318370 }, { "epoch": 0.75, "grad_norm": 2.75, "learning_rate": 0.00013837897455615646, "loss": 1.9447, "step": 318375 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013837726779935593, "loss": 2.1334, "step": 318380 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013837556102944498, "loss": 2.0582, "step": 318385 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013837385424642426, "loss": 2.1205, "step": 318390 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013837214745029435, "loss": 1.7432, "step": 318395 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.0001383704406410558, "loss": 2.1725, "step": 318400 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.0001383687338187092, "loss": 1.9749, "step": 318405 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.0001383670269832552, "loss": 2.1955, "step": 318410 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013836532013469427, "loss": 1.9412, "step": 318415 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013836361327302705, "loss": 2.0904, "step": 318420 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013836190639825415, "loss": 2.2316, "step": 318425 }, { "epoch": 0.75, "grad_norm": 3.078125, "learning_rate": 0.00013836019951037613, "loss": 2.1608, "step": 318430 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013835849260939356, "loss": 2.0408, "step": 318435 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013835678569530706, "loss": 1.9904, "step": 318440 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013835507876811714, "loss": 1.8573, "step": 318445 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.00013835337182782444, "loss": 1.9193, "step": 318450 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013835166487442956, "loss": 2.1091, "step": 318455 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013834995790793303, "loss": 2.0739, "step": 318460 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013834825092833548, "loss": 1.9448, "step": 318465 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013834654393563744, "loss": 2.2009, "step": 318470 }, { "epoch": 0.75, "grad_norm": 1.9453125, "learning_rate": 0.00013834483692983955, "loss": 2.0936, "step": 318475 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013834312991094238, "loss": 2.113, "step": 318480 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013834142287894648, "loss": 2.0506, "step": 318485 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013833971583385244, "loss": 2.1353, "step": 318490 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.0001383380087756609, "loss": 2.2414, "step": 318495 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013833630170437237, "loss": 2.0656, "step": 318500 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013833459461998747, "loss": 2.2656, "step": 318505 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013833288752250678, "loss": 2.1197, "step": 318510 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.0001383311804119309, "loss": 2.0764, "step": 318515 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013832947328826036, "loss": 1.9672, "step": 318520 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013832776615149578, "loss": 1.8989, "step": 318525 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013832605900163775, "loss": 1.9773, "step": 318530 }, { "epoch": 0.75, "grad_norm": 2.515625, "learning_rate": 0.00013832435183868685, "loss": 1.9141, "step": 318535 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013832264466264363, "loss": 2.1038, "step": 318540 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013832093747350875, "loss": 2.1511, "step": 318545 }, { "epoch": 0.75, "grad_norm": 1.9453125, "learning_rate": 0.0001383192302712827, "loss": 2.0235, "step": 318550 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013831752305596613, "loss": 2.2055, "step": 318555 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013831581582755958, "loss": 2.0329, "step": 318560 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013831410858606365, "loss": 2.1101, "step": 318565 }, { "epoch": 0.75, "grad_norm": 1.7578125, "learning_rate": 0.00013831240133147895, "loss": 1.9406, "step": 318570 }, { "epoch": 0.75, "grad_norm": 1.703125, "learning_rate": 0.00013831069406380602, "loss": 2.0938, "step": 318575 }, { "epoch": 0.75, "grad_norm": 2.75, "learning_rate": 0.00013830898678304546, "loss": 2.1094, "step": 318580 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013830727948919785, "loss": 2.0772, "step": 318585 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.0001383055721822638, "loss": 2.1595, "step": 318590 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013830386486224388, "loss": 2.0825, "step": 318595 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013830215752913864, "loss": 1.9523, "step": 318600 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013830045018294867, "loss": 2.2017, "step": 318605 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.0001382987428236746, "loss": 2.1229, "step": 318610 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.000138297035451317, "loss": 2.0518, "step": 318615 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013829532806587642, "loss": 2.0516, "step": 318620 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013829362066735348, "loss": 2.0879, "step": 318625 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.0001382919132557487, "loss": 2.0643, "step": 318630 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013829020583106273, "loss": 2.0672, "step": 318635 }, { "epoch": 0.75, "grad_norm": 1.890625, "learning_rate": 0.00013828849839329614, "loss": 2.07, "step": 318640 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.0001382867909424495, "loss": 2.0047, "step": 318645 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.0001382850834785234, "loss": 2.0154, "step": 318650 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013828337600151842, "loss": 1.9683, "step": 318655 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013828166851143515, "loss": 1.8491, "step": 318660 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013827996100827417, "loss": 1.9504, "step": 318665 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013827825349203602, "loss": 1.8281, "step": 318670 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.00013827654596272138, "loss": 2.0009, "step": 318675 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013827483842033075, "loss": 1.8745, "step": 318680 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013827313086486474, "loss": 2.0289, "step": 318685 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013827142329632394, "loss": 2.2325, "step": 318690 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013826971571470893, "loss": 2.0055, "step": 318695 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013826800812002027, "loss": 2.0224, "step": 318700 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.0001382663005122586, "loss": 2.1821, "step": 318705 }, { "epoch": 0.75, "grad_norm": 1.8046875, "learning_rate": 0.00013826459289142446, "loss": 2.1042, "step": 318710 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013826288525751844, "loss": 2.0231, "step": 318715 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013826117761054108, "loss": 2.0111, "step": 318720 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013825946995049305, "loss": 2.1284, "step": 318725 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001382577622773749, "loss": 2.0302, "step": 318730 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.0001382560545911872, "loss": 2.0889, "step": 318735 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.0001382543468919305, "loss": 2.1489, "step": 318740 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013825263917960546, "loss": 2.0214, "step": 318745 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013825093145421262, "loss": 2.0021, "step": 318750 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013824922371575259, "loss": 2.1251, "step": 318755 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013824751596422586, "loss": 2.1591, "step": 318760 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013824580819963314, "loss": 1.976, "step": 318765 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013824410042197495, "loss": 2.1862, "step": 318770 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013824239263125187, "loss": 2.0558, "step": 318775 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013824068482746452, "loss": 1.841, "step": 318780 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.00013823897701061344, "loss": 1.8765, "step": 318785 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013823726918069923, "loss": 2.0366, "step": 318790 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013823556133772246, "loss": 2.0623, "step": 318795 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013823385348168377, "loss": 2.166, "step": 318800 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013823214561258366, "loss": 2.0745, "step": 318805 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001382304377304228, "loss": 2.0927, "step": 318810 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.0001382287298352017, "loss": 2.1488, "step": 318815 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013822702192692096, "loss": 1.9412, "step": 318820 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.0001382253140055812, "loss": 2.0689, "step": 318825 }, { "epoch": 0.75, "grad_norm": 2.890625, "learning_rate": 0.00013822360607118297, "loss": 2.1018, "step": 318830 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013822189812372687, "loss": 1.9371, "step": 318835 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.00013822019016321345, "loss": 2.0033, "step": 318840 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013821848218964337, "loss": 2.013, "step": 318845 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013821677420301713, "loss": 2.1278, "step": 318850 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013821506620333537, "loss": 2.0949, "step": 318855 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013821335819059861, "loss": 2.0574, "step": 318860 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001382116501648075, "loss": 2.0092, "step": 318865 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013820994212596262, "loss": 2.1322, "step": 318870 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013820823407406448, "loss": 2.0091, "step": 318875 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013820652600911372, "loss": 2.1914, "step": 318880 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013820481793111098, "loss": 2.129, "step": 318885 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013820310984005672, "loss": 1.9623, "step": 318890 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.0001382014017359516, "loss": 2.0108, "step": 318895 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.0001381996936187962, "loss": 2.1383, "step": 318900 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013819798548859107, "loss": 2.0637, "step": 318905 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013819627734533684, "loss": 2.0763, "step": 318910 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013819456918903405, "loss": 2.0456, "step": 318915 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013819286101968332, "loss": 2.268, "step": 318920 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.0001381911528372852, "loss": 2.0806, "step": 318925 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.0001381894446418403, "loss": 2.0762, "step": 318930 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013818773643334918, "loss": 2.0549, "step": 318935 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013818602821181247, "loss": 2.2012, "step": 318940 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013818431997723068, "loss": 2.1529, "step": 318945 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013818261172960443, "loss": 1.8279, "step": 318950 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013818090346893433, "loss": 2.0629, "step": 318955 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013817919519522093, "loss": 2.0512, "step": 318960 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013817748690846484, "loss": 2.0377, "step": 318965 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013817577860866662, "loss": 2.0061, "step": 318970 }, { "epoch": 0.75, "grad_norm": 3.4375, "learning_rate": 0.00013817407029582688, "loss": 2.0434, "step": 318975 }, { "epoch": 0.75, "grad_norm": 1.953125, "learning_rate": 0.00013817236196994617, "loss": 2.1159, "step": 318980 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013817065363102505, "loss": 1.8943, "step": 318985 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013816894527906418, "loss": 2.1331, "step": 318990 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.00013816723691406413, "loss": 1.9816, "step": 318995 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013816552853602545, "loss": 2.1519, "step": 319000 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013816382014494868, "loss": 2.1005, "step": 319005 }, { "epoch": 0.75, "grad_norm": 2.8125, "learning_rate": 0.00013816211174083452, "loss": 1.9958, "step": 319010 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013816040332368344, "loss": 2.0203, "step": 319015 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.0001381586948934961, "loss": 2.0929, "step": 319020 }, { "epoch": 0.75, "grad_norm": 1.875, "learning_rate": 0.0001381569864502731, "loss": 1.9591, "step": 319025 }, { "epoch": 0.75, "grad_norm": 1.796875, "learning_rate": 0.0001381552779940149, "loss": 1.9367, "step": 319030 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.0001381535695247222, "loss": 2.1413, "step": 319035 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013815186104239555, "loss": 1.9818, "step": 319040 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013815015254703555, "loss": 2.0507, "step": 319045 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013814844403864276, "loss": 2.0749, "step": 319050 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013814673551721776, "loss": 1.9719, "step": 319055 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.0001381450269827611, "loss": 2.0045, "step": 319060 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013814331843527348, "loss": 2.2596, "step": 319065 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.0001381416098747554, "loss": 2.2431, "step": 319070 }, { "epoch": 0.75, "grad_norm": 1.9375, "learning_rate": 0.00013813990130120743, "loss": 2.0204, "step": 319075 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013813819271463017, "loss": 2.0807, "step": 319080 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.0001381364841150242, "loss": 2.0338, "step": 319085 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013813477550239019, "loss": 1.9019, "step": 319090 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.00013813306687672857, "loss": 2.0419, "step": 319095 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013813135823804003, "loss": 2.11, "step": 319100 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013812964958632512, "loss": 2.1409, "step": 319105 }, { "epoch": 0.75, "grad_norm": 2.703125, "learning_rate": 0.00013812794092158445, "loss": 1.9814, "step": 319110 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013812623224381858, "loss": 2.2027, "step": 319115 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.0001381245235530281, "loss": 1.9276, "step": 319120 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013812281484921357, "loss": 1.9355, "step": 319125 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013812110613237562, "loss": 2.098, "step": 319130 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013811939740251478, "loss": 2.1929, "step": 319135 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013811768865963167, "loss": 2.0082, "step": 319140 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.0001381159799037269, "loss": 2.1707, "step": 319145 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013811427113480097, "loss": 1.9655, "step": 319150 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013811256235285454, "loss": 2.0392, "step": 319155 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.0001381108535578882, "loss": 2.0477, "step": 319160 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013810914474990247, "loss": 1.9349, "step": 319165 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013810743592889793, "loss": 2.167, "step": 319170 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013810572709487521, "loss": 1.9428, "step": 319175 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013810401824783494, "loss": 2.1224, "step": 319180 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.0001381023093877776, "loss": 2.0226, "step": 319185 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.00013810060051470385, "loss": 2.1163, "step": 319190 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013809889162861417, "loss": 2.0462, "step": 319195 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.0001380971827295093, "loss": 2.1127, "step": 319200 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013809547381738973, "loss": 1.9319, "step": 319205 }, { "epoch": 0.75, "grad_norm": 2.734375, "learning_rate": 0.00013809376489225603, "loss": 1.9913, "step": 319210 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.0001380920559541088, "loss": 2.1658, "step": 319215 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013809034700294865, "loss": 2.1713, "step": 319220 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013808863803877616, "loss": 2.0313, "step": 319225 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.0001380869290615919, "loss": 2.1838, "step": 319230 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.0001380852200713964, "loss": 2.1282, "step": 319235 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013808351106819034, "loss": 2.0424, "step": 319240 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.0001380818020519743, "loss": 2.1204, "step": 319245 }, { "epoch": 0.75, "grad_norm": 1.9140625, "learning_rate": 0.00013808009302274877, "loss": 1.9087, "step": 319250 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.0001380783839805144, "loss": 2.1735, "step": 319255 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013807667492527176, "loss": 2.0389, "step": 319260 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013807496585702147, "loss": 2.0386, "step": 319265 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013807325677576407, "loss": 1.9908, "step": 319270 }, { "epoch": 0.75, "grad_norm": 1.8828125, "learning_rate": 0.0001380715476815001, "loss": 1.889, "step": 319275 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013806983857423026, "loss": 2.2165, "step": 319280 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013806812945395508, "loss": 2.1393, "step": 319285 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.0001380664203206751, "loss": 2.1557, "step": 319290 }, { "epoch": 0.75, "grad_norm": 1.9375, "learning_rate": 0.00013806471117439094, "loss": 2.0561, "step": 319295 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001380630020151032, "loss": 2.1088, "step": 319300 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.00013806129284281245, "loss": 2.1845, "step": 319305 }, { "epoch": 0.75, "grad_norm": 1.6328125, "learning_rate": 0.00013805958365751926, "loss": 2.0074, "step": 319310 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.0001380578744592242, "loss": 2.0887, "step": 319315 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013805616524792798, "loss": 2.025, "step": 319320 }, { "epoch": 0.75, "grad_norm": 1.921875, "learning_rate": 0.000138054456023631, "loss": 2.0617, "step": 319325 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013805274678633392, "loss": 2.0576, "step": 319330 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013805103753603736, "loss": 2.1035, "step": 319335 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001380493282727419, "loss": 2.0273, "step": 319340 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013804761899644804, "loss": 2.0854, "step": 319345 }, { "epoch": 0.75, "grad_norm": 3.734375, "learning_rate": 0.00013804590970715647, "loss": 2.0809, "step": 319350 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.0001380442004048677, "loss": 2.177, "step": 319355 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013804249108958236, "loss": 1.9278, "step": 319360 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013804078176130098, "loss": 2.1551, "step": 319365 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013803907242002423, "loss": 2.0816, "step": 319370 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.0001380373630657526, "loss": 2.0021, "step": 319375 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013803565369848673, "loss": 2.0554, "step": 319380 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013803394431822724, "loss": 2.1472, "step": 319385 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.0001380322349249746, "loss": 1.8729, "step": 319390 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013803052551872948, "loss": 2.0511, "step": 319395 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013802881609949243, "loss": 1.9807, "step": 319400 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001380271066672641, "loss": 1.8149, "step": 319405 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013802539722204495, "loss": 2.187, "step": 319410 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013802368776383569, "loss": 1.9173, "step": 319415 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001380219782926368, "loss": 2.0777, "step": 319420 }, { "epoch": 0.75, "grad_norm": 1.96875, "learning_rate": 0.00013802026880844893, "loss": 1.8601, "step": 319425 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013801855931127265, "loss": 1.9145, "step": 319430 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013801684980110855, "loss": 2.079, "step": 319435 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.0001380151402779572, "loss": 2.0776, "step": 319440 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013801343074181917, "loss": 2.0372, "step": 319445 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013801172119269509, "loss": 1.9351, "step": 319450 }, { "epoch": 0.75, "grad_norm": 3.296875, "learning_rate": 0.00013801001163058553, "loss": 2.0277, "step": 319455 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013800830205549104, "loss": 2.0171, "step": 319460 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.0001380065924674122, "loss": 2.1002, "step": 319465 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013800488286634964, "loss": 2.0601, "step": 319470 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013800317325230393, "loss": 1.9443, "step": 319475 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013800146362527566, "loss": 2.0785, "step": 319480 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013799975398526537, "loss": 1.9641, "step": 319485 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013799804433227365, "loss": 1.8688, "step": 319490 }, { "epoch": 0.75, "grad_norm": 2.84375, "learning_rate": 0.00013799633466630117, "loss": 1.981, "step": 319495 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013799462498734845, "loss": 1.9519, "step": 319500 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.00013799291529541604, "loss": 1.9437, "step": 319505 }, { "epoch": 0.75, "grad_norm": 1.8203125, "learning_rate": 0.00013799120559050457, "loss": 2.0512, "step": 319510 }, { "epoch": 0.75, "grad_norm": 2.65625, "learning_rate": 0.0001379894958726146, "loss": 2.0459, "step": 319515 }, { "epoch": 0.75, "grad_norm": 3.15625, "learning_rate": 0.00013798778614174678, "loss": 2.0332, "step": 319520 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.0001379860763979016, "loss": 2.0824, "step": 319525 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013798436664107973, "loss": 2.0607, "step": 319530 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013798265687128166, "loss": 2.2402, "step": 319535 }, { "epoch": 0.75, "grad_norm": 1.875, "learning_rate": 0.00013798094708850807, "loss": 2.0914, "step": 319540 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013797923729275946, "loss": 2.0016, "step": 319545 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013797752748403647, "loss": 2.2192, "step": 319550 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013797581766233967, "loss": 2.0461, "step": 319555 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.00013797410782766964, "loss": 2.0374, "step": 319560 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013797239798002696, "loss": 2.1244, "step": 319565 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013797068811941224, "loss": 2.0155, "step": 319570 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.000137968978245826, "loss": 1.9737, "step": 319575 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013796726835926893, "loss": 1.8929, "step": 319580 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.00013796555845974152, "loss": 2.1634, "step": 319585 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.0001379638485472444, "loss": 2.1519, "step": 319590 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.0001379621386217781, "loss": 2.1483, "step": 319595 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013796042868334329, "loss": 2.0438, "step": 319600 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013795871873194048, "loss": 2.0066, "step": 319605 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.00013795700876757032, "loss": 2.1803, "step": 319610 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013795529879023335, "loss": 2.0101, "step": 319615 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.00013795358879993016, "loss": 2.1003, "step": 319620 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.0001379518787966613, "loss": 1.8627, "step": 319625 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001379501687804274, "loss": 2.0869, "step": 319630 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013794845875122905, "loss": 2.0954, "step": 319635 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013794674870906684, "loss": 2.0979, "step": 319640 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.0001379450386539413, "loss": 2.1211, "step": 319645 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013794332858585304, "loss": 1.9911, "step": 319650 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013794161850480267, "loss": 1.9096, "step": 319655 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013793990841079077, "loss": 2.1018, "step": 319660 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013793819830381786, "loss": 1.9979, "step": 319665 }, { "epoch": 0.75, "grad_norm": 1.828125, "learning_rate": 0.0001379364881838846, "loss": 2.0435, "step": 319670 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013793477805099157, "loss": 2.0494, "step": 319675 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.0001379330679051393, "loss": 2.0241, "step": 319680 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013793135774632843, "loss": 2.1732, "step": 319685 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001379296475745595, "loss": 1.9122, "step": 319690 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001379279373898331, "loss": 1.9294, "step": 319695 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013792622719214988, "loss": 1.9288, "step": 319700 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.00013792451698151035, "loss": 2.0994, "step": 319705 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.0001379228067579151, "loss": 1.9875, "step": 319710 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013792109652136474, "loss": 1.9278, "step": 319715 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.00013791938627185984, "loss": 1.9397, "step": 319720 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.000137917676009401, "loss": 2.0985, "step": 319725 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001379159657339888, "loss": 2.0606, "step": 319730 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.0001379142554456238, "loss": 2.2689, "step": 319735 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013791254514430664, "loss": 2.1666, "step": 319740 }, { "epoch": 0.75, "grad_norm": 1.96875, "learning_rate": 0.00013791083483003782, "loss": 2.0355, "step": 319745 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013790912450281802, "loss": 2.0606, "step": 319750 }, { "epoch": 0.75, "grad_norm": 2.65625, "learning_rate": 0.00013790741416264772, "loss": 1.9541, "step": 319755 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.00013790570380952757, "loss": 2.0102, "step": 319760 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013790399344345818, "loss": 2.0339, "step": 319765 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013790228306444007, "loss": 2.1188, "step": 319770 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013790057267247389, "loss": 2.0678, "step": 319775 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013789886226756016, "loss": 2.0352, "step": 319780 }, { "epoch": 0.75, "grad_norm": 1.84375, "learning_rate": 0.00013789715184969948, "loss": 2.1972, "step": 319785 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013789544141889243, "loss": 2.0438, "step": 319790 }, { "epoch": 0.75, "grad_norm": 1.8046875, "learning_rate": 0.00013789373097513967, "loss": 1.9849, "step": 319795 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.0001378920205184417, "loss": 2.1098, "step": 319800 }, { "epoch": 0.75, "grad_norm": 1.8046875, "learning_rate": 0.0001378903100487991, "loss": 2.1397, "step": 319805 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001378885995662125, "loss": 2.1166, "step": 319810 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.0001378868890706825, "loss": 2.1671, "step": 319815 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013788517856220959, "loss": 2.1821, "step": 319820 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.00013788346804079446, "loss": 2.0807, "step": 319825 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013788175750643764, "loss": 2.1227, "step": 319830 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013788004695913973, "loss": 2.0198, "step": 319835 }, { "epoch": 0.75, "grad_norm": 2.703125, "learning_rate": 0.0001378783363989013, "loss": 2.0672, "step": 319840 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013787662582572292, "loss": 2.0569, "step": 319845 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013787491523960523, "loss": 2.0761, "step": 319850 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013787320464054878, "loss": 2.1572, "step": 319855 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013787149402855414, "loss": 2.0158, "step": 319860 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.0001378697834036219, "loss": 1.8954, "step": 319865 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013786807276575268, "loss": 2.0535, "step": 319870 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013786636211494705, "loss": 2.0417, "step": 319875 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013786465145120558, "loss": 1.9827, "step": 319880 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013786294077452885, "loss": 2.1667, "step": 319885 }, { "epoch": 0.75, "grad_norm": 2.90625, "learning_rate": 0.00013786123008491745, "loss": 2.0316, "step": 319890 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013785951938237196, "loss": 2.0474, "step": 319895 }, { "epoch": 0.75, "grad_norm": 3.015625, "learning_rate": 0.00013785780866689296, "loss": 2.0636, "step": 319900 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013785609793848109, "loss": 1.9017, "step": 319905 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013785438719713687, "loss": 2.0562, "step": 319910 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.0001378526764428609, "loss": 1.9887, "step": 319915 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013785096567565377, "loss": 2.035, "step": 319920 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013784925489551605, "loss": 2.1453, "step": 319925 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013784754410244837, "loss": 2.0263, "step": 319930 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013784583329645126, "loss": 2.0834, "step": 319935 }, { "epoch": 0.75, "grad_norm": 2.671875, "learning_rate": 0.00013784412247752533, "loss": 2.0806, "step": 319940 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013784241164567116, "loss": 1.9421, "step": 319945 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013784070080088935, "loss": 1.9642, "step": 319950 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013783898994318045, "loss": 2.0441, "step": 319955 }, { "epoch": 0.75, "grad_norm": 1.90625, "learning_rate": 0.00013783727907254508, "loss": 2.0018, "step": 319960 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.0001378355681889838, "loss": 2.1243, "step": 319965 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013783385729249718, "loss": 2.0664, "step": 319970 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001378321463830859, "loss": 1.904, "step": 319975 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.0001378304354607504, "loss": 2.0623, "step": 319980 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013782872452549138, "loss": 1.9234, "step": 319985 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013782701357730937, "loss": 2.0157, "step": 319990 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013782530261620494, "loss": 2.1985, "step": 319995 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013782359164217874, "loss": 1.8921, "step": 320000 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001378218806552313, "loss": 2.1171, "step": 320005 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013782016965536322, "loss": 2.1368, "step": 320010 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.00013781845864257506, "loss": 2.1106, "step": 320015 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.00013781674761686747, "loss": 1.965, "step": 320020 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.000137815036578241, "loss": 2.0495, "step": 320025 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013781332552669618, "loss": 2.0396, "step": 320030 }, { "epoch": 0.75, "grad_norm": 2.6875, "learning_rate": 0.00013781161446223364, "loss": 2.0229, "step": 320035 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.000137809903384854, "loss": 2.0236, "step": 320040 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.0001378081922945578, "loss": 2.1404, "step": 320045 }, { "epoch": 0.75, "grad_norm": 1.9765625, "learning_rate": 0.00013780648119134564, "loss": 2.0946, "step": 320050 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.0001378047700752181, "loss": 2.0714, "step": 320055 }, { "epoch": 0.75, "grad_norm": 2.625, "learning_rate": 0.00013780305894617575, "loss": 2.1347, "step": 320060 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013780134780421924, "loss": 2.0365, "step": 320065 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.00013779963664934907, "loss": 2.1065, "step": 320070 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013779792548156584, "loss": 2.0113, "step": 320075 }, { "epoch": 0.75, "grad_norm": 1.984375, "learning_rate": 0.00013779621430087016, "loss": 2.0138, "step": 320080 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.0001377945031072626, "loss": 2.1362, "step": 320085 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.0001377927919007438, "loss": 1.9901, "step": 320090 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013779108068131426, "loss": 2.0131, "step": 320095 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.0001377893694489746, "loss": 2.0974, "step": 320100 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.0001377876582037254, "loss": 2.3394, "step": 320105 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013778594694556728, "loss": 2.0122, "step": 320110 }, { "epoch": 0.75, "grad_norm": 2.0625, "learning_rate": 0.00013778423567450078, "loss": 2.0192, "step": 320115 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.0001377825243905265, "loss": 2.0635, "step": 320120 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.000137780813093645, "loss": 2.1731, "step": 320125 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013777910178385693, "loss": 2.0581, "step": 320130 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013777739046116284, "loss": 2.1059, "step": 320135 }, { "epoch": 0.75, "grad_norm": 2.0, "learning_rate": 0.00013777567912556327, "loss": 2.0172, "step": 320140 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013777396777705885, "loss": 2.0394, "step": 320145 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013777225641565014, "loss": 2.0756, "step": 320150 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.0001377705450413378, "loss": 2.106, "step": 320155 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.0001377688336541223, "loss": 2.1548, "step": 320160 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.00013776712225400432, "loss": 2.0737, "step": 320165 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.00013776541084098437, "loss": 2.1827, "step": 320170 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.0001377636994150631, "loss": 2.0465, "step": 320175 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013776198797624107, "loss": 2.0526, "step": 320180 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013776027652451884, "loss": 2.0331, "step": 320185 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013775856505989702, "loss": 2.0409, "step": 320190 }, { "epoch": 0.75, "grad_norm": 1.7734375, "learning_rate": 0.0001377568535823762, "loss": 1.9298, "step": 320195 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013775514209195695, "loss": 2.152, "step": 320200 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013775343058863986, "loss": 2.0754, "step": 320205 }, { "epoch": 0.75, "grad_norm": 2.609375, "learning_rate": 0.0001377517190724255, "loss": 2.1889, "step": 320210 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013775000754331446, "loss": 2.0453, "step": 320215 }, { "epoch": 0.75, "grad_norm": 2.9375, "learning_rate": 0.00013774829600130735, "loss": 2.1094, "step": 320220 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013774658444640477, "loss": 2.1821, "step": 320225 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013774487287860723, "loss": 2.2008, "step": 320230 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013774316129791536, "loss": 2.1677, "step": 320235 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013774144970432972, "loss": 2.2784, "step": 320240 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013773973809785096, "loss": 1.9794, "step": 320245 }, { "epoch": 0.75, "grad_norm": 2.59375, "learning_rate": 0.0001377380264784796, "loss": 2.2254, "step": 320250 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013773631484621625, "loss": 2.0075, "step": 320255 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.00013773460320106148, "loss": 2.181, "step": 320260 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.00013773289154301587, "loss": 2.1019, "step": 320265 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013773117987208008, "loss": 2.0096, "step": 320270 }, { "epoch": 0.75, "grad_norm": 2.375, "learning_rate": 0.0001377294681882546, "loss": 2.0581, "step": 320275 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.00013772775649154007, "loss": 2.1924, "step": 320280 }, { "epoch": 0.75, "grad_norm": 3.015625, "learning_rate": 0.00013772604478193702, "loss": 2.2016, "step": 320285 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013772433305944608, "loss": 1.8552, "step": 320290 }, { "epoch": 0.75, "grad_norm": 1.671875, "learning_rate": 0.0001377226213240678, "loss": 1.9848, "step": 320295 }, { "epoch": 0.75, "grad_norm": 1.8125, "learning_rate": 0.00013772090957580284, "loss": 1.9077, "step": 320300 }, { "epoch": 0.75, "grad_norm": 3.5625, "learning_rate": 0.0001377191978146517, "loss": 2.0817, "step": 320305 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.00013771748604061502, "loss": 1.9923, "step": 320310 }, { "epoch": 0.75, "grad_norm": 1.9140625, "learning_rate": 0.00013771577425369334, "loss": 2.0529, "step": 320315 }, { "epoch": 0.75, "grad_norm": 1.8046875, "learning_rate": 0.00013771406245388727, "loss": 2.0376, "step": 320320 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.00013771235064119738, "loss": 2.045, "step": 320325 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.0001377106388156243, "loss": 2.225, "step": 320330 }, { "epoch": 0.75, "grad_norm": 1.875, "learning_rate": 0.00013770892697716859, "loss": 1.9828, "step": 320335 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.0001377072151258308, "loss": 1.9178, "step": 320340 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.0001377055032616115, "loss": 2.0459, "step": 320345 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.0001377037913845114, "loss": 2.1144, "step": 320350 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.00013770207949453096, "loss": 2.0108, "step": 320355 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013770036759167082, "loss": 1.9748, "step": 320360 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.00013769865567593153, "loss": 2.0862, "step": 320365 }, { "epoch": 0.75, "grad_norm": 1.84375, "learning_rate": 0.0001376969437473137, "loss": 1.9887, "step": 320370 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013769523180581788, "loss": 2.1046, "step": 320375 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013769351985144475, "loss": 2.1295, "step": 320380 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.0001376918078841948, "loss": 2.2, "step": 320385 }, { "epoch": 0.75, "grad_norm": 2.5, "learning_rate": 0.00013769009590406864, "loss": 2.1754, "step": 320390 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013768838391106688, "loss": 2.2428, "step": 320395 }, { "epoch": 0.75, "grad_norm": 1.9375, "learning_rate": 0.00013768667190519005, "loss": 2.0384, "step": 320400 }, { "epoch": 0.75, "grad_norm": 2.484375, "learning_rate": 0.00013768495988643879, "loss": 2.0979, "step": 320405 }, { "epoch": 0.75, "grad_norm": 2.5625, "learning_rate": 0.00013768324785481368, "loss": 2.2275, "step": 320410 }, { "epoch": 0.75, "grad_norm": 2.625, "learning_rate": 0.00013768153581031526, "loss": 2.1649, "step": 320415 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013767982375294415, "loss": 2.0503, "step": 320420 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013767811168270095, "loss": 2.0143, "step": 320425 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.0001376763995995862, "loss": 2.0349, "step": 320430 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013767468750360053, "loss": 1.8837, "step": 320435 }, { "epoch": 0.75, "grad_norm": 3.1875, "learning_rate": 0.0001376729753947445, "loss": 2.0333, "step": 320440 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013767126327301866, "loss": 1.921, "step": 320445 }, { "epoch": 0.75, "grad_norm": 2.875, "learning_rate": 0.00013766955113842365, "loss": 2.0262, "step": 320450 }, { "epoch": 0.75, "grad_norm": 1.984375, "learning_rate": 0.00013766783899096007, "loss": 2.1043, "step": 320455 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013766612683062846, "loss": 1.9687, "step": 320460 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013766441465742942, "loss": 2.0633, "step": 320465 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001376627024713635, "loss": 2.0749, "step": 320470 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.0001376609902724314, "loss": 2.0455, "step": 320475 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.0001376592780606335, "loss": 1.9083, "step": 320480 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.0001376575658359706, "loss": 1.9159, "step": 320485 }, { "epoch": 0.75, "grad_norm": 1.7421875, "learning_rate": 0.00013765585359844316, "loss": 2.0172, "step": 320490 }, { "epoch": 0.75, "grad_norm": 2.546875, "learning_rate": 0.0001376541413480518, "loss": 2.1123, "step": 320495 }, { "epoch": 0.75, "grad_norm": 2.390625, "learning_rate": 0.00013765242908479712, "loss": 1.707, "step": 320500 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013765071680867964, "loss": 1.999, "step": 320505 }, { "epoch": 0.75, "grad_norm": 2.265625, "learning_rate": 0.0001376490045197, "loss": 1.8681, "step": 320510 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013764729221785884, "loss": 2.2371, "step": 320515 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013764557990315665, "loss": 2.1088, "step": 320520 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.00013764386757559403, "loss": 2.0234, "step": 320525 }, { "epoch": 0.75, "grad_norm": 1.796875, "learning_rate": 0.0001376421552351716, "loss": 1.9503, "step": 320530 }, { "epoch": 0.75, "grad_norm": 1.9296875, "learning_rate": 0.0001376404428818899, "loss": 2.1106, "step": 320535 }, { "epoch": 0.75, "grad_norm": 2.40625, "learning_rate": 0.00013763873051574958, "loss": 1.8967, "step": 320540 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013763701813675119, "loss": 1.902, "step": 320545 }, { "epoch": 0.75, "grad_norm": 2.046875, "learning_rate": 0.00013763530574489527, "loss": 2.1302, "step": 320550 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013763359334018248, "loss": 2.1649, "step": 320555 }, { "epoch": 0.75, "grad_norm": 1.8125, "learning_rate": 0.00013763188092261332, "loss": 1.9687, "step": 320560 }, { "epoch": 0.75, "grad_norm": 2.46875, "learning_rate": 0.0001376301684921885, "loss": 2.126, "step": 320565 }, { "epoch": 0.75, "grad_norm": 2.359375, "learning_rate": 0.0001376284560489085, "loss": 2.1818, "step": 320570 }, { "epoch": 0.75, "grad_norm": 2.21875, "learning_rate": 0.00013762674359277393, "loss": 2.0715, "step": 320575 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.0001376250311237854, "loss": 2.0801, "step": 320580 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013762331864194347, "loss": 2.046, "step": 320585 }, { "epoch": 0.75, "grad_norm": 2.15625, "learning_rate": 0.0001376216061472487, "loss": 2.0389, "step": 320590 }, { "epoch": 0.75, "grad_norm": 2.25, "learning_rate": 0.00013761989363970175, "loss": 2.254, "step": 320595 }, { "epoch": 0.75, "grad_norm": 2.765625, "learning_rate": 0.00013761818111930315, "loss": 2.0666, "step": 320600 }, { "epoch": 0.75, "grad_norm": 1.859375, "learning_rate": 0.00013761646858605347, "loss": 2.0959, "step": 320605 }, { "epoch": 0.75, "grad_norm": 1.984375, "learning_rate": 0.00013761475603995334, "loss": 1.8921, "step": 320610 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013761304348100336, "loss": 2.1687, "step": 320615 }, { "epoch": 0.75, "grad_norm": 2.03125, "learning_rate": 0.00013761133090920406, "loss": 2.0635, "step": 320620 }, { "epoch": 0.75, "grad_norm": 2.234375, "learning_rate": 0.00013760961832455604, "loss": 1.953, "step": 320625 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013760790572705988, "loss": 2.2266, "step": 320630 }, { "epoch": 0.75, "grad_norm": 2.109375, "learning_rate": 0.00013760619311671622, "loss": 2.0528, "step": 320635 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013760448049352555, "loss": 2.1056, "step": 320640 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013760276785748855, "loss": 1.7276, "step": 320645 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013760105520860573, "loss": 1.8968, "step": 320650 }, { "epoch": 0.75, "grad_norm": 2.34375, "learning_rate": 0.00013759934254687774, "loss": 2.1753, "step": 320655 }, { "epoch": 0.75, "grad_norm": 2.3125, "learning_rate": 0.0001375976298723051, "loss": 1.7129, "step": 320660 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013759591718488844, "loss": 1.955, "step": 320665 }, { "epoch": 0.75, "grad_norm": 1.8984375, "learning_rate": 0.0001375942044846283, "loss": 2.1198, "step": 320670 }, { "epoch": 0.75, "grad_norm": 2.578125, "learning_rate": 0.00013759249177152535, "loss": 1.9572, "step": 320675 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013759077904558012, "loss": 1.9927, "step": 320680 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.0001375890663067932, "loss": 2.103, "step": 320685 }, { "epoch": 0.75, "grad_norm": 2.140625, "learning_rate": 0.00013758735355516515, "loss": 1.833, "step": 320690 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.0001375856407906966, "loss": 2.1103, "step": 320695 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013758392801338807, "loss": 1.9645, "step": 320700 }, { "epoch": 0.75, "grad_norm": 2.328125, "learning_rate": 0.00013758221522324024, "loss": 2.1812, "step": 320705 }, { "epoch": 0.75, "grad_norm": 1.9375, "learning_rate": 0.00013758050242025362, "loss": 1.9885, "step": 320710 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013757878960442881, "loss": 2.029, "step": 320715 }, { "epoch": 0.75, "grad_norm": 2.171875, "learning_rate": 0.00013757707677576638, "loss": 1.8888, "step": 320720 }, { "epoch": 0.75, "grad_norm": 2.1875, "learning_rate": 0.000137575363934267, "loss": 2.0852, "step": 320725 }, { "epoch": 0.75, "grad_norm": 1.8203125, "learning_rate": 0.0001375736510799312, "loss": 2.0533, "step": 320730 }, { "epoch": 0.75, "grad_norm": 2.421875, "learning_rate": 0.0001375719382127595, "loss": 2.1489, "step": 320735 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013757022533275256, "loss": 1.9415, "step": 320740 }, { "epoch": 0.75, "grad_norm": 2.4375, "learning_rate": 0.00013756851243991097, "loss": 2.0847, "step": 320745 }, { "epoch": 0.75, "grad_norm": 2.09375, "learning_rate": 0.0001375667995342353, "loss": 2.1251, "step": 320750 }, { "epoch": 0.75, "grad_norm": 1.859375, "learning_rate": 0.00013756508661572612, "loss": 2.0687, "step": 320755 }, { "epoch": 0.75, "grad_norm": 1.9921875, "learning_rate": 0.00013756337368438403, "loss": 1.8498, "step": 320760 }, { "epoch": 0.75, "grad_norm": 2.203125, "learning_rate": 0.00013756166074020958, "loss": 2.0532, "step": 320765 }, { "epoch": 0.75, "grad_norm": 2.015625, "learning_rate": 0.00013755994778320342, "loss": 2.2224, "step": 320770 }, { "epoch": 0.75, "grad_norm": 2.640625, "learning_rate": 0.00013755823481336607, "loss": 2.0445, "step": 320775 }, { "epoch": 0.75, "grad_norm": 2.078125, "learning_rate": 0.00013755652183069817, "loss": 2.1248, "step": 320780 }, { "epoch": 0.75, "grad_norm": 2.296875, "learning_rate": 0.00013755480883520028, "loss": 1.9722, "step": 320785 }, { "epoch": 0.75, "grad_norm": 2.28125, "learning_rate": 0.00013755309582687297, "loss": 2.0651, "step": 320790 }, { "epoch": 0.75, "grad_norm": 1.859375, "learning_rate": 0.00013755138280571687, "loss": 2.2227, "step": 320795 }, { "epoch": 0.75, "grad_norm": 1.8671875, "learning_rate": 0.00013754966977173248, "loss": 1.9315, "step": 320800 }, { "epoch": 0.75, "grad_norm": 2.453125, "learning_rate": 0.0001375479567249205, "loss": 1.9596, "step": 320805 }, { "epoch": 0.75, "grad_norm": 2.125, "learning_rate": 0.00013754624366528145, "loss": 2.151, "step": 320810 }, { "epoch": 0.75, "grad_norm": 1.9609375, "learning_rate": 0.0001375445305928159, "loss": 2.1701, "step": 320815 }, { "epoch": 0.75, "grad_norm": 2.53125, "learning_rate": 0.0001375428175075245, "loss": 1.7625, "step": 320820 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013754110440940775, "loss": 2.1134, "step": 320825 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013753939129846626, "loss": 1.8056, "step": 320830 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013753767817470066, "loss": 2.0717, "step": 320835 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013753596503811153, "loss": 2.0964, "step": 320840 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001375342518886994, "loss": 2.0399, "step": 320845 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.0001375325387264649, "loss": 2.0083, "step": 320850 }, { "epoch": 0.76, "grad_norm": 2.578125, "learning_rate": 0.00013753082555140859, "loss": 2.2864, "step": 320855 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.0001375291123635311, "loss": 2.0066, "step": 320860 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013752739916283297, "loss": 2.0218, "step": 320865 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013752568594931481, "loss": 1.9826, "step": 320870 }, { "epoch": 0.76, "grad_norm": 1.703125, "learning_rate": 0.00013752397272297716, "loss": 2.0984, "step": 320875 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013752225948382066, "loss": 2.0847, "step": 320880 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013752054623184589, "loss": 2.1493, "step": 320885 }, { "epoch": 0.76, "grad_norm": 1.921875, "learning_rate": 0.00013751883296705343, "loss": 1.9687, "step": 320890 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013751711968944383, "loss": 1.9832, "step": 320895 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001375154063990177, "loss": 2.1227, "step": 320900 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.00013751369309577565, "loss": 1.8871, "step": 320905 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.0001375119797797182, "loss": 2.1564, "step": 320910 }, { "epoch": 0.76, "grad_norm": 1.8046875, "learning_rate": 0.00013751026645084604, "loss": 1.9514, "step": 320915 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013750855310915965, "loss": 2.0971, "step": 320920 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013750683975465966, "loss": 1.8963, "step": 320925 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013750512638734667, "loss": 1.9475, "step": 320930 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013750341300722122, "loss": 2.1222, "step": 320935 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013750169961428393, "loss": 2.0389, "step": 320940 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001374999862085354, "loss": 2.0357, "step": 320945 }, { "epoch": 0.76, "grad_norm": 1.7890625, "learning_rate": 0.0001374982727899762, "loss": 2.1171, "step": 320950 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.0001374965593586069, "loss": 2.1302, "step": 320955 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001374948459144281, "loss": 2.1587, "step": 320960 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013749313245744037, "loss": 2.187, "step": 320965 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013749141898764428, "loss": 2.1212, "step": 320970 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.0001374897055050405, "loss": 1.9866, "step": 320975 }, { "epoch": 0.76, "grad_norm": 2.65625, "learning_rate": 0.00013748799200962952, "loss": 2.0184, "step": 320980 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.000137486278501412, "loss": 2.14, "step": 320985 }, { "epoch": 0.76, "grad_norm": 1.8515625, "learning_rate": 0.0001374845649803884, "loss": 2.1121, "step": 320990 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013748285144655947, "loss": 1.9806, "step": 320995 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.0001374811378999257, "loss": 2.0842, "step": 321000 }, { "epoch": 0.76, "grad_norm": 3.015625, "learning_rate": 0.0001374794243404877, "loss": 2.094, "step": 321005 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013747771076824602, "loss": 2.1545, "step": 321010 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013747599718320127, "loss": 2.1117, "step": 321015 }, { "epoch": 0.76, "grad_norm": 2.703125, "learning_rate": 0.0001374742835853541, "loss": 2.2203, "step": 321020 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.000137472569974705, "loss": 2.0489, "step": 321025 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013747085635125455, "loss": 2.1539, "step": 321030 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.00013746914271500345, "loss": 2.1429, "step": 321035 }, { "epoch": 0.76, "grad_norm": 1.8359375, "learning_rate": 0.00013746742906595215, "loss": 1.943, "step": 321040 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013746571540410133, "loss": 2.129, "step": 321045 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013746400172945153, "loss": 2.1701, "step": 321050 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013746228804200337, "loss": 2.217, "step": 321055 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.0001374605743417574, "loss": 2.0114, "step": 321060 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.0001374588606287142, "loss": 1.8611, "step": 321065 }, { "epoch": 0.76, "grad_norm": 2.875, "learning_rate": 0.0001374571469028744, "loss": 2.0997, "step": 321070 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013745543316423853, "loss": 2.0759, "step": 321075 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001374537194128072, "loss": 1.7896, "step": 321080 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013745200564858103, "loss": 2.0873, "step": 321085 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.0001374502918715606, "loss": 2.1834, "step": 321090 }, { "epoch": 0.76, "grad_norm": 3.015625, "learning_rate": 0.0001374485780817464, "loss": 1.9863, "step": 321095 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.00013744686427913914, "loss": 2.1169, "step": 321100 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013744515046373932, "loss": 1.9799, "step": 321105 }, { "epoch": 0.76, "grad_norm": 1.890625, "learning_rate": 0.00013744343663554763, "loss": 2.0223, "step": 321110 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.00013744172279456452, "loss": 2.0019, "step": 321115 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013744000894079064, "loss": 2.1575, "step": 321120 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013743829507422654, "loss": 2.0454, "step": 321125 }, { "epoch": 0.76, "grad_norm": 2.953125, "learning_rate": 0.0001374365811948729, "loss": 1.8668, "step": 321130 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013743486730273023, "loss": 2.1503, "step": 321135 }, { "epoch": 0.76, "grad_norm": 2.78125, "learning_rate": 0.0001374331533977991, "loss": 1.9964, "step": 321140 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013743143948008018, "loss": 1.9969, "step": 321145 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013742972554957393, "loss": 1.9979, "step": 321150 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013742801160628108, "loss": 1.9164, "step": 321155 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013742629765020213, "loss": 1.9416, "step": 321160 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013742458368133764, "loss": 2.0493, "step": 321165 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013742286969968824, "loss": 2.0528, "step": 321170 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.00013742115570525453, "loss": 2.0519, "step": 321175 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013741944169803707, "loss": 2.0411, "step": 321180 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013741772767803646, "loss": 2.1985, "step": 321185 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013741601364525326, "loss": 2.215, "step": 321190 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013741429959968806, "loss": 1.8511, "step": 321195 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013741258554134147, "loss": 2.1449, "step": 321200 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013741087147021405, "loss": 2.0026, "step": 321205 }, { "epoch": 0.76, "grad_norm": 1.9296875, "learning_rate": 0.00013740915738630642, "loss": 2.2351, "step": 321210 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013740744328961913, "loss": 2.2662, "step": 321215 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013740572918015279, "loss": 2.1153, "step": 321220 }, { "epoch": 0.76, "grad_norm": 2.5625, "learning_rate": 0.00013740401505790798, "loss": 1.8895, "step": 321225 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013740230092288525, "loss": 2.0517, "step": 321230 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013740058677508523, "loss": 2.0884, "step": 321235 }, { "epoch": 0.76, "grad_norm": 1.9140625, "learning_rate": 0.00013739887261450848, "loss": 1.9734, "step": 321240 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013739715844115563, "loss": 1.8819, "step": 321245 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.00013739544425502722, "loss": 1.9931, "step": 321250 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013739373005612382, "loss": 2.1233, "step": 321255 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.00013739201584444608, "loss": 1.9353, "step": 321260 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.0001373903016199945, "loss": 2.1086, "step": 321265 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.0001373885873827698, "loss": 2.0375, "step": 321270 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001373868731327724, "loss": 2.0369, "step": 321275 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.000137385158870003, "loss": 2.0161, "step": 321280 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013738344459446214, "loss": 2.14, "step": 321285 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013738173030615044, "loss": 2.0576, "step": 321290 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013738001600506843, "loss": 2.052, "step": 321295 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013737830169121678, "loss": 1.9822, "step": 321300 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.000137376587364596, "loss": 2.0131, "step": 321305 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013737487302520663, "loss": 2.0807, "step": 321310 }, { "epoch": 0.76, "grad_norm": 1.8046875, "learning_rate": 0.00013737315867304943, "loss": 1.9992, "step": 321315 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013737144430812483, "loss": 2.0219, "step": 321320 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013736972993043346, "loss": 1.944, "step": 321325 }, { "epoch": 0.76, "grad_norm": 2.578125, "learning_rate": 0.00013736801553997593, "loss": 2.1611, "step": 321330 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.0001373663011367528, "loss": 1.9345, "step": 321335 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013736458672076467, "loss": 1.9864, "step": 321340 }, { "epoch": 0.76, "grad_norm": 1.8046875, "learning_rate": 0.00013736287229201212, "loss": 1.9027, "step": 321345 }, { "epoch": 0.76, "grad_norm": 1.921875, "learning_rate": 0.00013736115785049574, "loss": 1.9175, "step": 321350 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.0001373594433962161, "loss": 2.0666, "step": 321355 }, { "epoch": 0.76, "grad_norm": 2.78125, "learning_rate": 0.00013735772892917384, "loss": 2.1382, "step": 321360 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.0001373560144493694, "loss": 1.9762, "step": 321365 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013735429995680355, "loss": 1.9619, "step": 321370 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013735258545147678, "loss": 1.9308, "step": 321375 }, { "epoch": 0.76, "grad_norm": 3.6875, "learning_rate": 0.00013735087093338968, "loss": 1.9549, "step": 321380 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013734915640254287, "loss": 2.3312, "step": 321385 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013734744185893687, "loss": 2.1771, "step": 321390 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.00013734572730257231, "loss": 2.1597, "step": 321395 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013734401273344978, "loss": 2.0732, "step": 321400 }, { "epoch": 0.76, "grad_norm": 1.90625, "learning_rate": 0.00013734229815156988, "loss": 2.0918, "step": 321405 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.00013734058355693317, "loss": 2.0763, "step": 321410 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013733886894954026, "loss": 2.1891, "step": 321415 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013733715432939165, "loss": 1.8824, "step": 321420 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.000137335439696488, "loss": 2.1087, "step": 321425 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013733372505082996, "loss": 1.9722, "step": 321430 }, { "epoch": 0.76, "grad_norm": 1.921875, "learning_rate": 0.00013733201039241798, "loss": 1.9684, "step": 321435 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013733029572125273, "loss": 1.9529, "step": 321440 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013732858103733474, "loss": 2.0482, "step": 321445 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013732686634066469, "loss": 2.084, "step": 321450 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.00013732515163124306, "loss": 1.9993, "step": 321455 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013732343690907049, "loss": 1.9983, "step": 321460 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013732172217414756, "loss": 1.9136, "step": 321465 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013732000742647487, "loss": 2.1449, "step": 321470 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013731829266605295, "loss": 2.0422, "step": 321475 }, { "epoch": 0.76, "grad_norm": 1.984375, "learning_rate": 0.00013731657789288246, "loss": 2.0829, "step": 321480 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013731486310696392, "loss": 2.0106, "step": 321485 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013731314830829798, "loss": 2.0104, "step": 321490 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013731143349688516, "loss": 2.0724, "step": 321495 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013730971867272608, "loss": 2.0835, "step": 321500 }, { "epoch": 0.76, "grad_norm": 2.734375, "learning_rate": 0.00013730800383582133, "loss": 2.1756, "step": 321505 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.00013730628898617152, "loss": 2.0742, "step": 321510 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013730457412377718, "loss": 1.7845, "step": 321515 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013730285924863894, "loss": 2.1035, "step": 321520 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.0001373011443607573, "loss": 1.9823, "step": 321525 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.000137299429460133, "loss": 1.9318, "step": 321530 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.00013729771454676647, "loss": 1.9554, "step": 321535 }, { "epoch": 0.76, "grad_norm": 1.8671875, "learning_rate": 0.00013729599962065842, "loss": 2.1192, "step": 321540 }, { "epoch": 0.76, "grad_norm": 1.8046875, "learning_rate": 0.00013729428468180937, "loss": 2.0561, "step": 321545 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013729256973021986, "loss": 2.071, "step": 321550 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013729085476589056, "loss": 1.9493, "step": 321555 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013728913978882206, "loss": 2.1172, "step": 321560 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001372874247990149, "loss": 2.0217, "step": 321565 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013728570979646966, "loss": 2.1586, "step": 321570 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013728399478118696, "loss": 2.0631, "step": 321575 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013728227975316737, "loss": 2.2205, "step": 321580 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013728056471241148, "loss": 2.2129, "step": 321585 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013727884965891986, "loss": 2.296, "step": 321590 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013727713459269313, "loss": 1.8303, "step": 321595 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013727541951373185, "loss": 2.0647, "step": 321600 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013727370442203658, "loss": 2.1039, "step": 321605 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013727198931760796, "loss": 2.0669, "step": 321610 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013727027420044655, "loss": 2.1526, "step": 321615 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013726855907055297, "loss": 2.0036, "step": 321620 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.0001372668439279277, "loss": 1.9883, "step": 321625 }, { "epoch": 0.76, "grad_norm": 2.703125, "learning_rate": 0.00013726512877257149, "loss": 2.047, "step": 321630 }, { "epoch": 0.76, "grad_norm": 2.78125, "learning_rate": 0.00013726341360448477, "loss": 2.0613, "step": 321635 }, { "epoch": 0.76, "grad_norm": 1.9296875, "learning_rate": 0.0001372616984236682, "loss": 1.9424, "step": 321640 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013725998323012236, "loss": 1.9956, "step": 321645 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013725826802384787, "loss": 1.8482, "step": 321650 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013725655280484526, "loss": 1.9584, "step": 321655 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.00013725483757311512, "loss": 1.8678, "step": 321660 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013725312232865805, "loss": 2.1016, "step": 321665 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.00013725140707147466, "loss": 2.0471, "step": 321670 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.0001372496918015655, "loss": 1.9761, "step": 321675 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.0001372479765189312, "loss": 1.9402, "step": 321680 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013724626122357228, "loss": 2.0978, "step": 321685 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013724454591548937, "loss": 2.1433, "step": 321690 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013724283059468302, "loss": 2.0017, "step": 321695 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013724111526115392, "loss": 2.1788, "step": 321700 }, { "epoch": 0.76, "grad_norm": 2.625, "learning_rate": 0.0001372393999149025, "loss": 2.1986, "step": 321705 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.00013723768455592948, "loss": 2.178, "step": 321710 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013723596918423535, "loss": 2.1646, "step": 321715 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013723425379982075, "loss": 2.2036, "step": 321720 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013723253840268627, "loss": 2.0652, "step": 321725 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013723082299283248, "loss": 2.1624, "step": 321730 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013722910757025996, "loss": 2.0738, "step": 321735 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013722739213496926, "loss": 2.1004, "step": 321740 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013722567668696106, "loss": 1.97, "step": 321745 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013722396122623588, "loss": 2.1035, "step": 321750 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013722224575279433, "loss": 2.1586, "step": 321755 }, { "epoch": 0.76, "grad_norm": 1.9765625, "learning_rate": 0.00013722053026663696, "loss": 2.0369, "step": 321760 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013721881476776442, "loss": 1.862, "step": 321765 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013721709925617722, "loss": 2.1141, "step": 321770 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013721538373187602, "loss": 1.9768, "step": 321775 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013721366819486133, "loss": 1.7181, "step": 321780 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013721195264513377, "loss": 1.8729, "step": 321785 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.000137210237082694, "loss": 2.0263, "step": 321790 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013720852150754246, "loss": 1.9096, "step": 321795 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013720680591967988, "loss": 2.0953, "step": 321800 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013720509031910674, "loss": 2.0151, "step": 321805 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013720337470582368, "loss": 2.0131, "step": 321810 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013720165907983126, "loss": 1.9724, "step": 321815 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013719994344113008, "loss": 2.0586, "step": 321820 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.0001371982277897207, "loss": 2.0492, "step": 321825 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013719651212560377, "loss": 1.8386, "step": 321830 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013719479644877983, "loss": 2.0894, "step": 321835 }, { "epoch": 0.76, "grad_norm": 1.7578125, "learning_rate": 0.0001371930807592495, "loss": 2.0346, "step": 321840 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013719136505701329, "loss": 1.9482, "step": 321845 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013718964934207186, "loss": 2.0387, "step": 321850 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013718793361442577, "loss": 1.9958, "step": 321855 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013718621787407563, "loss": 2.0765, "step": 321860 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013718450212102197, "loss": 2.1682, "step": 321865 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.00013718278635526542, "loss": 1.948, "step": 321870 }, { "epoch": 0.76, "grad_norm": 2.71875, "learning_rate": 0.00013718107057680652, "loss": 1.9342, "step": 321875 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013717935478564595, "loss": 2.0077, "step": 321880 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013717763898178422, "loss": 2.1662, "step": 321885 }, { "epoch": 0.76, "grad_norm": 1.9921875, "learning_rate": 0.00013717592316522193, "loss": 2.1495, "step": 321890 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013717420733595968, "loss": 2.1362, "step": 321895 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.000137172491493998, "loss": 2.0997, "step": 321900 }, { "epoch": 0.76, "grad_norm": 2.703125, "learning_rate": 0.00013717077563933758, "loss": 2.201, "step": 321905 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013716905977197893, "loss": 2.0113, "step": 321910 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013716734389192264, "loss": 2.0903, "step": 321915 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013716562799916931, "loss": 2.1844, "step": 321920 }, { "epoch": 0.76, "grad_norm": 1.9375, "learning_rate": 0.00013716391209371955, "loss": 1.9622, "step": 321925 }, { "epoch": 0.76, "grad_norm": 1.90625, "learning_rate": 0.00013716219617557394, "loss": 1.9922, "step": 321930 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.000137160480244733, "loss": 2.1362, "step": 321935 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013715876430119738, "loss": 1.9644, "step": 321940 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.00013715704834496768, "loss": 2.0505, "step": 321945 }, { "epoch": 0.76, "grad_norm": 2.859375, "learning_rate": 0.00013715533237604446, "loss": 2.0554, "step": 321950 }, { "epoch": 0.76, "grad_norm": 2.65625, "learning_rate": 0.00013715361639442825, "loss": 2.0433, "step": 321955 }, { "epoch": 0.76, "grad_norm": 3.109375, "learning_rate": 0.00013715190040011974, "loss": 2.0244, "step": 321960 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013715018439311945, "loss": 2.0958, "step": 321965 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013714846837342798, "loss": 2.0823, "step": 321970 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013714675234104595, "loss": 2.1349, "step": 321975 }, { "epoch": 0.76, "grad_norm": 2.90625, "learning_rate": 0.00013714503629597385, "loss": 2.0664, "step": 321980 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013714332023821238, "loss": 2.0518, "step": 321985 }, { "epoch": 0.76, "grad_norm": 1.984375, "learning_rate": 0.0001371416041677621, "loss": 2.1079, "step": 321990 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013713988808462354, "loss": 2.2346, "step": 321995 }, { "epoch": 0.76, "grad_norm": 1.84375, "learning_rate": 0.00013713817198879734, "loss": 2.0544, "step": 322000 }, { "epoch": 0.76, "grad_norm": 1.9375, "learning_rate": 0.00013713645588028404, "loss": 2.0881, "step": 322005 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013713473975908428, "loss": 2.112, "step": 322010 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001371330236251986, "loss": 2.1287, "step": 322015 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001371313074786276, "loss": 2.136, "step": 322020 }, { "epoch": 0.76, "grad_norm": 1.7265625, "learning_rate": 0.00013712959131937188, "loss": 1.9907, "step": 322025 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013712787514743205, "loss": 2.1875, "step": 322030 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.0001371261589628086, "loss": 2.0272, "step": 322035 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013712444276550224, "loss": 2.0997, "step": 322040 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013712272655551347, "loss": 2.1318, "step": 322045 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001371210103328429, "loss": 2.0078, "step": 322050 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013711929409749112, "loss": 2.1265, "step": 322055 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013711757784945873, "loss": 2.0405, "step": 322060 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.0001371158615887463, "loss": 2.0841, "step": 322065 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013711414531535442, "loss": 2.2387, "step": 322070 }, { "epoch": 0.76, "grad_norm": 1.90625, "learning_rate": 0.00013711242902928364, "loss": 2.0792, "step": 322075 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013711071273053463, "loss": 2.1299, "step": 322080 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.0001371089964191079, "loss": 2.0522, "step": 322085 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.00013710728009500408, "loss": 1.982, "step": 322090 }, { "epoch": 0.76, "grad_norm": 2.828125, "learning_rate": 0.00013710556375822375, "loss": 2.0962, "step": 322095 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013710384740876744, "loss": 1.8861, "step": 322100 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013710213104663584, "loss": 2.1682, "step": 322105 }, { "epoch": 0.76, "grad_norm": 1.84375, "learning_rate": 0.00013710041467182945, "loss": 1.9793, "step": 322110 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013709869828434888, "loss": 1.9911, "step": 322115 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.0001370969818841947, "loss": 2.0413, "step": 322120 }, { "epoch": 0.76, "grad_norm": 1.9375, "learning_rate": 0.00013709526547136755, "loss": 2.0455, "step": 322125 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.00013709354904586798, "loss": 2.0326, "step": 322130 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.0001370918326076966, "loss": 2.1407, "step": 322135 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013709011615685394, "loss": 2.0958, "step": 322140 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013708839969334065, "loss": 2.0314, "step": 322145 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013708668321715727, "loss": 2.1452, "step": 322150 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013708496672830444, "loss": 1.9863, "step": 322155 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001370832502267827, "loss": 2.0619, "step": 322160 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013708153371259262, "loss": 2.0381, "step": 322165 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013707981718573485, "loss": 2.1321, "step": 322170 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013707810064620993, "loss": 1.9641, "step": 322175 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013707638409401847, "loss": 2.1092, "step": 322180 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013707466752916104, "loss": 1.8891, "step": 322185 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001370729509516382, "loss": 2.081, "step": 322190 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013707123436145058, "loss": 2.1579, "step": 322195 }, { "epoch": 0.76, "grad_norm": 2.65625, "learning_rate": 0.00013706951775859877, "loss": 1.8796, "step": 322200 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013706780114308332, "loss": 1.9407, "step": 322205 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013706608451490485, "loss": 2.0733, "step": 322210 }, { "epoch": 0.76, "grad_norm": 2.671875, "learning_rate": 0.00013706436787406395, "loss": 1.9701, "step": 322215 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013706265122056117, "loss": 2.0383, "step": 322220 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013706093455439713, "loss": 1.9555, "step": 322225 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013705921787557238, "loss": 1.9786, "step": 322230 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013705750118408756, "loss": 2.081, "step": 322235 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.0001370557844799432, "loss": 1.755, "step": 322240 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013705406776313994, "loss": 1.9954, "step": 322245 }, { "epoch": 0.76, "grad_norm": 1.8125, "learning_rate": 0.00013705235103367828, "loss": 1.9945, "step": 322250 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.00013705063429155892, "loss": 2.0351, "step": 322255 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013704891753678238, "loss": 2.1214, "step": 322260 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013704720076934926, "loss": 2.03, "step": 322265 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013704548398926013, "loss": 2.0411, "step": 322270 }, { "epoch": 0.76, "grad_norm": 1.890625, "learning_rate": 0.0001370437671965156, "loss": 1.9141, "step": 322275 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013704205039111623, "loss": 2.0179, "step": 322280 }, { "epoch": 0.76, "grad_norm": 1.7109375, "learning_rate": 0.00013704033357306262, "loss": 1.9805, "step": 322285 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.0001370386167423554, "loss": 2.0006, "step": 322290 }, { "epoch": 0.76, "grad_norm": 1.9296875, "learning_rate": 0.0001370368998989951, "loss": 1.9541, "step": 322295 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.0001370351830429823, "loss": 2.0691, "step": 322300 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013703346617431763, "loss": 1.9467, "step": 322305 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013703174929300164, "loss": 2.2653, "step": 322310 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.000137030032399035, "loss": 2.1412, "step": 322315 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013702831549241816, "loss": 2.0149, "step": 322320 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001370265985731518, "loss": 1.9887, "step": 322325 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013702488164123647, "loss": 2.1265, "step": 322330 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013702316469667278, "loss": 1.9979, "step": 322335 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013702144773946129, "loss": 2.1012, "step": 322340 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013701973076960262, "loss": 2.2177, "step": 322345 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013701801378709734, "loss": 1.9174, "step": 322350 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.000137016296791946, "loss": 1.9924, "step": 322355 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013701457978414924, "loss": 2.0084, "step": 322360 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013701286276370764, "loss": 2.2101, "step": 322365 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.0001370111457306218, "loss": 2.0392, "step": 322370 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013700942868489225, "loss": 1.8422, "step": 322375 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.0001370077116265196, "loss": 2.0607, "step": 322380 }, { "epoch": 0.76, "grad_norm": 1.984375, "learning_rate": 0.00013700599455550443, "loss": 2.0768, "step": 322385 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013700427747184737, "loss": 1.9989, "step": 322390 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013700256037554897, "loss": 2.0628, "step": 322395 }, { "epoch": 0.76, "grad_norm": 1.8515625, "learning_rate": 0.00013700084326660986, "loss": 2.0249, "step": 322400 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013699912614503057, "loss": 2.0099, "step": 322405 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013699740901081166, "loss": 1.9647, "step": 322410 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001369956918639538, "loss": 2.048, "step": 322415 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.00013699397470445755, "loss": 1.9485, "step": 322420 }, { "epoch": 0.76, "grad_norm": 2.5625, "learning_rate": 0.0001369922575323235, "loss": 2.1254, "step": 322425 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.00013699054034755219, "loss": 2.025, "step": 322430 }, { "epoch": 0.76, "grad_norm": 1.90625, "learning_rate": 0.00013698882315014424, "loss": 1.9513, "step": 322435 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.00013698710594010023, "loss": 2.0205, "step": 322440 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001369853887174208, "loss": 2.0159, "step": 322445 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013698367148210646, "loss": 1.9579, "step": 322450 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013698195423415783, "loss": 2.2388, "step": 322455 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013698023697357548, "loss": 1.8657, "step": 322460 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013697851970036003, "loss": 2.061, "step": 322465 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.00013697680241451206, "loss": 2.142, "step": 322470 }, { "epoch": 0.76, "grad_norm": 2.625, "learning_rate": 0.00013697508511603212, "loss": 2.1184, "step": 322475 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013697336780492083, "loss": 2.0317, "step": 322480 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013697165048117874, "loss": 1.9949, "step": 322485 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013696993314480648, "loss": 1.9279, "step": 322490 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013696821579580465, "loss": 2.0243, "step": 322495 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013696649843417377, "loss": 2.013, "step": 322500 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013696478105991447, "loss": 1.8687, "step": 322505 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013696306367302736, "loss": 2.0902, "step": 322510 }, { "epoch": 0.76, "grad_norm": 1.703125, "learning_rate": 0.000136961346273513, "loss": 2.0263, "step": 322515 }, { "epoch": 0.76, "grad_norm": 1.9765625, "learning_rate": 0.0001369596288613719, "loss": 1.9694, "step": 322520 }, { "epoch": 0.76, "grad_norm": 1.8515625, "learning_rate": 0.0001369579114366048, "loss": 1.9543, "step": 322525 }, { "epoch": 0.76, "grad_norm": 2.8125, "learning_rate": 0.00013695619399921216, "loss": 2.0505, "step": 322530 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013695447654919464, "loss": 1.8995, "step": 322535 }, { "epoch": 0.76, "grad_norm": 1.7109375, "learning_rate": 0.00013695275908655278, "loss": 2.0044, "step": 322540 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001369510416112872, "loss": 2.0548, "step": 322545 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013694932412339848, "loss": 1.8683, "step": 322550 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001369476066228872, "loss": 2.0833, "step": 322555 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013694588910975392, "loss": 1.934, "step": 322560 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.0001369441715839993, "loss": 2.011, "step": 322565 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013694245404562386, "loss": 2.0762, "step": 322570 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.0001369407364946282, "loss": 2.0677, "step": 322575 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.0001369390189310129, "loss": 2.1899, "step": 322580 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.0001369373013547786, "loss": 1.9981, "step": 322585 }, { "epoch": 0.76, "grad_norm": 1.8359375, "learning_rate": 0.00013693558376592585, "loss": 1.6322, "step": 322590 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.00013693386616445515, "loss": 2.0558, "step": 322595 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013693214855036728, "loss": 2.049, "step": 322600 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.00013693043092366264, "loss": 1.971, "step": 322605 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013692871328434197, "loss": 2.1592, "step": 322610 }, { "epoch": 0.76, "grad_norm": 2.5625, "learning_rate": 0.00013692699563240573, "loss": 2.1308, "step": 322615 }, { "epoch": 0.76, "grad_norm": 1.875, "learning_rate": 0.00013692527796785458, "loss": 2.036, "step": 322620 }, { "epoch": 0.76, "grad_norm": 1.7734375, "learning_rate": 0.000136923560290689, "loss": 1.7691, "step": 322625 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013692184260090978, "loss": 1.8554, "step": 322630 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013692012489851735, "loss": 2.0558, "step": 322635 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.0001369184071835123, "loss": 2.0272, "step": 322640 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013691668945589532, "loss": 1.9478, "step": 322645 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013691497171566689, "loss": 1.9625, "step": 322650 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013691325396282764, "loss": 2.2362, "step": 322655 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013691153619737814, "loss": 2.1404, "step": 322660 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.000136909818419319, "loss": 2.0754, "step": 322665 }, { "epoch": 0.76, "grad_norm": 2.734375, "learning_rate": 0.00013690810062865077, "loss": 1.9141, "step": 322670 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001369063828253741, "loss": 2.0887, "step": 322675 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013690466500948954, "loss": 2.0503, "step": 322680 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013690294718099767, "loss": 1.942, "step": 322685 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013690122933989908, "loss": 2.0463, "step": 322690 }, { "epoch": 0.76, "grad_norm": 3.125, "learning_rate": 0.00013689951148619436, "loss": 1.9802, "step": 322695 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013689779361988414, "loss": 1.7974, "step": 322700 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013689607574096887, "loss": 2.0573, "step": 322705 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.0001368943578494493, "loss": 2.1852, "step": 322710 }, { "epoch": 0.76, "grad_norm": 1.8828125, "learning_rate": 0.00013689263994532596, "loss": 2.1178, "step": 322715 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013689092202859938, "loss": 1.9853, "step": 322720 }, { "epoch": 0.76, "grad_norm": 2.671875, "learning_rate": 0.00013688920409927024, "loss": 2.0337, "step": 322725 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013688748615733903, "loss": 2.0511, "step": 322730 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001368857682028064, "loss": 2.0899, "step": 322735 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013688405023567292, "loss": 2.0655, "step": 322740 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013688233225593921, "loss": 1.7745, "step": 322745 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001368806142636058, "loss": 2.0681, "step": 322750 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013687889625867332, "loss": 1.9719, "step": 322755 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001368771782411423, "loss": 2.1242, "step": 322760 }, { "epoch": 0.76, "grad_norm": 1.9453125, "learning_rate": 0.00013687546021101343, "loss": 2.0498, "step": 322765 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.0001368737421682872, "loss": 2.0691, "step": 322770 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013687202411296422, "loss": 1.9745, "step": 322775 }, { "epoch": 0.76, "grad_norm": 1.984375, "learning_rate": 0.00013687030604504508, "loss": 1.9033, "step": 322780 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.0001368685879645304, "loss": 2.2233, "step": 322785 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013686686987142074, "loss": 1.7821, "step": 322790 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013686515176571668, "loss": 2.0706, "step": 322795 }, { "epoch": 0.76, "grad_norm": 1.734375, "learning_rate": 0.00013686343364741882, "loss": 2.0445, "step": 322800 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013686171551652776, "loss": 1.9458, "step": 322805 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013685999737304402, "loss": 1.8961, "step": 322810 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013685827921696827, "loss": 2.0545, "step": 322815 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013685656104830106, "loss": 1.7797, "step": 322820 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.000136854842867043, "loss": 2.1536, "step": 322825 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013685312467319463, "loss": 2.1261, "step": 322830 }, { "epoch": 0.76, "grad_norm": 1.765625, "learning_rate": 0.00013685140646675658, "loss": 2.0349, "step": 322835 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001368496882477294, "loss": 1.9127, "step": 322840 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.0001368479700161137, "loss": 1.9568, "step": 322845 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013684625177191008, "loss": 1.992, "step": 322850 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013684453351511913, "loss": 2.0157, "step": 322855 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013684281524574141, "loss": 2.081, "step": 322860 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001368410969637775, "loss": 2.0348, "step": 322865 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013683937866922798, "loss": 2.1399, "step": 322870 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.0001368376603620935, "loss": 2.0483, "step": 322875 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.0001368359420423746, "loss": 1.9935, "step": 322880 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013683422371007188, "loss": 2.1062, "step": 322885 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013683250536518588, "loss": 1.8679, "step": 322890 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013683078700771727, "loss": 2.101, "step": 322895 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013682906863766658, "loss": 2.0178, "step": 322900 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013682735025503443, "loss": 1.9944, "step": 322905 }, { "epoch": 0.76, "grad_norm": 2.890625, "learning_rate": 0.0001368256318598214, "loss": 2.018, "step": 322910 }, { "epoch": 0.76, "grad_norm": 2.71875, "learning_rate": 0.00013682391345202802, "loss": 1.9998, "step": 322915 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013682219503165493, "loss": 1.9769, "step": 322920 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013682047659870273, "loss": 2.1037, "step": 322925 }, { "epoch": 0.76, "grad_norm": 2.8125, "learning_rate": 0.000136818758153172, "loss": 1.9826, "step": 322930 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.0001368170396950633, "loss": 2.146, "step": 322935 }, { "epoch": 0.76, "grad_norm": 1.859375, "learning_rate": 0.00013681532122437721, "loss": 1.9678, "step": 322940 }, { "epoch": 0.76, "grad_norm": 2.578125, "learning_rate": 0.00013681360274111438, "loss": 2.1285, "step": 322945 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001368118842452753, "loss": 2.1888, "step": 322950 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.00013681016573686066, "loss": 2.0053, "step": 322955 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013680844721587098, "loss": 1.9898, "step": 322960 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013680672868230688, "loss": 1.8542, "step": 322965 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001368050101361689, "loss": 2.1288, "step": 322970 }, { "epoch": 0.76, "grad_norm": 2.84375, "learning_rate": 0.0001368032915774577, "loss": 1.7506, "step": 322975 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013680157300617382, "loss": 1.9544, "step": 322980 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013679985442231787, "loss": 2.1208, "step": 322985 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013679813582589037, "loss": 1.9308, "step": 322990 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.000136796417216892, "loss": 1.9952, "step": 322995 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001367946985953233, "loss": 2.0697, "step": 323000 }, { "epoch": 0.76, "grad_norm": 1.953125, "learning_rate": 0.00013679297996118485, "loss": 2.0372, "step": 323005 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013679126131447728, "loss": 2.15, "step": 323010 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.0001367895426552011, "loss": 2.0232, "step": 323015 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.000136787823983357, "loss": 1.9165, "step": 323020 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001367861052989455, "loss": 1.9867, "step": 323025 }, { "epoch": 0.76, "grad_norm": 1.9921875, "learning_rate": 0.00013678438660196717, "loss": 2.163, "step": 323030 }, { "epoch": 0.76, "grad_norm": 1.9296875, "learning_rate": 0.00013678266789242263, "loss": 2.0037, "step": 323035 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.00013678094917031253, "loss": 2.0683, "step": 323040 }, { "epoch": 0.76, "grad_norm": 2.75, "learning_rate": 0.00013677923043563733, "loss": 2.0912, "step": 323045 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013677751168839766, "loss": 2.0372, "step": 323050 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013677579292859415, "loss": 2.0152, "step": 323055 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013677407415622737, "loss": 2.0868, "step": 323060 }, { "epoch": 0.76, "grad_norm": 2.4375, "learning_rate": 0.0001367723553712979, "loss": 2.1293, "step": 323065 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013677063657380634, "loss": 1.9573, "step": 323070 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013676891776375323, "loss": 2.0637, "step": 323075 }, { "epoch": 0.76, "grad_norm": 2.703125, "learning_rate": 0.00013676719894113918, "loss": 1.9784, "step": 323080 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013676548010596484, "loss": 2.1741, "step": 323085 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001367637612582307, "loss": 2.1776, "step": 323090 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013676204239793742, "loss": 1.863, "step": 323095 }, { "epoch": 0.76, "grad_norm": 1.65625, "learning_rate": 0.00013676032352508553, "loss": 2.0479, "step": 323100 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013675860463967568, "loss": 1.9564, "step": 323105 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.0001367568857417084, "loss": 2.1269, "step": 323110 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013675516683118433, "loss": 2.0228, "step": 323115 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.000136753447908104, "loss": 2.0493, "step": 323120 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.000136751728972468, "loss": 1.9412, "step": 323125 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.000136750010024277, "loss": 1.9517, "step": 323130 }, { "epoch": 0.76, "grad_norm": 1.6171875, "learning_rate": 0.0001367482910635315, "loss": 2.0952, "step": 323135 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013674657209023213, "loss": 2.0047, "step": 323140 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.0001367448531043795, "loss": 1.9721, "step": 323145 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.0001367431341059741, "loss": 2.0325, "step": 323150 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.0001367414150950166, "loss": 1.9115, "step": 323155 }, { "epoch": 0.76, "grad_norm": 1.8046875, "learning_rate": 0.00013673969607150757, "loss": 2.0549, "step": 323160 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013673797703544758, "loss": 2.125, "step": 323165 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013673625798683726, "loss": 2.0705, "step": 323170 }, { "epoch": 0.76, "grad_norm": 1.90625, "learning_rate": 0.00013673453892567715, "loss": 2.1653, "step": 323175 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013673281985196787, "loss": 1.9867, "step": 323180 }, { "epoch": 0.76, "grad_norm": 1.9453125, "learning_rate": 0.00013673110076570995, "loss": 1.9673, "step": 323185 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013672938166690404, "loss": 2.1672, "step": 323190 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.0001367276625555507, "loss": 1.9955, "step": 323195 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013672594343165057, "loss": 2.0635, "step": 323200 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.00013672422429520414, "loss": 2.0762, "step": 323205 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.0001367225051462121, "loss": 2.1005, "step": 323210 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001367207859846749, "loss": 2.152, "step": 323215 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001367190668105933, "loss": 1.8852, "step": 323220 }, { "epoch": 0.76, "grad_norm": 2.75, "learning_rate": 0.00013671734762396776, "loss": 2.0361, "step": 323225 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013671562842479891, "loss": 2.0049, "step": 323230 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013671390921308736, "loss": 1.9481, "step": 323235 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013671218998883365, "loss": 1.9818, "step": 323240 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.0001367104707520384, "loss": 2.0411, "step": 323245 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001367087515027022, "loss": 1.9531, "step": 323250 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.0001367070322408256, "loss": 2.0018, "step": 323255 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.0001367053129664092, "loss": 2.1192, "step": 323260 }, { "epoch": 0.76, "grad_norm": 1.9140625, "learning_rate": 0.00013670359367945362, "loss": 2.0583, "step": 323265 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013670187437995942, "loss": 2.1003, "step": 323270 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.0001367001550679272, "loss": 1.9781, "step": 323275 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013669843574335756, "loss": 1.9481, "step": 323280 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013669671640625106, "loss": 2.0565, "step": 323285 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013669499705660829, "loss": 2.0673, "step": 323290 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.0001366932776944298, "loss": 1.9783, "step": 323295 }, { "epoch": 0.76, "grad_norm": 1.9453125, "learning_rate": 0.00013669155831971626, "loss": 1.9362, "step": 323300 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013668983893246826, "loss": 2.0066, "step": 323305 }, { "epoch": 0.76, "grad_norm": 2.4375, "learning_rate": 0.0001366881195326863, "loss": 2.1769, "step": 323310 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.000136686400120371, "loss": 2.136, "step": 323315 }, { "epoch": 0.76, "grad_norm": 2.65625, "learning_rate": 0.00013668468069552299, "loss": 2.1083, "step": 323320 }, { "epoch": 0.76, "grad_norm": 2.625, "learning_rate": 0.0001366829612581428, "loss": 2.0091, "step": 323325 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001366812418082311, "loss": 1.9502, "step": 323330 }, { "epoch": 0.76, "grad_norm": 1.78125, "learning_rate": 0.00013667952234578838, "loss": 1.9041, "step": 323335 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013667780287081527, "loss": 2.102, "step": 323340 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013667608338331232, "loss": 2.0809, "step": 323345 }, { "epoch": 0.76, "grad_norm": 1.7578125, "learning_rate": 0.0001366743638832802, "loss": 1.7681, "step": 323350 }, { "epoch": 0.76, "grad_norm": 1.9765625, "learning_rate": 0.00013667264437071947, "loss": 2.0821, "step": 323355 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001366709248456307, "loss": 2.216, "step": 323360 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013666920530801445, "loss": 2.2091, "step": 323365 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013666748575787133, "loss": 2.0434, "step": 323370 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001366657661952019, "loss": 2.134, "step": 323375 }, { "epoch": 0.76, "grad_norm": 2.5625, "learning_rate": 0.00013666404662000686, "loss": 2.2622, "step": 323380 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013666232703228668, "loss": 2.1257, "step": 323385 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013666060743204196, "loss": 2.0878, "step": 323390 }, { "epoch": 0.76, "grad_norm": 1.9140625, "learning_rate": 0.00013665888781927334, "loss": 1.884, "step": 323395 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013665716819398138, "loss": 2.0473, "step": 323400 }, { "epoch": 0.76, "grad_norm": 2.625, "learning_rate": 0.00013665544855616664, "loss": 2.0051, "step": 323405 }, { "epoch": 0.76, "grad_norm": 1.9140625, "learning_rate": 0.00013665372890582977, "loss": 2.1571, "step": 323410 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001366520092429713, "loss": 1.9801, "step": 323415 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013665028956759184, "loss": 1.9955, "step": 323420 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.000136648569879692, "loss": 2.2271, "step": 323425 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.0001366468501792723, "loss": 1.9059, "step": 323430 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001366451304663334, "loss": 2.269, "step": 323435 }, { "epoch": 0.76, "grad_norm": 1.828125, "learning_rate": 0.00013664341074087587, "loss": 1.9058, "step": 323440 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013664169100290025, "loss": 2.1443, "step": 323445 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013663997125240723, "loss": 1.9791, "step": 323450 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013663825148939725, "loss": 2.0497, "step": 323455 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.000136636531713871, "loss": 2.1124, "step": 323460 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013663481192582907, "loss": 1.9461, "step": 323465 }, { "epoch": 0.76, "grad_norm": 2.6875, "learning_rate": 0.00013663309212527203, "loss": 2.1705, "step": 323470 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013663137231220045, "loss": 2.0576, "step": 323475 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.0001366296524866149, "loss": 2.1117, "step": 323480 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.000136627932648516, "loss": 2.1302, "step": 323485 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013662621279790437, "loss": 1.992, "step": 323490 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013662449293478057, "loss": 2.0161, "step": 323495 }, { "epoch": 0.76, "grad_norm": 2.75, "learning_rate": 0.00013662277305914516, "loss": 2.0651, "step": 323500 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013662105317099874, "loss": 1.9635, "step": 323505 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.00013661933327034187, "loss": 2.1743, "step": 323510 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013661761335717523, "loss": 2.0038, "step": 323515 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.0001366158934314993, "loss": 1.9689, "step": 323520 }, { "epoch": 0.76, "grad_norm": 2.765625, "learning_rate": 0.00013661417349331476, "loss": 2.113, "step": 323525 }, { "epoch": 0.76, "grad_norm": 1.8125, "learning_rate": 0.00013661245354262215, "loss": 2.0011, "step": 323530 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013661073357942204, "loss": 2.0093, "step": 323535 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013660901360371504, "loss": 2.001, "step": 323540 }, { "epoch": 0.76, "grad_norm": 1.828125, "learning_rate": 0.00013660729361550175, "loss": 1.806, "step": 323545 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013660557361478275, "loss": 1.9455, "step": 323550 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001366038536015586, "loss": 2.1808, "step": 323555 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013660213357582994, "loss": 1.9889, "step": 323560 }, { "epoch": 0.76, "grad_norm": 4.78125, "learning_rate": 0.0001366004135375973, "loss": 2.2451, "step": 323565 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.0001365986934868613, "loss": 2.1408, "step": 323570 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013659697342362253, "loss": 2.0747, "step": 323575 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013659525334788154, "loss": 2.0152, "step": 323580 }, { "epoch": 0.76, "grad_norm": 2.609375, "learning_rate": 0.000136593533259639, "loss": 1.9615, "step": 323585 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013659181315889542, "loss": 2.0215, "step": 323590 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.0001365900930456514, "loss": 1.8751, "step": 323595 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013658837291990756, "loss": 2.1004, "step": 323600 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013658665278166448, "loss": 2.1084, "step": 323605 }, { "epoch": 0.76, "grad_norm": 1.953125, "learning_rate": 0.00013658493263092272, "loss": 2.0413, "step": 323610 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013658321246768287, "loss": 2.0961, "step": 323615 }, { "epoch": 0.76, "grad_norm": 2.578125, "learning_rate": 0.00013658149229194553, "loss": 1.9662, "step": 323620 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.0001365797721037113, "loss": 2.0212, "step": 323625 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.0001365780519029808, "loss": 2.0882, "step": 323630 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.0001365763316897545, "loss": 1.8475, "step": 323635 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013657461146403306, "loss": 2.0917, "step": 323640 }, { "epoch": 0.76, "grad_norm": 3.015625, "learning_rate": 0.0001365728912258171, "loss": 2.2899, "step": 323645 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013657117097510725, "loss": 2.0915, "step": 323650 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013656945071190393, "loss": 2.1957, "step": 323655 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013656773043620787, "loss": 2.153, "step": 323660 }, { "epoch": 0.76, "grad_norm": 1.90625, "learning_rate": 0.00013656601014801954, "loss": 1.9534, "step": 323665 }, { "epoch": 0.76, "grad_norm": 1.84375, "learning_rate": 0.00013656428984733964, "loss": 2.1629, "step": 323670 }, { "epoch": 0.76, "grad_norm": 2.8125, "learning_rate": 0.00013656256953416875, "loss": 1.9586, "step": 323675 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.0001365608492085074, "loss": 1.858, "step": 323680 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.0001365591288703562, "loss": 2.0874, "step": 323685 }, { "epoch": 0.76, "grad_norm": 1.9921875, "learning_rate": 0.00013655740851971574, "loss": 1.8747, "step": 323690 }, { "epoch": 0.76, "grad_norm": 2.828125, "learning_rate": 0.00013655568815658657, "loss": 2.0059, "step": 323695 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013655396778096933, "loss": 2.19, "step": 323700 }, { "epoch": 0.76, "grad_norm": 1.84375, "learning_rate": 0.00013655224739286462, "loss": 1.9412, "step": 323705 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013655052699227297, "loss": 1.9738, "step": 323710 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013654880657919502, "loss": 2.0166, "step": 323715 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.0001365470861536313, "loss": 2.0087, "step": 323720 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001365453657155825, "loss": 2.2131, "step": 323725 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013654364526504909, "loss": 2.0688, "step": 323730 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013654192480203173, "loss": 2.2557, "step": 323735 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013654020432653097, "loss": 2.1001, "step": 323740 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013653848383854742, "loss": 2.149, "step": 323745 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.00013653676333808164, "loss": 1.9575, "step": 323750 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013653504282513425, "loss": 1.8919, "step": 323755 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013653332229970585, "loss": 1.915, "step": 323760 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.000136531601761797, "loss": 1.9356, "step": 323765 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.0001365298812114083, "loss": 2.0665, "step": 323770 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013652816064854028, "loss": 2.082, "step": 323775 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013652644007319358, "loss": 1.9467, "step": 323780 }, { "epoch": 0.76, "grad_norm": 1.8671875, "learning_rate": 0.00013652471948536883, "loss": 1.9753, "step": 323785 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013652299888506657, "loss": 2.1746, "step": 323790 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.0001365212782722874, "loss": 2.1859, "step": 323795 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013651955764703183, "loss": 1.9945, "step": 323800 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013651783700930058, "loss": 1.9206, "step": 323805 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013651611635909417, "loss": 2.0478, "step": 323810 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013651439569641318, "loss": 2.0702, "step": 323815 }, { "epoch": 0.76, "grad_norm": 2.4375, "learning_rate": 0.00013651267502125822, "loss": 2.0662, "step": 323820 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013651095433362987, "loss": 1.9911, "step": 323825 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.0001365092336335287, "loss": 1.8892, "step": 323830 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.0001365075129209553, "loss": 1.9736, "step": 323835 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.0001365057921959103, "loss": 2.0144, "step": 323840 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.00013650407145839423, "loss": 2.0612, "step": 323845 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013650235070840772, "loss": 1.9693, "step": 323850 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.0001365006299459514, "loss": 1.919, "step": 323855 }, { "epoch": 0.76, "grad_norm": 1.8984375, "learning_rate": 0.00013649890917102572, "loss": 1.8929, "step": 323860 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013649718838363138, "loss": 2.2077, "step": 323865 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.00013649546758376895, "loss": 2.0786, "step": 323870 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.000136493746771439, "loss": 1.9401, "step": 323875 }, { "epoch": 0.76, "grad_norm": 2.578125, "learning_rate": 0.00013649202594664215, "loss": 2.1649, "step": 323880 }, { "epoch": 0.76, "grad_norm": 2.5625, "learning_rate": 0.0001364903051093789, "loss": 2.0835, "step": 323885 }, { "epoch": 0.76, "grad_norm": 1.9921875, "learning_rate": 0.00013648858425964995, "loss": 1.9396, "step": 323890 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013648686339745583, "loss": 2.003, "step": 323895 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013648514252279713, "loss": 2.0527, "step": 323900 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.00013648342163567447, "loss": 2.1819, "step": 323905 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013648170073608836, "loss": 2.0081, "step": 323910 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013647997982403946, "loss": 2.0308, "step": 323915 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013647825889952835, "loss": 2.0862, "step": 323920 }, { "epoch": 0.76, "grad_norm": 1.828125, "learning_rate": 0.00013647653796255562, "loss": 1.9551, "step": 323925 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013647481701312182, "loss": 1.9121, "step": 323930 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013647309605122754, "loss": 2.1015, "step": 323935 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001364713750768734, "loss": 1.9953, "step": 323940 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013646965409006, "loss": 1.8526, "step": 323945 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013646793309078788, "loss": 2.0585, "step": 323950 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013646621207905768, "loss": 1.8597, "step": 323955 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013646449105486993, "loss": 2.0165, "step": 323960 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013646277001822524, "loss": 2.1251, "step": 323965 }, { "epoch": 0.76, "grad_norm": 2.65625, "learning_rate": 0.00013646104896912427, "loss": 2.0787, "step": 323970 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013645932790756752, "loss": 1.9013, "step": 323975 }, { "epoch": 0.76, "grad_norm": 2.6875, "learning_rate": 0.00013645760683355558, "loss": 2.0962, "step": 323980 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013645588574708904, "loss": 2.0269, "step": 323985 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013645416464816857, "loss": 2.0945, "step": 323990 }, { "epoch": 0.76, "grad_norm": 2.78125, "learning_rate": 0.00013645244353679465, "loss": 2.0481, "step": 323995 }, { "epoch": 0.76, "grad_norm": 1.828125, "learning_rate": 0.00013645072241296792, "loss": 1.9309, "step": 324000 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013644900127668895, "loss": 2.0878, "step": 324005 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.0001364472801279584, "loss": 1.9371, "step": 324010 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013644555896677674, "loss": 1.8159, "step": 324015 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013644383779314463, "loss": 2.157, "step": 324020 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013644211660706262, "loss": 2.1891, "step": 324025 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013644039540853137, "loss": 2.2136, "step": 324030 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.0001364386741975514, "loss": 2.1987, "step": 324035 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013643695297412334, "loss": 2.1495, "step": 324040 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.0001364352317382477, "loss": 2.1212, "step": 324045 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013643351048992517, "loss": 2.2711, "step": 324050 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013643178922915627, "loss": 2.0207, "step": 324055 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013643006795594165, "loss": 2.0313, "step": 324060 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013642834667028183, "loss": 1.9722, "step": 324065 }, { "epoch": 0.76, "grad_norm": 3.234375, "learning_rate": 0.0001364266253721774, "loss": 2.0155, "step": 324070 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013642490406162898, "loss": 2.0832, "step": 324075 }, { "epoch": 0.76, "grad_norm": 2.6875, "learning_rate": 0.00013642318273863717, "loss": 2.0348, "step": 324080 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013642146140320255, "loss": 2.0137, "step": 324085 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.0001364197400553257, "loss": 1.9277, "step": 324090 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013641801869500718, "loss": 2.1481, "step": 324095 }, { "epoch": 0.76, "grad_norm": 2.78125, "learning_rate": 0.00013641629732224762, "loss": 1.9119, "step": 324100 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.0001364145759370476, "loss": 2.1785, "step": 324105 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.0001364128545394077, "loss": 2.065, "step": 324110 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013641113312932846, "loss": 2.0173, "step": 324115 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013640941170681059, "loss": 2.0988, "step": 324120 }, { "epoch": 0.76, "grad_norm": 2.65625, "learning_rate": 0.00013640769027185455, "loss": 2.1024, "step": 324125 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.000136405968824461, "loss": 2.0436, "step": 324130 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013640424736463053, "loss": 1.9063, "step": 324135 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013640252589236369, "loss": 1.998, "step": 324140 }, { "epoch": 0.76, "grad_norm": 1.7265625, "learning_rate": 0.0001364008044076611, "loss": 1.9043, "step": 324145 }, { "epoch": 0.76, "grad_norm": 1.7109375, "learning_rate": 0.00013639908291052333, "loss": 1.8844, "step": 324150 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013639736140095096, "loss": 1.8192, "step": 324155 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.0001363956398789446, "loss": 1.9701, "step": 324160 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013639391834450483, "loss": 2.0212, "step": 324165 }, { "epoch": 0.76, "grad_norm": 2.765625, "learning_rate": 0.00013639219679763222, "loss": 2.1567, "step": 324170 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.0001363904752383274, "loss": 2.0165, "step": 324175 }, { "epoch": 0.76, "grad_norm": 1.9921875, "learning_rate": 0.00013638875366659092, "loss": 2.0888, "step": 324180 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013638703208242338, "loss": 2.0256, "step": 324185 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013638531048582538, "loss": 2.1613, "step": 324190 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.0001363835888767975, "loss": 2.0175, "step": 324195 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013638186725534036, "loss": 2.1266, "step": 324200 }, { "epoch": 0.76, "grad_norm": 2.828125, "learning_rate": 0.00013638014562145445, "loss": 2.1422, "step": 324205 }, { "epoch": 0.76, "grad_norm": 1.9453125, "learning_rate": 0.00013637842397514044, "loss": 1.933, "step": 324210 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.00013637670231639892, "loss": 2.0464, "step": 324215 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013637498064523044, "loss": 2.0246, "step": 324220 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013637325896163565, "loss": 2.0368, "step": 324225 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.000136371537265615, "loss": 2.2101, "step": 324230 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013636981555716925, "loss": 2.0927, "step": 324235 }, { "epoch": 0.76, "grad_norm": 1.890625, "learning_rate": 0.0001363680938362989, "loss": 2.146, "step": 324240 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013636637210300457, "loss": 2.2074, "step": 324245 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.0001363646503572868, "loss": 2.0592, "step": 324250 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.0001363629285991462, "loss": 2.1548, "step": 324255 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013636120682858336, "loss": 2.228, "step": 324260 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.0001363594850455989, "loss": 2.0415, "step": 324265 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.00013635776325019336, "loss": 1.8727, "step": 324270 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013635604144236735, "loss": 2.1575, "step": 324275 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013635431962212146, "loss": 1.9858, "step": 324280 }, { "epoch": 0.76, "grad_norm": 2.625, "learning_rate": 0.0001363525977894563, "loss": 2.0602, "step": 324285 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013635087594437238, "loss": 2.0862, "step": 324290 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013634915408687037, "loss": 2.1287, "step": 324295 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013634743221695083, "loss": 2.1982, "step": 324300 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013634571033461435, "loss": 2.1352, "step": 324305 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013634398843986151, "loss": 1.9936, "step": 324310 }, { "epoch": 0.76, "grad_norm": 2.671875, "learning_rate": 0.0001363422665326929, "loss": 2.0874, "step": 324315 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.00013634054461310915, "loss": 2.062, "step": 324320 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013633882268111077, "loss": 2.2664, "step": 324325 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013633710073669838, "loss": 2.0214, "step": 324330 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013633537877987264, "loss": 2.1573, "step": 324335 }, { "epoch": 0.76, "grad_norm": 2.453125, "learning_rate": 0.00013633365681063402, "loss": 2.1872, "step": 324340 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013633193482898316, "loss": 1.9724, "step": 324345 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013633021283492066, "loss": 1.9975, "step": 324350 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013632849082844715, "loss": 2.0803, "step": 324355 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.0001363267688095631, "loss": 1.8616, "step": 324360 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.0001363250467782692, "loss": 2.0324, "step": 324365 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.000136323324734566, "loss": 2.0652, "step": 324370 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013632160267845408, "loss": 2.0396, "step": 324375 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013631988060993407, "loss": 1.9979, "step": 324380 }, { "epoch": 0.76, "grad_norm": 2.4375, "learning_rate": 0.00013631815852900654, "loss": 2.1029, "step": 324385 }, { "epoch": 0.76, "grad_norm": 1.9609375, "learning_rate": 0.000136316436435672, "loss": 2.0567, "step": 324390 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013631471432993118, "loss": 1.804, "step": 324395 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013631299221178458, "loss": 1.923, "step": 324400 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013631127008123279, "loss": 2.0113, "step": 324405 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013630954793827642, "loss": 2.0941, "step": 324410 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013630782578291604, "loss": 1.943, "step": 324415 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013630610361515223, "loss": 1.9505, "step": 324420 }, { "epoch": 0.76, "grad_norm": 2.6875, "learning_rate": 0.00013630438143498564, "loss": 2.1476, "step": 324425 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.0001363026592424168, "loss": 1.9975, "step": 324430 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.00013630093703744629, "loss": 2.126, "step": 324435 }, { "epoch": 0.76, "grad_norm": 1.9453125, "learning_rate": 0.00013629921482007472, "loss": 2.0196, "step": 324440 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.0001362974925903027, "loss": 2.0846, "step": 324445 }, { "epoch": 0.76, "grad_norm": 2.484375, "learning_rate": 0.00013629577034813078, "loss": 2.1047, "step": 324450 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.0001362940480935596, "loss": 2.0496, "step": 324455 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001362923258265897, "loss": 2.1519, "step": 324460 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.00013629060354722168, "loss": 2.0998, "step": 324465 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.00013628888125545615, "loss": 2.1906, "step": 324470 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013628715895129366, "loss": 2.068, "step": 324475 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013628543663473478, "loss": 2.0334, "step": 324480 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.0001362837143057802, "loss": 2.0255, "step": 324485 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013628199196443042, "loss": 1.9494, "step": 324490 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013628026961068606, "loss": 2.0271, "step": 324495 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013627854724454768, "loss": 1.9079, "step": 324500 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001362768248660159, "loss": 2.0297, "step": 324505 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.0001362751024750913, "loss": 2.1798, "step": 324510 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.0001362733800717745, "loss": 2.174, "step": 324515 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.00013627165765606604, "loss": 2.1409, "step": 324520 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013626993522796647, "loss": 1.9948, "step": 324525 }, { "epoch": 0.76, "grad_norm": 2.046875, "learning_rate": 0.0001362682127874765, "loss": 2.2069, "step": 324530 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.00013626649033459664, "loss": 2.1142, "step": 324535 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013626476786932746, "loss": 2.042, "step": 324540 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.0001362630453916696, "loss": 2.0376, "step": 324545 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.0001362613229016236, "loss": 2.1757, "step": 324550 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.0001362596003991901, "loss": 2.0361, "step": 324555 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013625787788436967, "loss": 2.1195, "step": 324560 }, { "epoch": 0.76, "grad_norm": 1.703125, "learning_rate": 0.00013625615535716286, "loss": 1.9081, "step": 324565 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.00013625443281757032, "loss": 2.2242, "step": 324570 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013625271026559258, "loss": 2.0999, "step": 324575 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013625098770123027, "loss": 2.07, "step": 324580 }, { "epoch": 0.76, "grad_norm": 1.953125, "learning_rate": 0.00013624926512448396, "loss": 1.9275, "step": 324585 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013624754253535425, "loss": 2.0356, "step": 324590 }, { "epoch": 0.76, "grad_norm": 2.078125, "learning_rate": 0.00013624581993384172, "loss": 2.2796, "step": 324595 }, { "epoch": 0.76, "grad_norm": 1.8046875, "learning_rate": 0.000136244097319947, "loss": 2.0499, "step": 324600 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013624237469367058, "loss": 2.0366, "step": 324605 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013624065205501313, "loss": 2.0635, "step": 324610 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013623892940397523, "loss": 1.9449, "step": 324615 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013623720674055742, "loss": 1.8571, "step": 324620 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013623548406476035, "loss": 2.1099, "step": 324625 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.0001362337613765846, "loss": 1.8834, "step": 324630 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.0001362320386760307, "loss": 2.0047, "step": 324635 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.0001362303159630993, "loss": 2.051, "step": 324640 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013622859323779093, "loss": 2.0934, "step": 324645 }, { "epoch": 0.76, "grad_norm": 2.421875, "learning_rate": 0.00013622687050010628, "loss": 2.0841, "step": 324650 }, { "epoch": 0.76, "grad_norm": 2.59375, "learning_rate": 0.00013622514775004586, "loss": 2.1582, "step": 324655 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.0001362234249876102, "loss": 2.0591, "step": 324660 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013622170221280002, "loss": 1.9943, "step": 324665 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.00013621997942561584, "loss": 2.1328, "step": 324670 }, { "epoch": 0.76, "grad_norm": 1.984375, "learning_rate": 0.00013621825662605828, "loss": 2.1331, "step": 324675 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.0001362165338141279, "loss": 2.1386, "step": 324680 }, { "epoch": 0.76, "grad_norm": 3.40625, "learning_rate": 0.00013621481098982524, "loss": 1.9159, "step": 324685 }, { "epoch": 0.76, "grad_norm": 2.5, "learning_rate": 0.000136213088153151, "loss": 2.2214, "step": 324690 }, { "epoch": 0.76, "grad_norm": 1.890625, "learning_rate": 0.00013621136530410572, "loss": 2.156, "step": 324695 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013620964244268996, "loss": 2.0734, "step": 324700 }, { "epoch": 0.76, "grad_norm": 2.375, "learning_rate": 0.00013620791956890432, "loss": 2.1835, "step": 324705 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013620619668274942, "loss": 2.0557, "step": 324710 }, { "epoch": 0.76, "grad_norm": 1.8828125, "learning_rate": 0.0001362044737842258, "loss": 2.1614, "step": 324715 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013620275087333409, "loss": 2.1053, "step": 324720 }, { "epoch": 0.76, "grad_norm": 1.9296875, "learning_rate": 0.00013620102795007486, "loss": 2.016, "step": 324725 }, { "epoch": 0.76, "grad_norm": 1.5859375, "learning_rate": 0.00013619930501444872, "loss": 2.021, "step": 324730 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013619758206645622, "loss": 1.9448, "step": 324735 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013619585910609798, "loss": 2.0766, "step": 324740 }, { "epoch": 0.76, "grad_norm": 1.984375, "learning_rate": 0.00013619413613337456, "loss": 1.977, "step": 324745 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013619241314828656, "loss": 2.0425, "step": 324750 }, { "epoch": 0.76, "grad_norm": 2.53125, "learning_rate": 0.0001361906901508346, "loss": 1.9604, "step": 324755 }, { "epoch": 0.76, "grad_norm": 2.578125, "learning_rate": 0.00013618896714101927, "loss": 2.0327, "step": 324760 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013618724411884107, "loss": 2.083, "step": 324765 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.0001361855210843007, "loss": 2.0697, "step": 324770 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.0001361837980373987, "loss": 2.1585, "step": 324775 }, { "epoch": 0.76, "grad_norm": 1.875, "learning_rate": 0.00013618207497813562, "loss": 1.9741, "step": 324780 }, { "epoch": 0.76, "grad_norm": 1.8515625, "learning_rate": 0.0001361803519065121, "loss": 2.1063, "step": 324785 }, { "epoch": 0.76, "grad_norm": 2.296875, "learning_rate": 0.00013617862882252875, "loss": 2.1238, "step": 324790 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001361769057261861, "loss": 1.9914, "step": 324795 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013617518261748474, "loss": 1.8335, "step": 324800 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.0001361734594964253, "loss": 2.0747, "step": 324805 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.00013617173636300836, "loss": 1.9058, "step": 324810 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.0001361700132172345, "loss": 2.1615, "step": 324815 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013616829005910427, "loss": 2.2113, "step": 324820 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013616656688861833, "loss": 1.9144, "step": 324825 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013616484370577723, "loss": 1.9042, "step": 324830 }, { "epoch": 0.76, "grad_norm": 2.125, "learning_rate": 0.0001361631205105816, "loss": 2.0292, "step": 324835 }, { "epoch": 0.76, "grad_norm": 2.34375, "learning_rate": 0.00013616139730303196, "loss": 2.2444, "step": 324840 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.0001361596740831289, "loss": 2.1539, "step": 324845 }, { "epoch": 0.76, "grad_norm": 2.25, "learning_rate": 0.00013615795085087306, "loss": 2.1957, "step": 324850 }, { "epoch": 0.76, "grad_norm": 2.328125, "learning_rate": 0.00013615622760626504, "loss": 2.1332, "step": 324855 }, { "epoch": 0.76, "grad_norm": 2.3125, "learning_rate": 0.00013615450434930536, "loss": 2.1179, "step": 324860 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013615278107999466, "loss": 1.9881, "step": 324865 }, { "epoch": 0.76, "grad_norm": 2.515625, "learning_rate": 0.0001361510577983335, "loss": 1.9755, "step": 324870 }, { "epoch": 0.76, "grad_norm": 2.625, "learning_rate": 0.0001361493345043225, "loss": 2.0998, "step": 324875 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013614761119796223, "loss": 2.1146, "step": 324880 }, { "epoch": 0.76, "grad_norm": 2.234375, "learning_rate": 0.00013614588787925328, "loss": 2.145, "step": 324885 }, { "epoch": 0.76, "grad_norm": 1.9765625, "learning_rate": 0.00013614416454819624, "loss": 2.0117, "step": 324890 }, { "epoch": 0.76, "grad_norm": 2.1875, "learning_rate": 0.00013614244120479168, "loss": 2.0817, "step": 324895 }, { "epoch": 0.76, "grad_norm": 2.390625, "learning_rate": 0.00013614071784904023, "loss": 2.078, "step": 324900 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013613899448094242, "loss": 1.9294, "step": 324905 }, { "epoch": 0.76, "grad_norm": 1.9765625, "learning_rate": 0.00013613727110049892, "loss": 1.9885, "step": 324910 }, { "epoch": 0.76, "grad_norm": 1.9765625, "learning_rate": 0.00013613554770771025, "loss": 1.8491, "step": 324915 }, { "epoch": 0.76, "grad_norm": 2.890625, "learning_rate": 0.00013613382430257702, "loss": 1.9539, "step": 324920 }, { "epoch": 0.76, "grad_norm": 2.0, "learning_rate": 0.00013613210088509982, "loss": 2.0472, "step": 324925 }, { "epoch": 0.76, "grad_norm": 1.96875, "learning_rate": 0.00013613037745527926, "loss": 2.14, "step": 324930 }, { "epoch": 0.76, "grad_norm": 2.203125, "learning_rate": 0.00013612865401311586, "loss": 1.9934, "step": 324935 }, { "epoch": 0.76, "grad_norm": 2.5625, "learning_rate": 0.0001361269305586103, "loss": 2.0407, "step": 324940 }, { "epoch": 0.76, "grad_norm": 1.953125, "learning_rate": 0.00013612520709176313, "loss": 2.0683, "step": 324945 }, { "epoch": 0.76, "grad_norm": 2.46875, "learning_rate": 0.0001361234836125749, "loss": 2.0789, "step": 324950 }, { "epoch": 0.76, "grad_norm": 1.8671875, "learning_rate": 0.00013612176012104625, "loss": 2.1221, "step": 324955 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013612003661717776, "loss": 2.1159, "step": 324960 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013611831310097, "loss": 2.1126, "step": 324965 }, { "epoch": 0.76, "grad_norm": 2.28125, "learning_rate": 0.00013611658957242358, "loss": 2.097, "step": 324970 }, { "epoch": 0.76, "grad_norm": 2.140625, "learning_rate": 0.00013611486603153908, "loss": 2.2772, "step": 324975 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013611314247831703, "loss": 2.0614, "step": 324980 }, { "epoch": 0.76, "grad_norm": 1.7890625, "learning_rate": 0.00013611141891275815, "loss": 1.9271, "step": 324985 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013610969533486294, "loss": 2.1745, "step": 324990 }, { "epoch": 0.76, "grad_norm": 2.640625, "learning_rate": 0.000136107971744632, "loss": 2.0511, "step": 324995 }, { "epoch": 0.76, "grad_norm": 2.265625, "learning_rate": 0.00013610624814206588, "loss": 2.0689, "step": 325000 }, { "epoch": 0.76, "grad_norm": 2.015625, "learning_rate": 0.00013610452452716526, "loss": 1.9004, "step": 325005 }, { "epoch": 0.76, "grad_norm": 2.09375, "learning_rate": 0.00013610280089993066, "loss": 2.133, "step": 325010 }, { "epoch": 0.76, "grad_norm": 1.9453125, "learning_rate": 0.00013610107726036268, "loss": 2.0598, "step": 325015 }, { "epoch": 0.76, "grad_norm": 2.359375, "learning_rate": 0.00013609935360846194, "loss": 2.2012, "step": 325020 }, { "epoch": 0.76, "grad_norm": 2.109375, "learning_rate": 0.00013609762994422898, "loss": 2.0288, "step": 325025 }, { "epoch": 0.76, "grad_norm": 2.0625, "learning_rate": 0.0001360959062676644, "loss": 1.9626, "step": 325030 }, { "epoch": 0.76, "grad_norm": 2.828125, "learning_rate": 0.00013609418257876888, "loss": 2.0341, "step": 325035 }, { "epoch": 0.76, "grad_norm": 2.40625, "learning_rate": 0.00013609245887754288, "loss": 1.9689, "step": 325040 }, { "epoch": 0.76, "grad_norm": 2.546875, "learning_rate": 0.00013609073516398704, "loss": 1.8944, "step": 325045 }, { "epoch": 0.76, "grad_norm": 2.03125, "learning_rate": 0.00013608901143810195, "loss": 1.9932, "step": 325050 }, { "epoch": 0.76, "grad_norm": 2.171875, "learning_rate": 0.0001360872876998882, "loss": 2.0129, "step": 325055 }, { "epoch": 0.76, "grad_norm": 2.890625, "learning_rate": 0.0001360855639493464, "loss": 2.0407, "step": 325060 }, { "epoch": 0.76, "grad_norm": 2.15625, "learning_rate": 0.0001360838401864771, "loss": 2.035, "step": 325065 }, { "epoch": 0.76, "grad_norm": 2.21875, "learning_rate": 0.00013608211641128089, "loss": 2.041, "step": 325070 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.0001360803926237584, "loss": 2.1204, "step": 325075 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.0001360786688239102, "loss": 2.086, "step": 325080 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013607694501173684, "loss": 2.2307, "step": 325085 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013607522118723895, "loss": 2.0228, "step": 325090 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013607349735041713, "loss": 2.16, "step": 325095 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.0001360717735012719, "loss": 1.9951, "step": 325100 }, { "epoch": 0.77, "grad_norm": 1.7734375, "learning_rate": 0.00013607004963980398, "loss": 2.0687, "step": 325105 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.0001360683257660138, "loss": 2.162, "step": 325110 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013606660187990207, "loss": 2.0448, "step": 325115 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013606487798146932, "loss": 1.9325, "step": 325120 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.00013606315407071616, "loss": 1.9843, "step": 325125 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013606143014764318, "loss": 2.1809, "step": 325130 }, { "epoch": 0.77, "grad_norm": 1.8203125, "learning_rate": 0.00013605970621225096, "loss": 2.0262, "step": 325135 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013605798226454005, "loss": 1.9935, "step": 325140 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013605625830451112, "loss": 2.0215, "step": 325145 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013605453433216473, "loss": 2.0972, "step": 325150 }, { "epoch": 0.77, "grad_norm": 1.90625, "learning_rate": 0.00013605281034750143, "loss": 1.9392, "step": 325155 }, { "epoch": 0.77, "grad_norm": 2.0, "learning_rate": 0.00013605108635052183, "loss": 2.0612, "step": 325160 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013604936234122653, "loss": 1.9372, "step": 325165 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013604763831961614, "loss": 2.1126, "step": 325170 }, { "epoch": 0.77, "grad_norm": 1.9140625, "learning_rate": 0.0001360459142856912, "loss": 2.0888, "step": 325175 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.00013604419023945232, "loss": 2.0958, "step": 325180 }, { "epoch": 0.77, "grad_norm": 2.609375, "learning_rate": 0.0001360424661809001, "loss": 2.1048, "step": 325185 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.0001360407421100351, "loss": 2.0608, "step": 325190 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013603901802685796, "loss": 2.0154, "step": 325195 }, { "epoch": 0.77, "grad_norm": 1.90625, "learning_rate": 0.00013603729393136922, "loss": 2.0365, "step": 325200 }, { "epoch": 0.77, "grad_norm": 1.9921875, "learning_rate": 0.0001360355698235695, "loss": 2.1935, "step": 325205 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013603384570345936, "loss": 2.0316, "step": 325210 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.00013603212157103938, "loss": 2.1128, "step": 325215 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.00013603039742631024, "loss": 1.9318, "step": 325220 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.00013602867326927242, "loss": 1.9107, "step": 325225 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013602694909992655, "loss": 2.1539, "step": 325230 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013602522491827323, "loss": 1.9453, "step": 325235 }, { "epoch": 0.77, "grad_norm": 1.8359375, "learning_rate": 0.00013602350072431307, "loss": 1.9413, "step": 325240 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.0001360217765180466, "loss": 2.0606, "step": 325245 }, { "epoch": 0.77, "grad_norm": 2.765625, "learning_rate": 0.00013602005229947442, "loss": 2.034, "step": 325250 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013601832806859712, "loss": 2.0464, "step": 325255 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013601660382541538, "loss": 2.0601, "step": 325260 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013601487956992967, "loss": 2.1224, "step": 325265 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013601315530214063, "loss": 2.0931, "step": 325270 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013601143102204882, "loss": 2.1331, "step": 325275 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013600970672965486, "loss": 2.1507, "step": 325280 }, { "epoch": 0.77, "grad_norm": 2.46875, "learning_rate": 0.0001360079824249594, "loss": 2.1304, "step": 325285 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013600625810796288, "loss": 2.1636, "step": 325290 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.000136004533778666, "loss": 2.1241, "step": 325295 }, { "epoch": 0.77, "grad_norm": 2.625, "learning_rate": 0.0001360028094370693, "loss": 2.2981, "step": 325300 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.0001360010850831734, "loss": 2.1318, "step": 325305 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013599936071697887, "loss": 2.1049, "step": 325310 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013599763633848633, "loss": 2.0813, "step": 325315 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013599591194769631, "loss": 2.0511, "step": 325320 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013599418754460944, "loss": 2.1205, "step": 325325 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013599246312922633, "loss": 2.1824, "step": 325330 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013599073870154753, "loss": 2.029, "step": 325335 }, { "epoch": 0.77, "grad_norm": 1.9140625, "learning_rate": 0.00013598901426157363, "loss": 2.1176, "step": 325340 }, { "epoch": 0.77, "grad_norm": 1.921875, "learning_rate": 0.00013598728980930523, "loss": 1.9683, "step": 325345 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013598556534474292, "loss": 1.9667, "step": 325350 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013598384086788728, "loss": 2.1446, "step": 325355 }, { "epoch": 0.77, "grad_norm": 2.46875, "learning_rate": 0.00013598211637873893, "loss": 2.0304, "step": 325360 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013598039187729844, "loss": 1.9738, "step": 325365 }, { "epoch": 0.77, "grad_norm": 2.078125, "learning_rate": 0.00013597866736356638, "loss": 2.1528, "step": 325370 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013597694283754334, "loss": 2.035, "step": 325375 }, { "epoch": 0.77, "grad_norm": 2.71875, "learning_rate": 0.00013597521829922997, "loss": 2.1267, "step": 325380 }, { "epoch": 0.77, "grad_norm": 1.9140625, "learning_rate": 0.00013597349374862676, "loss": 2.1688, "step": 325385 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013597176918573437, "loss": 1.9544, "step": 325390 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013597004461055338, "loss": 1.9744, "step": 325395 }, { "epoch": 0.77, "grad_norm": 2.5625, "learning_rate": 0.0001359683200230844, "loss": 2.0239, "step": 325400 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013596659542332794, "loss": 1.9878, "step": 325405 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.00013596487081128463, "loss": 2.0339, "step": 325410 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.0001359631461869551, "loss": 1.9417, "step": 325415 }, { "epoch": 0.77, "grad_norm": 2.515625, "learning_rate": 0.00013596142155033992, "loss": 1.9991, "step": 325420 }, { "epoch": 0.77, "grad_norm": 1.9296875, "learning_rate": 0.00013595969690143964, "loss": 1.9436, "step": 325425 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013595797224025488, "loss": 2.1122, "step": 325430 }, { "epoch": 0.77, "grad_norm": 1.703125, "learning_rate": 0.0001359562475667862, "loss": 2.0047, "step": 325435 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013595452288103424, "loss": 2.0069, "step": 325440 }, { "epoch": 0.77, "grad_norm": 2.296875, "learning_rate": 0.0001359527981829996, "loss": 1.8845, "step": 325445 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013595107347268279, "loss": 2.0964, "step": 325450 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013594934875008446, "loss": 2.1278, "step": 325455 }, { "epoch": 0.77, "grad_norm": 2.8125, "learning_rate": 0.00013594762401520515, "loss": 2.1096, "step": 325460 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013594589926804547, "loss": 2.0487, "step": 325465 }, { "epoch": 0.77, "grad_norm": 1.9921875, "learning_rate": 0.0001359441745086061, "loss": 2.2051, "step": 325470 }, { "epoch": 0.77, "grad_norm": 3.078125, "learning_rate": 0.00013594244973688747, "loss": 1.8307, "step": 325475 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013594072495289026, "loss": 1.9932, "step": 325480 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.00013593900015661508, "loss": 1.9379, "step": 325485 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013593727534806245, "loss": 2.0221, "step": 325490 }, { "epoch": 0.77, "grad_norm": 2.0, "learning_rate": 0.00013593555052723302, "loss": 1.8123, "step": 325495 }, { "epoch": 0.77, "grad_norm": 1.9140625, "learning_rate": 0.00013593382569412733, "loss": 2.002, "step": 325500 }, { "epoch": 0.77, "grad_norm": 2.296875, "learning_rate": 0.00013593210084874598, "loss": 2.0256, "step": 325505 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013593037599108962, "loss": 2.0992, "step": 325510 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013592865112115878, "loss": 2.0432, "step": 325515 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013592692623895405, "loss": 2.0845, "step": 325520 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013592520134447601, "loss": 2.1279, "step": 325525 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.0001359234764377253, "loss": 2.1934, "step": 325530 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013592175151870247, "loss": 2.0866, "step": 325535 }, { "epoch": 0.77, "grad_norm": 2.640625, "learning_rate": 0.00013592002658740815, "loss": 2.0352, "step": 325540 }, { "epoch": 0.77, "grad_norm": 1.9765625, "learning_rate": 0.00013591830164384283, "loss": 2.1174, "step": 325545 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013591657668800722, "loss": 2.0489, "step": 325550 }, { "epoch": 0.77, "grad_norm": 1.75, "learning_rate": 0.00013591485171990183, "loss": 1.9433, "step": 325555 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.0001359131267395273, "loss": 1.9811, "step": 325560 }, { "epoch": 0.77, "grad_norm": 2.125, "learning_rate": 0.0001359114017468842, "loss": 2.1458, "step": 325565 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013590967674197306, "loss": 2.0828, "step": 325570 }, { "epoch": 0.77, "grad_norm": 1.890625, "learning_rate": 0.00013590795172479454, "loss": 2.1474, "step": 325575 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.00013590622669534924, "loss": 1.969, "step": 325580 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013590450165363772, "loss": 2.0258, "step": 325585 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013590277659966056, "loss": 2.0823, "step": 325590 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013590105153341833, "loss": 1.9534, "step": 325595 }, { "epoch": 0.77, "grad_norm": 2.578125, "learning_rate": 0.00013589932645491172, "loss": 2.0445, "step": 325600 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.0001358976013641412, "loss": 1.9743, "step": 325605 }, { "epoch": 0.77, "grad_norm": 1.9921875, "learning_rate": 0.00013589587626110743, "loss": 2.3149, "step": 325610 }, { "epoch": 0.77, "grad_norm": 1.9375, "learning_rate": 0.00013589415114581097, "loss": 1.965, "step": 325615 }, { "epoch": 0.77, "grad_norm": 1.96875, "learning_rate": 0.0001358924260182524, "loss": 2.1743, "step": 325620 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013589070087843233, "loss": 2.0208, "step": 325625 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013588897572635138, "loss": 2.0415, "step": 325630 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.0001358872505620101, "loss": 2.0664, "step": 325635 }, { "epoch": 0.77, "grad_norm": 1.8984375, "learning_rate": 0.00013588552538540904, "loss": 2.0945, "step": 325640 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013588380019654886, "loss": 2.1762, "step": 325645 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.0001358820749954301, "loss": 2.0621, "step": 325650 }, { "epoch": 0.77, "grad_norm": 2.578125, "learning_rate": 0.00013588034978205343, "loss": 1.9372, "step": 325655 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013587862455641933, "loss": 2.0106, "step": 325660 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013587689931852844, "loss": 2.0545, "step": 325665 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013587517406838138, "loss": 2.0098, "step": 325670 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.0001358734488059787, "loss": 2.198, "step": 325675 }, { "epoch": 0.77, "grad_norm": 2.125, "learning_rate": 0.00013587172353132098, "loss": 2.0555, "step": 325680 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013586999824440884, "loss": 2.0742, "step": 325685 }, { "epoch": 0.77, "grad_norm": 2.65625, "learning_rate": 0.00013586827294524286, "loss": 2.1204, "step": 325690 }, { "epoch": 0.77, "grad_norm": 1.8203125, "learning_rate": 0.00013586654763382368, "loss": 1.8114, "step": 325695 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013586482231015177, "loss": 1.8934, "step": 325700 }, { "epoch": 0.77, "grad_norm": 2.125, "learning_rate": 0.00013586309697422778, "loss": 2.17, "step": 325705 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013586137162605234, "loss": 2.0193, "step": 325710 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.000135859646265626, "loss": 1.9272, "step": 325715 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013585792089294935, "loss": 2.0571, "step": 325720 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.000135856195508023, "loss": 2.0269, "step": 325725 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013585447011084747, "loss": 1.8911, "step": 325730 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.00013585274470142344, "loss": 2.1408, "step": 325735 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013585101927975147, "loss": 2.0325, "step": 325740 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013584929384583214, "loss": 1.9142, "step": 325745 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013584756839966604, "loss": 1.9604, "step": 325750 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013584584294125374, "loss": 2.0169, "step": 325755 }, { "epoch": 0.77, "grad_norm": 1.8828125, "learning_rate": 0.0001358441174705959, "loss": 2.1834, "step": 325760 }, { "epoch": 0.77, "grad_norm": 2.578125, "learning_rate": 0.000135842391987693, "loss": 2.0485, "step": 325765 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013584066649254572, "loss": 2.1201, "step": 325770 }, { "epoch": 0.77, "grad_norm": 1.90625, "learning_rate": 0.0001358389409851546, "loss": 1.9832, "step": 325775 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013583721546552027, "loss": 1.909, "step": 325780 }, { "epoch": 0.77, "grad_norm": 3.21875, "learning_rate": 0.00013583548993364327, "loss": 1.9838, "step": 325785 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013583376438952425, "loss": 2.1111, "step": 325790 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013583203883316375, "loss": 2.0942, "step": 325795 }, { "epoch": 0.77, "grad_norm": 2.640625, "learning_rate": 0.00013583031326456238, "loss": 2.1418, "step": 325800 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.0001358285876837207, "loss": 1.9898, "step": 325805 }, { "epoch": 0.77, "grad_norm": 1.75, "learning_rate": 0.00013582686209063935, "loss": 1.7625, "step": 325810 }, { "epoch": 0.77, "grad_norm": 1.9375, "learning_rate": 0.0001358251364853189, "loss": 2.0496, "step": 325815 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.0001358234108677599, "loss": 2.0346, "step": 325820 }, { "epoch": 0.77, "grad_norm": 2.5625, "learning_rate": 0.00013582168523796302, "loss": 2.147, "step": 325825 }, { "epoch": 0.77, "grad_norm": 1.8984375, "learning_rate": 0.00013581995959592878, "loss": 2.2766, "step": 325830 }, { "epoch": 0.77, "grad_norm": 2.484375, "learning_rate": 0.00013581823394165777, "loss": 2.0651, "step": 325835 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013581650827515062, "loss": 2.1317, "step": 325840 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013581478259640792, "loss": 2.2203, "step": 325845 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.0001358130569054302, "loss": 2.019, "step": 325850 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013581133120221814, "loss": 2.1446, "step": 325855 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013580960548677227, "loss": 2.037, "step": 325860 }, { "epoch": 0.77, "grad_norm": 1.5703125, "learning_rate": 0.00013580787975909315, "loss": 1.9365, "step": 325865 }, { "epoch": 0.77, "grad_norm": 1.9765625, "learning_rate": 0.00013580615401918142, "loss": 1.8891, "step": 325870 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.0001358044282670377, "loss": 2.0926, "step": 325875 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013580270250266249, "loss": 1.9898, "step": 325880 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013580097672605645, "loss": 1.926, "step": 325885 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013579925093722012, "loss": 1.9245, "step": 325890 }, { "epoch": 0.77, "grad_norm": 2.625, "learning_rate": 0.00013579752513615415, "loss": 2.0167, "step": 325895 }, { "epoch": 0.77, "grad_norm": 2.125, "learning_rate": 0.00013579579932285912, "loss": 1.9888, "step": 325900 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013579407349733556, "loss": 1.9222, "step": 325905 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.0001357923476595841, "loss": 2.146, "step": 325910 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013579062180960532, "loss": 2.1543, "step": 325915 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.00013578889594739982, "loss": 2.0618, "step": 325920 }, { "epoch": 0.77, "grad_norm": 2.59375, "learning_rate": 0.00013578717007296817, "loss": 2.1158, "step": 325925 }, { "epoch": 0.77, "grad_norm": 2.578125, "learning_rate": 0.00013578544418631097, "loss": 2.1245, "step": 325930 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013578371828742883, "loss": 2.1881, "step": 325935 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013578199237632233, "loss": 2.0515, "step": 325940 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013578026645299205, "loss": 1.9292, "step": 325945 }, { "epoch": 0.77, "grad_norm": 2.578125, "learning_rate": 0.00013577854051743853, "loss": 2.22, "step": 325950 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013577681456966248, "loss": 2.2213, "step": 325955 }, { "epoch": 0.77, "grad_norm": 2.515625, "learning_rate": 0.0001357750886096644, "loss": 2.0271, "step": 325960 }, { "epoch": 0.77, "grad_norm": 1.9140625, "learning_rate": 0.0001357733626374449, "loss": 1.9872, "step": 325965 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013577163665300456, "loss": 2.226, "step": 325970 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.000135769910656344, "loss": 2.0048, "step": 325975 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013576818464746377, "loss": 2.0416, "step": 325980 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.0001357664586263645, "loss": 2.157, "step": 325985 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.00013576473259304677, "loss": 2.0697, "step": 325990 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013576300654751113, "loss": 2.1463, "step": 325995 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.0001357612804897582, "loss": 2.1213, "step": 326000 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013575955441978856, "loss": 2.0619, "step": 326005 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013575782833760282, "loss": 2.0394, "step": 326010 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013575610224320157, "loss": 2.0808, "step": 326015 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013575437613658537, "loss": 2.0749, "step": 326020 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013575265001775482, "loss": 2.1514, "step": 326025 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.0001357509238867105, "loss": 2.1439, "step": 326030 }, { "epoch": 0.77, "grad_norm": 1.953125, "learning_rate": 0.0001357491977434531, "loss": 2.0116, "step": 326035 }, { "epoch": 0.77, "grad_norm": 2.671875, "learning_rate": 0.00013574747158798305, "loss": 2.2415, "step": 326040 }, { "epoch": 0.77, "grad_norm": 2.46875, "learning_rate": 0.00013574574542030106, "loss": 1.9819, "step": 326045 }, { "epoch": 0.77, "grad_norm": 2.671875, "learning_rate": 0.00013574401924040761, "loss": 1.9423, "step": 326050 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.00013574229304830343, "loss": 2.0538, "step": 326055 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013574056684398898, "loss": 1.8603, "step": 326060 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.00013573884062746493, "loss": 2.0951, "step": 326065 }, { "epoch": 0.77, "grad_norm": 1.765625, "learning_rate": 0.0001357371143987318, "loss": 1.9718, "step": 326070 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.0001357353881577903, "loss": 2.1041, "step": 326075 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.0001357336619046409, "loss": 1.9978, "step": 326080 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013573193563928425, "loss": 2.0197, "step": 326085 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.0001357302093617209, "loss": 2.0282, "step": 326090 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013572848307195147, "loss": 2.1434, "step": 326095 }, { "epoch": 0.77, "grad_norm": 2.59375, "learning_rate": 0.00013572675676997654, "loss": 2.0226, "step": 326100 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.0001357250304557967, "loss": 2.0761, "step": 326105 }, { "epoch": 0.77, "grad_norm": 1.703125, "learning_rate": 0.00013572330412941255, "loss": 1.8581, "step": 326110 }, { "epoch": 0.77, "grad_norm": 2.078125, "learning_rate": 0.00013572157779082468, "loss": 2.2203, "step": 326115 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013571985144003365, "loss": 2.1146, "step": 326120 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013571812507704012, "loss": 1.9125, "step": 326125 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013571639870184457, "loss": 1.9834, "step": 326130 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013571467231444765, "loss": 2.078, "step": 326135 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013571294591484997, "loss": 2.1256, "step": 326140 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013571121950305213, "loss": 2.0974, "step": 326145 }, { "epoch": 0.77, "grad_norm": 2.0, "learning_rate": 0.00013570949307905466, "loss": 1.968, "step": 326150 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.0001357077666428582, "loss": 1.9582, "step": 326155 }, { "epoch": 0.77, "grad_norm": 2.296875, "learning_rate": 0.00013570604019446327, "loss": 2.0532, "step": 326160 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013570431373387053, "loss": 2.0746, "step": 326165 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.0001357025872610806, "loss": 2.0943, "step": 326170 }, { "epoch": 0.77, "grad_norm": 1.796875, "learning_rate": 0.00013570086077609398, "loss": 2.1035, "step": 326175 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.0001356991342789113, "loss": 2.2169, "step": 326180 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013569740776953312, "loss": 1.7846, "step": 326185 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.0001356956812479601, "loss": 2.0153, "step": 326190 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013569395471419278, "loss": 2.0075, "step": 326195 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013569222816823176, "loss": 1.9103, "step": 326200 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013569050161007758, "loss": 2.014, "step": 326205 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013568877503973093, "loss": 2.0915, "step": 326210 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013568704845719234, "loss": 1.9827, "step": 326215 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013568532186246238, "loss": 1.9526, "step": 326220 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013568359525554168, "loss": 1.9847, "step": 326225 }, { "epoch": 0.77, "grad_norm": 1.8046875, "learning_rate": 0.00013568186863643081, "loss": 2.1087, "step": 326230 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.0001356801420051304, "loss": 2.212, "step": 326235 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.000135678415361641, "loss": 2.0268, "step": 326240 }, { "epoch": 0.77, "grad_norm": 2.765625, "learning_rate": 0.00013567668870596318, "loss": 2.0258, "step": 326245 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013567496203809758, "loss": 2.0989, "step": 326250 }, { "epoch": 0.77, "grad_norm": 2.546875, "learning_rate": 0.0001356732353580447, "loss": 1.9757, "step": 326255 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013567150866580527, "loss": 2.0783, "step": 326260 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.0001356697819613798, "loss": 1.9894, "step": 326265 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013566805524476886, "loss": 1.8124, "step": 326270 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.00013566632851597308, "loss": 1.9219, "step": 326275 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013566460177499303, "loss": 2.2138, "step": 326280 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013566287502182932, "loss": 1.9694, "step": 326285 }, { "epoch": 0.77, "grad_norm": 4.0, "learning_rate": 0.0001356611482564825, "loss": 2.036, "step": 326290 }, { "epoch": 0.77, "grad_norm": 2.59375, "learning_rate": 0.0001356594214789532, "loss": 1.9612, "step": 326295 }, { "epoch": 0.77, "grad_norm": 1.96875, "learning_rate": 0.000135657694689242, "loss": 2.1318, "step": 326300 }, { "epoch": 0.77, "grad_norm": 2.59375, "learning_rate": 0.0001356559678873495, "loss": 2.0029, "step": 326305 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013565424107327622, "loss": 2.1174, "step": 326310 }, { "epoch": 0.77, "grad_norm": 2.078125, "learning_rate": 0.00013565251424702286, "loss": 2.0528, "step": 326315 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.0001356507874085899, "loss": 2.0921, "step": 326320 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.000135649060557978, "loss": 2.1297, "step": 326325 }, { "epoch": 0.77, "grad_norm": 1.9765625, "learning_rate": 0.00013564733369518777, "loss": 2.0741, "step": 326330 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013564560682021974, "loss": 1.973, "step": 326335 }, { "epoch": 0.77, "grad_norm": 1.9609375, "learning_rate": 0.00013564387993307455, "loss": 1.8746, "step": 326340 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.00013564215303375272, "loss": 1.8854, "step": 326345 }, { "epoch": 0.77, "grad_norm": 1.9609375, "learning_rate": 0.0001356404261222549, "loss": 2.0413, "step": 326350 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013563869919858167, "loss": 2.086, "step": 326355 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.0001356369722627336, "loss": 1.9946, "step": 326360 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013563524531471133, "loss": 1.9343, "step": 326365 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.0001356335183545154, "loss": 2.195, "step": 326370 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.0001356317913821464, "loss": 1.9983, "step": 326375 }, { "epoch": 0.77, "grad_norm": 2.484375, "learning_rate": 0.00013563006439760495, "loss": 1.9688, "step": 326380 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.0001356283374008916, "loss": 2.0468, "step": 326385 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013562661039200698, "loss": 2.0047, "step": 326390 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013562488337095165, "loss": 2.12, "step": 326395 }, { "epoch": 0.77, "grad_norm": 2.0, "learning_rate": 0.00013562315633772624, "loss": 2.257, "step": 326400 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013562142929233128, "loss": 2.2096, "step": 326405 }, { "epoch": 0.77, "grad_norm": 2.984375, "learning_rate": 0.00013561970223476742, "loss": 2.1611, "step": 326410 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013561797516503521, "loss": 2.1214, "step": 326415 }, { "epoch": 0.77, "grad_norm": 1.984375, "learning_rate": 0.00013561624808313528, "loss": 2.0844, "step": 326420 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013561452098906815, "loss": 1.957, "step": 326425 }, { "epoch": 0.77, "grad_norm": 2.125, "learning_rate": 0.0001356127938828345, "loss": 1.9685, "step": 326430 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013561106676443485, "loss": 2.164, "step": 326435 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.0001356093396338698, "loss": 1.9371, "step": 326440 }, { "epoch": 0.77, "grad_norm": 2.46875, "learning_rate": 0.00013560761249114, "loss": 1.8786, "step": 326445 }, { "epoch": 0.77, "grad_norm": 2.390625, "learning_rate": 0.00013560588533624595, "loss": 2.2007, "step": 326450 }, { "epoch": 0.77, "grad_norm": 2.296875, "learning_rate": 0.00013560415816918828, "loss": 2.0784, "step": 326455 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013560243098996759, "loss": 2.1259, "step": 326460 }, { "epoch": 0.77, "grad_norm": 1.859375, "learning_rate": 0.0001356007037985845, "loss": 2.2063, "step": 326465 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013559897659503952, "loss": 2.1841, "step": 326470 }, { "epoch": 0.77, "grad_norm": 2.078125, "learning_rate": 0.00013559724937933331, "loss": 1.9976, "step": 326475 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.0001355955221514664, "loss": 1.9569, "step": 326480 }, { "epoch": 0.77, "grad_norm": 1.84375, "learning_rate": 0.00013559379491143948, "loss": 2.1132, "step": 326485 }, { "epoch": 0.77, "grad_norm": 1.96875, "learning_rate": 0.00013559206765925304, "loss": 1.8713, "step": 326490 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013559034039490768, "loss": 2.1139, "step": 326495 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.00013558861311840402, "loss": 2.0727, "step": 326500 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013558688582974266, "loss": 1.8911, "step": 326505 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013558515852892417, "loss": 2.2295, "step": 326510 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013558343121594914, "loss": 2.0131, "step": 326515 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013558170389081818, "loss": 1.8346, "step": 326520 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013557997655353184, "loss": 2.1708, "step": 326525 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013557824920409076, "loss": 2.2608, "step": 326530 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.0001355765218424955, "loss": 2.0374, "step": 326535 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013557479446874664, "loss": 2.1104, "step": 326540 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.00013557306708284478, "loss": 2.003, "step": 326545 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013557133968479054, "loss": 2.0557, "step": 326550 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013556961227458446, "loss": 1.9983, "step": 326555 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013556788485222717, "loss": 1.9475, "step": 326560 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013556615741771926, "loss": 2.0597, "step": 326565 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.0001355644299710613, "loss": 2.0415, "step": 326570 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013556270251225387, "loss": 1.8487, "step": 326575 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.0001355609750412976, "loss": 2.0091, "step": 326580 }, { "epoch": 0.77, "grad_norm": 1.828125, "learning_rate": 0.00013555924755819304, "loss": 2.0057, "step": 326585 }, { "epoch": 0.77, "grad_norm": 2.640625, "learning_rate": 0.00013555752006294078, "loss": 2.024, "step": 326590 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013555579255554142, "loss": 2.0776, "step": 326595 }, { "epoch": 0.77, "grad_norm": 2.484375, "learning_rate": 0.0001355540650359956, "loss": 1.836, "step": 326600 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013555233750430385, "loss": 2.1515, "step": 326605 }, { "epoch": 0.77, "grad_norm": 1.8515625, "learning_rate": 0.00013555060996046678, "loss": 1.9913, "step": 326610 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013554888240448494, "loss": 2.1724, "step": 326615 }, { "epoch": 0.77, "grad_norm": 2.578125, "learning_rate": 0.00013554715483635898, "loss": 2.0835, "step": 326620 }, { "epoch": 0.77, "grad_norm": 2.65625, "learning_rate": 0.0001355454272560895, "loss": 2.0435, "step": 326625 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013554369966367702, "loss": 2.0508, "step": 326630 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013554197205912217, "loss": 2.1205, "step": 326635 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013554024444242555, "loss": 2.0841, "step": 326640 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013553851681358774, "loss": 2.0326, "step": 326645 }, { "epoch": 0.77, "grad_norm": 2.625, "learning_rate": 0.00013553678917260933, "loss": 2.0197, "step": 326650 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013553506151949089, "loss": 2.03, "step": 326655 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.00013553333385423304, "loss": 1.9887, "step": 326660 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013553160617683636, "loss": 2.1054, "step": 326665 }, { "epoch": 0.77, "grad_norm": 1.984375, "learning_rate": 0.0001355298784873014, "loss": 2.2566, "step": 326670 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013552815078562885, "loss": 1.935, "step": 326675 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.0001355264230718192, "loss": 1.9683, "step": 326680 }, { "epoch": 0.77, "grad_norm": 2.515625, "learning_rate": 0.0001355246953458731, "loss": 1.8304, "step": 326685 }, { "epoch": 0.77, "grad_norm": 2.296875, "learning_rate": 0.0001355229676077911, "loss": 1.97, "step": 326690 }, { "epoch": 0.77, "grad_norm": 2.515625, "learning_rate": 0.00013552123985757384, "loss": 2.1799, "step": 326695 }, { "epoch": 0.77, "grad_norm": 1.9921875, "learning_rate": 0.00013551951209522187, "loss": 2.0996, "step": 326700 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013551778432073576, "loss": 2.029, "step": 326705 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013551605653411617, "loss": 2.1246, "step": 326710 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013551432873536366, "loss": 2.2044, "step": 326715 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013551260092447876, "loss": 2.0314, "step": 326720 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013551087310146212, "loss": 2.1531, "step": 326725 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013550914526631435, "loss": 2.0463, "step": 326730 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.000135507417419036, "loss": 2.0488, "step": 326735 }, { "epoch": 0.77, "grad_norm": 1.984375, "learning_rate": 0.0001355056895596277, "loss": 2.0461, "step": 326740 }, { "epoch": 0.77, "grad_norm": 1.9375, "learning_rate": 0.00013550396168808998, "loss": 2.0945, "step": 326745 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.00013550223380442343, "loss": 2.1023, "step": 326750 }, { "epoch": 0.77, "grad_norm": 2.9375, "learning_rate": 0.00013550050590862872, "loss": 2.1536, "step": 326755 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013549877800070643, "loss": 2.0211, "step": 326760 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013549705008065706, "loss": 2.1107, "step": 326765 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013549532214848127, "loss": 2.0683, "step": 326770 }, { "epoch": 0.77, "grad_norm": 1.9375, "learning_rate": 0.0001354935942041796, "loss": 2.0624, "step": 326775 }, { "epoch": 0.77, "grad_norm": 1.8984375, "learning_rate": 0.0001354918662477527, "loss": 2.1175, "step": 326780 }, { "epoch": 0.77, "grad_norm": 1.9296875, "learning_rate": 0.00013549013827920115, "loss": 2.0871, "step": 326785 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013548841029852553, "loss": 1.9869, "step": 326790 }, { "epoch": 0.77, "grad_norm": 1.9609375, "learning_rate": 0.0001354866823057264, "loss": 2.117, "step": 326795 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013548495430080437, "loss": 2.0447, "step": 326800 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013548322628376006, "loss": 2.1441, "step": 326805 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.000135481498254594, "loss": 2.1977, "step": 326810 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.00013547977021330688, "loss": 2.0181, "step": 326815 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013547804215989915, "loss": 2.0849, "step": 326820 }, { "epoch": 0.77, "grad_norm": 1.8359375, "learning_rate": 0.00013547631409437154, "loss": 1.8653, "step": 326825 }, { "epoch": 0.77, "grad_norm": 2.546875, "learning_rate": 0.00013547458601672452, "loss": 1.895, "step": 326830 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.0001354728579269588, "loss": 2.0402, "step": 326835 }, { "epoch": 0.77, "grad_norm": 2.0, "learning_rate": 0.0001354711298250749, "loss": 2.0518, "step": 326840 }, { "epoch": 0.77, "grad_norm": 1.8671875, "learning_rate": 0.00013546940171107338, "loss": 2.093, "step": 326845 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013546767358495489, "loss": 1.9902, "step": 326850 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.00013546594544672, "loss": 2.0297, "step": 326855 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013546421729636928, "loss": 1.9027, "step": 326860 }, { "epoch": 0.77, "grad_norm": 1.796875, "learning_rate": 0.00013546248913390337, "loss": 1.9985, "step": 326865 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.0001354607609593228, "loss": 2.1296, "step": 326870 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.00013545903277262827, "loss": 1.971, "step": 326875 }, { "epoch": 0.77, "grad_norm": 2.71875, "learning_rate": 0.0001354573045738202, "loss": 2.1734, "step": 326880 }, { "epoch": 0.77, "grad_norm": 1.96875, "learning_rate": 0.00013545557636289928, "loss": 2.0255, "step": 326885 }, { "epoch": 0.77, "grad_norm": 2.8125, "learning_rate": 0.00013545384813986612, "loss": 2.1632, "step": 326890 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.00013545211990472131, "loss": 2.2336, "step": 326895 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013545039165746537, "loss": 1.9237, "step": 326900 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013544866339809894, "loss": 2.0115, "step": 326905 }, { "epoch": 0.77, "grad_norm": 1.90625, "learning_rate": 0.0001354469351266226, "loss": 2.007, "step": 326910 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013544520684303694, "loss": 2.1553, "step": 326915 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.00013544347854734258, "loss": 2.0573, "step": 326920 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013544175023954007, "loss": 1.9618, "step": 326925 }, { "epoch": 0.77, "grad_norm": 3.0, "learning_rate": 0.00013544002191963003, "loss": 1.9997, "step": 326930 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.000135438293587613, "loss": 1.7491, "step": 326935 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013543656524348964, "loss": 2.1919, "step": 326940 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013543483688726048, "loss": 2.108, "step": 326945 }, { "epoch": 0.77, "grad_norm": 2.34375, "learning_rate": 0.00013543310851892617, "loss": 2.2433, "step": 326950 }, { "epoch": 0.77, "grad_norm": 2.65625, "learning_rate": 0.00013543138013848724, "loss": 2.2084, "step": 326955 }, { "epoch": 0.77, "grad_norm": 1.96875, "learning_rate": 0.0001354296517459443, "loss": 2.0217, "step": 326960 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013542792334129798, "loss": 1.8986, "step": 326965 }, { "epoch": 0.77, "grad_norm": 2.546875, "learning_rate": 0.00013542619492454882, "loss": 2.2219, "step": 326970 }, { "epoch": 0.77, "grad_norm": 3.078125, "learning_rate": 0.00013542446649569744, "loss": 2.0713, "step": 326975 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013542273805474441, "loss": 2.0172, "step": 326980 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013542100960169034, "loss": 1.9612, "step": 326985 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.0001354192811365358, "loss": 1.8921, "step": 326990 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.0001354175526592814, "loss": 2.1884, "step": 326995 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.0001354158241699277, "loss": 2.1095, "step": 327000 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013541409566847533, "loss": 2.0897, "step": 327005 }, { "epoch": 0.77, "grad_norm": 2.046875, "learning_rate": 0.00013541236715492488, "loss": 1.8019, "step": 327010 }, { "epoch": 0.77, "grad_norm": 2.625, "learning_rate": 0.0001354106386292769, "loss": 2.0191, "step": 327015 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.000135408910091532, "loss": 2.005, "step": 327020 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.0001354071815416908, "loss": 2.0119, "step": 327025 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.00013540545297975384, "loss": 2.0457, "step": 327030 }, { "epoch": 0.77, "grad_norm": 2.296875, "learning_rate": 0.00013540372440572175, "loss": 2.0518, "step": 327035 }, { "epoch": 0.77, "grad_norm": 1.9921875, "learning_rate": 0.0001354019958195951, "loss": 2.0504, "step": 327040 }, { "epoch": 0.77, "grad_norm": 2.328125, "learning_rate": 0.00013540026722137446, "loss": 2.1219, "step": 327045 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.0001353985386110605, "loss": 1.9177, "step": 327050 }, { "epoch": 0.77, "grad_norm": 2.3125, "learning_rate": 0.00013539680998865373, "loss": 2.1058, "step": 327055 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013539508135415476, "loss": 1.8445, "step": 327060 }, { "epoch": 0.77, "grad_norm": 3.0, "learning_rate": 0.0001353933527075642, "loss": 2.0609, "step": 327065 }, { "epoch": 0.77, "grad_norm": 2.828125, "learning_rate": 0.00013539162404888262, "loss": 2.0236, "step": 327070 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.0001353898953781106, "loss": 2.113, "step": 327075 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.0001353881666952488, "loss": 2.0175, "step": 327080 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.00013538643800029774, "loss": 1.9991, "step": 327085 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013538470929325803, "loss": 1.9605, "step": 327090 }, { "epoch": 0.77, "grad_norm": 2.078125, "learning_rate": 0.00013538298057413022, "loss": 2.032, "step": 327095 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.000135381251842915, "loss": 2.0907, "step": 327100 }, { "epoch": 0.77, "grad_norm": 2.265625, "learning_rate": 0.0001353795230996129, "loss": 2.1103, "step": 327105 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013537779434422446, "loss": 2.0408, "step": 327110 }, { "epoch": 0.77, "grad_norm": 2.03125, "learning_rate": 0.00013537606557675037, "loss": 1.9827, "step": 327115 }, { "epoch": 0.77, "grad_norm": 1.796875, "learning_rate": 0.00013537433679719116, "loss": 1.9975, "step": 327120 }, { "epoch": 0.77, "grad_norm": 2.203125, "learning_rate": 0.00013537260800554744, "loss": 2.0377, "step": 327125 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.0001353708792018198, "loss": 2.1673, "step": 327130 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013536915038600878, "loss": 2.1222, "step": 327135 }, { "epoch": 0.77, "grad_norm": 1.734375, "learning_rate": 0.00013536742155811508, "loss": 2.0292, "step": 327140 }, { "epoch": 0.77, "grad_norm": 1.953125, "learning_rate": 0.00013536569271813923, "loss": 2.088, "step": 327145 }, { "epoch": 0.77, "grad_norm": 2.453125, "learning_rate": 0.00013536396386608176, "loss": 2.1617, "step": 327150 }, { "epoch": 0.77, "grad_norm": 2.125, "learning_rate": 0.00013536223500194337, "loss": 1.9908, "step": 327155 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013536050612572456, "loss": 1.9125, "step": 327160 }, { "epoch": 0.77, "grad_norm": 2.0625, "learning_rate": 0.00013535877723742598, "loss": 2.0293, "step": 327165 }, { "epoch": 0.77, "grad_norm": 2.515625, "learning_rate": 0.00013535704833704824, "loss": 2.2507, "step": 327170 }, { "epoch": 0.77, "grad_norm": 2.140625, "learning_rate": 0.00013535531942459182, "loss": 2.1311, "step": 327175 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.0001353535905000574, "loss": 2.0037, "step": 327180 }, { "epoch": 0.77, "grad_norm": 1.5625, "learning_rate": 0.00013535186156344556, "loss": 1.7844, "step": 327185 }, { "epoch": 0.77, "grad_norm": 2.46875, "learning_rate": 0.00013535013261475691, "loss": 2.0179, "step": 327190 }, { "epoch": 0.77, "grad_norm": 2.390625, "learning_rate": 0.000135348403653992, "loss": 2.1549, "step": 327195 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013534667468115142, "loss": 2.0306, "step": 327200 }, { "epoch": 0.77, "grad_norm": 2.1875, "learning_rate": 0.00013534494569623576, "loss": 2.0568, "step": 327205 }, { "epoch": 0.77, "grad_norm": 2.015625, "learning_rate": 0.00013534321669924563, "loss": 2.0708, "step": 327210 }, { "epoch": 0.77, "grad_norm": 2.671875, "learning_rate": 0.00013534148769018165, "loss": 2.1081, "step": 327215 }, { "epoch": 0.77, "grad_norm": 4.125, "learning_rate": 0.00013533975866904436, "loss": 2.2165, "step": 327220 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013533802963583438, "loss": 2.0132, "step": 327225 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013533630059055224, "loss": 1.8638, "step": 327230 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.00013533457153319865, "loss": 1.9573, "step": 327235 }, { "epoch": 0.77, "grad_norm": 2.21875, "learning_rate": 0.0001353328424637741, "loss": 1.8982, "step": 327240 }, { "epoch": 0.77, "grad_norm": 1.6015625, "learning_rate": 0.00013533111338227916, "loss": 1.8444, "step": 327245 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013532938428871454, "loss": 2.0668, "step": 327250 }, { "epoch": 0.77, "grad_norm": 2.796875, "learning_rate": 0.00013532765518308072, "loss": 1.9384, "step": 327255 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013532592606537835, "loss": 2.0799, "step": 327260 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.000135324196935608, "loss": 1.9764, "step": 327265 }, { "epoch": 0.77, "grad_norm": 1.875, "learning_rate": 0.00013532246779377028, "loss": 2.0286, "step": 327270 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013532073863986574, "loss": 2.1748, "step": 327275 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.00013531900947389499, "loss": 2.2375, "step": 327280 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013531728029585866, "loss": 2.0472, "step": 327285 }, { "epoch": 0.77, "grad_norm": 2.109375, "learning_rate": 0.00013531555110575728, "loss": 2.1504, "step": 327290 }, { "epoch": 0.77, "grad_norm": 1.84375, "learning_rate": 0.0001353138219035915, "loss": 2.0508, "step": 327295 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013531209268936184, "loss": 2.0644, "step": 327300 }, { "epoch": 0.77, "grad_norm": 2.53125, "learning_rate": 0.00013531036346306894, "loss": 2.1351, "step": 327305 }, { "epoch": 0.77, "grad_norm": 2.609375, "learning_rate": 0.00013530863422471336, "loss": 2.0763, "step": 327310 }, { "epoch": 0.77, "grad_norm": 2.234375, "learning_rate": 0.00013530690497429572, "loss": 2.1816, "step": 327315 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.00013530517571181663, "loss": 1.9257, "step": 327320 }, { "epoch": 0.77, "grad_norm": 2.15625, "learning_rate": 0.0001353034464372766, "loss": 1.7969, "step": 327325 }, { "epoch": 0.77, "grad_norm": 2.421875, "learning_rate": 0.00013530171715067635, "loss": 2.242, "step": 327330 }, { "epoch": 0.77, "grad_norm": 2.359375, "learning_rate": 0.00013529998785201635, "loss": 2.0714, "step": 327335 }, { "epoch": 0.77, "grad_norm": 1.9921875, "learning_rate": 0.0001352982585412972, "loss": 1.9164, "step": 327340 }, { "epoch": 0.77, "grad_norm": 2.375, "learning_rate": 0.00013529652921851957, "loss": 2.0107, "step": 327345 }, { "epoch": 0.77, "grad_norm": 2.09375, "learning_rate": 0.000135294799883684, "loss": 1.9781, "step": 327350 }, { "epoch": 0.77, "grad_norm": 2.171875, "learning_rate": 0.00013529307053679107, "loss": 1.9621, "step": 327355 }, { "epoch": 0.77, "grad_norm": 2.390625, "learning_rate": 0.0001352913411778414, "loss": 2.0518, "step": 327360 }, { "epoch": 0.77, "grad_norm": 2.734375, "learning_rate": 0.00013528961180683555, "loss": 2.1946, "step": 327365 }, { "epoch": 0.77, "grad_norm": 2.28125, "learning_rate": 0.00013528788242377413, "loss": 2.3454, "step": 327370 }, { "epoch": 0.77, "grad_norm": 1.875, "learning_rate": 0.00013528615302865776, "loss": 2.0168, "step": 327375 }, { "epoch": 0.77, "grad_norm": 2.4375, "learning_rate": 0.00013528442362148698, "loss": 2.0516, "step": 327380 }, { "epoch": 0.77, "grad_norm": 2.5, "learning_rate": 0.0001352826942022624, "loss": 2.1455, "step": 327385 }, { "epoch": 0.77, "grad_norm": 2.40625, "learning_rate": 0.00013528096477098458, "loss": 2.0668, "step": 327390 }, { "epoch": 0.77, "grad_norm": 2.59375, "learning_rate": 0.00013527923532765423, "loss": 2.2093, "step": 327395 }, { "epoch": 0.77, "grad_norm": 2.25, "learning_rate": 0.0001352775058722718, "loss": 2.1564, "step": 327400 } ], "logging_steps": 5, "max_steps": 849864, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "total_flos": 1.976116670110095e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }